module_types
import.file
Documentation
A generic module that imports a file from one of
several possible sources.
Author(s)
Markus Binsteiner markus@frkl.io
Context
Tags onboarding
Labels package: kiara_plugin.onboarding
References source_repo:
https://github.com/DHARPA-Project/kia…
documentation:
https://DHARPA-Project.github.io/kiar…
Module config schema
Field Type Descrip… Required Default
─────────────────────────────────────────────────────
attach_m… boolean Whether no
to
attach
metadat…
constants object Value no
constan…
for this
module.
defaults object Value no
defaults
for this
module.
onboard_… string The name no
of the
type of
onboard…
Python class
python_class_name OnboardFileModule
python_module_name kiara_plugin.onboarding.modul…
full_name kiara_plugin.onboarding.modul…
Processing source code ─────────────────────────────────────────────────────
class OnboardFileModule(KiaraModule):
"""A generic module that imports a file from o…
_module_type_name = "import.file"
_config_cls = OnboardFileConfig
def create_inputs_schema(
self,
) -> ValueMapSchema:
result = {
"source": {
"type": "string",
"doc": "The source uri of the file…
"optional": False,
},
"file_name": {
"type": "string",
"doc": "The file name to use for t…
"optional": True,
},
}
if self.get_config_value("attach_metadata"…
result["attach_metadata"] = {
"type": "boolean",
"doc": "Whether to attach onboardi…
"default": True,
}
onboard_type: Union[str, None] = self.get_…
if not onboard_type:
onboard_model_cls = None
else:
onboard_model_cls = get_onboard_model_…
if not onboard_model_cls:
available = (
ModelRegistry.instance()
.get_models_of_type(OnboardDataMod…
.item_infos.keys()
)
if not available:
raise KiaraException(msg="No onboa…
idx = len(ONBOARDING_MODEL_NAME_PREFIX)
allowed = sorted((x[idx:] for x in ava…
result["onboard_type"] = {
"type": "string",
"type_config": {"allowed_strings":…
"doc": "The type of onboarding to …
", ".join(allowed)
),
"optional": True,
}
elif onboard_model_cls.get_config_fields():
result = {
"onboard_config": {
"type": "kiara_model",
"type_config": {
"kiara_model_id": self.get…
},
}
}
return result
def create_outputs_schema(
self,
) -> ValueMapSchema:
result = {"file": {"type": "file", "doc": …
return result
def process(self, inputs: ValueMap, outputs: V…
onboard_type = self.get_config_value("onbo…
source: str = inputs.get_value_data("sourc…
file_name: Union[str, None] = inputs.get_v…
if not onboard_type:
user_input_onboard_type = inputs.get_v…
if user_input_onboard_type:
onboard_type = (
f"{ONBOARDING_MODEL_NAME_PREFI…
)
attach_metadata = self.get_config_value("a…
if attach_metadata is None:
attach_metadata = inputs.get_value_dat…
data = onboard_file(
source=source,
file_name=file_name,
onboard_type=onboard_type,
attach_metadata=attach_metadata,
)
outputs.set_value("file", data)
─────────────────────────────────────────────────────
import.file_bundle
Documentation
A generic module that imports a file bundle from one
of several possible sources.
Author(s)
Markus Binsteiner markus@frkl.io
Context
Tags onboarding
Labels package: kiara_plugin.onboarding
References source_repo:
https://github.com/DHARPA-Project/kia…
documentation:
https://DHARPA-Project.github.io/kiar…
Module config schema
Field Type Descrip… Required Default
─────────────────────────────────────────────────────
attach_m… boolean Whether no
to
attach
onboard…
metadat…
constants object Value no
constan…
for this
module.
defaults object Value no
defaults
for this
module.
exclude_… array Exclude no
directo…
that end
with one
of those
tokens.
exclude_… array File no
types to
exclude.
include_… array File no
types to
include.
onboard_… string The name no
of the
type of
onboard…
sub_path string The sub no
path to
use.
Python class
python_class_name OnboardFileBundleModule
python_module_name kiara_plugin.onboarding.modul…
full_name kiara_plugin.onboarding.modul…
Processing source code ─────────────────────────────────────────────────────
class OnboardFileBundleModule(KiaraModule):
"""A generic module that imports a file from o…
_module_type_name = "import.file_bundle"
_config_cls = OnboardFileBundleConfig
def create_inputs_schema(
self,
) -> ValueMapSchema:
result = {
"source": {
"type": "string",
"doc": "The source uri of the file…
"optional": False,
}
}
if self.get_config_value("attach_metadata"…
result["attach_metadata"] = {
"type": "boolean",
"doc": "Whether to attach onboardi…
"default": True,
}
if self.get_config_value("sub_path") is No…
result["sub_path"] = {
"type": "string",
"doc": "The sub path to use. If no…
"optional": True,
}
if self.get_config_value("include_files") …
result["include_files"] = {
"type": "list",
"doc": "Include files that end wit…
"optional": True,
}
if self.get_config_value("exclude_files") …
result["exclude_files"] = {
"type": "list",
"doc": "Exclude files that end wit…
"optional": True,
}
if self.get_config_value("exclude_dirs") i…
result["exclude_dirs"] = {
"type": "list",
"doc": "Exclude directories that e…
"optional": True,
}
onboard_type: Union[str, None] = self.get_…
if not onboard_type:
onboard_model_cls = None
else:
onboard_model_cls = get_onboard_model_…
if not onboard_model_cls:
available = (
ModelRegistry.instance()
.get_models_of_type(OnboardDataMod…
.item_infos.keys()
)
if not available:
raise KiaraException(msg="No onboa…
idx = len(ONBOARDING_MODEL_NAME_PREFIX)
allowed = sorted((x[idx:] for x in ava…
result["onboard_type"] = {
"type": "string",
"type_config": {"allowed_strings":…
"doc": "The type of onboarding to …
", ".join(allowed)
),
"optional": True,
}
elif onboard_model_cls.get_config_fields():
result = {
"onboard_config": {
"type": "kiara_model",
"type_config": {
"kiara_model_id": self.get…
},
}
}
return result
def create_outputs_schema(
self,
) -> ValueMapSchema:
result = {
"file_bundle": {
"type": "file_bundle",
"doc": "The file_bundle that was o…
}
}
return result
def process(self, inputs: ValueMap, outputs: V…
onboard_type = self.get_config_value("onbo…
source: str = inputs.get_value_data("sourc…
if onboard_type:
user_input_onboard_type = inputs.get_v…
if not user_input_onboard_type:
onboard_type = (
f"{ONBOARDING_MODEL_NAME_PREFI…
)
sub_path = self.get_config_value("sub_path…
if sub_path is None:
sub_path = inputs.get_value_data("sub_…
include = self.get_config_value("include_f…
if include is None:
_include = inputs.get_value_data("incl…
if _include:
include = _include.list_data
exclude = self.get_config_value("exclude_f…
if exclude is None:
_exclude = inputs.get_value_data("excl…
if _exclude:
exclude = _exclude.list_data
exclude_dirs = self.get_config_value("excl…
if exclude_dirs is None:
_exclude_dirs = inputs.get_value_data(…
if _exclude_dirs:
exclude_dirs = _exclude_dirs.list_…
import_config_data = {
"sub_path": sub_path,
}
if include:
import_config_data["include_files"] = …
if exclude:
import_config_data["exclude_files"] = …
if exclude_dirs:
import_config_data["exclude_dirs"] = e…
import_config = FolderImportConfig(**impor…
attach_metadata = self.get_config_value("a…
if attach_metadata is None:
attach_metadata = inputs.get_value_dat…
imported_bundle = onboard_file_bundle(
source=source,
import_config=import_config,
onboard_type=onboard_type,
attach_metadata=attach_metadata,
)
outputs.set_value("file_bundle", imported_…
─────────────────────────────────────────────────────
onboard.zenodo_record
Documentation
Download a dataset from zenodo.org.
Author(s)
Markus Binsteiner markus@frkl.io
Context
Tags onboarding
Labels package: kiara_plugin.onboarding
References source_repo:
https://github.com/DHARPA-Project/kia…
documentation:
https://DHARPA-Project.github.io/kiar…
Module config schema
Field Type Descript… Required Default
─────────────────────────────────────────────────────
constan… object Value no
constants
for this
module.
defaults object Value no
defaults
for this
module.
metadat… string The no "metada…
filename
for the
zenodo
metadata.
Python class
python_class_name ZenodoDownload
python_module_name kiara_plugin.onboarding.modul…
full_name kiara_plugin.onboarding.modul…
Processing source code ─────────────────────────────────────────────────────
class ZenodoDownload(KiaraModule):
"""Download a dataset from zenodo.org."""
_module_type_name = "onboard.zenodo_record"
_config_cls = ZenodoDownloadConfig
def create_inputs_schema(
self,
) -> ValueMapSchema:
metadata_filename = self.get_config_value(…
return {
"doi": {"type": "string", "doc": "The …
"include_metadata": {
"type": "boolean",
"doc": f"Whether to write the reco…
"default": True,
},
}
def create_outputs_schema(
self,
) -> ValueMapSchema:
return {
"file_bundle": {
"type": "file_bundle",
}
}
def download_file(self, file_data: Mapping[str…
import httpx
url = file_data["links"]["self"]
file_name = file_data["key"]
checksum = file_data["checksum"][4:]
target_file = target_path / file_name
if target_file.exists():
raise KiaraProcessingException(
f"Can't download file, target path…
)
hash_md5 = hashlib.md5() # noqa
with open(target_file, "ab") as file2:
with httpx.Client() as client:
with client.stream("GET", url) as …
for chunk in resp.iter_bytes():
hash_md5.update(chunk)
file2.write(chunk)
if checksum != hash_md5.hexdigest():
raise KiaraProcessingException(
f"Can't downloda file '{file_name}…
)
return target_file
def process(self, inputs: ValueMap, outputs: V…
import pyzenodo3
include_metadata = inputs.get_value_data("…
doi = inputs.get_value_data("doi")
zen = pyzenodo3.Zenodo()
record = zen.find_record_by_doi(doi)
path = KiaraFileBundle.create_tmp_dir()
shutil.rmtree(path, ignore_errors=True)
path.mkdir()
for file_data in record.data["files"]:
self.download_file(file_data, path)
if include_metadata:
metadata_filename = self.get_config_va…
metadata_file = path / metadata_filena…
metadata_file.write_bytes(orjson.dumps…
bundle = KiaraFileBundle.import_folder(pat…
outputs.set_value("file_bundle", bundle)
─────────────────────────────────────────────────────