kiara.operations.data_import¶
DataImportModule
¶
create_input_schema(self)
¶
Abstract method to implement by child classes, returns a description of the input schema of this module.
If returning a dictionary of dictionaries, the format of the return value is as follows (items with '*' are optional):
{
    "[input_field_name]": {
        "type": "[value_type]",
        "doc*": "[a description of this input]",
        "optional*": [boolean whether this input is optional or required (defaults to 'False')]
    },
    "[other_input_field_name]": {
        "type": ...
        ...
    }
}
Source code in kiara/operations/data_import.py
def create_input_schema(
    self,
) -> typing.Mapping[
    str, typing.Union[ValueSchema, typing.Mapping[str, typing.Any]]
]:
    """Describe the single input field of this import module.

    The field is named after the configured 'source_profile' and uses the
    configured 'source_type' as its value type.

    Returns:
        a one-item mapping of the input field name to a schema dictionary
        with the keys 'type' and 'doc'
    """
    # Read each config value once and reuse it for field name, type and doc.
    source_profile = self.get_config_value("source_profile")
    source_type = self.get_config_value("source_type")

    # NOTE: earlier drafts of this method also added optional 'save' and
    # 'aliases' inputs; those are not part of the schema at the moment.
    inputs: typing.Dict[str, typing.Any] = {
        source_profile: {
            "type": source_type,
            "doc": f"A {source_profile} '{source_type}' value.",
        },
    }
    return inputs
create_output_schema(self)
¶
Abstract method to implement by child classes, returns a description of the output schema of this module.
If returning a dictionary of dictionaries, the format of the return value is as follows (items with '*' are optional):
{
    "[output_field_name]": {
        "type": "[value_type]",
        "doc*": "[a description of this output]"
    },
    "[other_output_field_name]": {
        "type": ...
        ...
    }
}
Source code in kiara/operations/data_import.py
def create_output_schema(
    self,
) -> typing.Mapping[
    str, typing.Union[ValueSchema, typing.Mapping[str, typing.Any]]
]:
    """Describe the single output field of this import module.

    The field is named after the target value type of the module, except
    for the type 'any', in which case the field is called 'value_item'.

    Returns:
        a one-item mapping of the output field name to a schema dictionary
        with the keys 'type' and 'doc'
    """
    # Look the target type up once; it is used for name, type and doc.
    target_type = self.get_target_value_type()
    output_name = "value_item" if target_type == "any" else target_type

    outputs: typing.Mapping[str, typing.Any] = {
        output_name: {
            "type": target_type,
            "doc": f"The imported {target_type} value.",
        },
    }
    return outputs
retrieve_module_profiles(kiara)
classmethod
¶
Retrieve a collection of profiles (pre-set module configs) for this kiara module type.
This is used to automatically create generally useful operations (incl. their ids).
Source code in kiara/operations/data_import.py
@classmethod
def retrieve_module_profiles(
    cls, kiara: Kiara
) -> typing.Mapping[str, typing.Union[typing.Mapping[str, typing.Any], Operation]]:
    """Create one operation config for every 'import_from__*' attribute of this class.

    Attribute names are expected to look like
    'import_from__<source_profile>__<source_type>'; the part after the prefix
    is split on its last '__'. Attributes whose name can't be split that way
    are logged and skipped.

    Arguments:
        kiara: the kiara context, used to check whether the target value type is available

    Returns:
        a mapping of operation id ('import.<target_type>.from.<source_profile>')
        to operation config, or an empty mapping if the target value type of
        this class is not registered in 'kiara.type_mgmt.value_type_names'
    """
    sup_type = cls.get_target_value_type()
    if sup_type not in kiara.type_mgmt.value_type_names:
        log_message(
            f"Ignoring data import operation for type '{sup_type}': type not available"
        )
        return {}

    prefix = "import_from__"
    all_metadata_profiles: typing.Dict[str, typing.Dict[str, typing.Any]] = {}

    for attr in dir(cls):
        if not attr.startswith(prefix):
            continue

        tokens = attr[len(prefix):].rsplit("__", maxsplit=1)
        if len(tokens) != 2:
            log_message(
                f"Can't determine source name and type from string in module {cls._module_type_id}, ignoring method: {attr}"  # type: ignore
            )
            # Really ignore the attribute: without this, the unpacking below
            # raises a ValueError for names that contain no second '__'.
            continue

        source_profile, source_type = tokens
        op_config = {
            "module_type": cls._module_type_id,  # type: ignore
            "module_config": {
                "source_profile": source_profile,
                "source_type": source_type,
            },
            "doc": f"Import data of type '{sup_type}' from a {source_profile} {source_type} and save it to the kiara data store.",
        }
        # NOTE(review): the id does not contain the source type, so two
        # attributes that share a source profile but differ in source type
        # end up under the same key (last one wins) -- confirm this is intended.
        all_metadata_profiles[
            f"import.{sup_type}.from.{source_profile}"
        ] = op_config

    return all_metadata_profiles
DataImportModuleConfig
pydantic-model
¶
FileBundleImportModule
¶
Import a file, optionally saving it to the data store.
FileImportModule
¶
Import a file, optionally saving it to the data store.
ImportDataOperationType
¶
Import data into kiara.
Operations of this type take external data, and register it into kiara. External data is different in that it usually does not come with any metadata on how it was created, who created it, when, etc.
Import operations are created by implementing a class that inherits from DataImportModule, kiara will register it under an operation id following this template:
<IMPORTED_DATA_TYPE>.import_from.<IMPORT_PROFILE>.<INPUT_TYPE>
The meaning of the templated fields is:
- IMPORTED_DATA_TYPE: the data type of the imported value
- IMPORT_PROFILE: a short, free-form description of where from (or how) the data is imported
- INPUT_TYPE: the data type of the user input that points to the data (like a file path, url, query, etc.) -- in most cases this will be some form of a string or uri
There are two main scenarios when an operation of this type is used:
- 'onboard' data that was created by a 3rd party, or using external processes
- 're-import' data that was created in kiara, then exported to be transformed in an external process, and then imported again into kiara
In both of those scenarios, we'll need to have a way to add metadata to fill out 'holes' in the metadata 'cold chain'. We don't have a concept yet as to how to do that, but that is planned for the future.
get_import_operations_for_target_type(self, value_type)
¶
Return all available import operations that produce data of the specified type.
Source code in kiara/operations/data_import.py
def get_import_operations_for_target_type(
    self, value_type: str
) -> typing.Dict[str, typing.Dict[str, Operation]]:
    """Look up the import operations that produce values of the given type.

    Arguments:
        value_type: the name of the target value type

    Returns:
        the entry for 'value_type' from the result of
        'get_import_operations_per_target_type()', or an empty dictionary
        if there is no such entry
    """
    operations_by_target = self.get_import_operations_per_target_type()
    return operations_by_target.get(value_type, {})
get_import_operations_per_target_type(self)
¶
Return all available import operations per value type.
The result dictionary uses the target value type as first level key, the source type as 2nd level key, a source name/description (the source profile) as 3rd level key, and the Operation object as value.
Source code in kiara/operations/data_import.py
def get_import_operations_per_target_type(
    self,
) -> typing.Dict[str, typing.Dict[str, typing.Dict[str, Operation]]]:
    """Group all operations of this type by target type, source type and source profile.

    The result is a three-level dictionary: the target value type is the
    first level key, the source type the 2nd level key, the source profile
    the 3rd level key, and the Operation object the value.
    """
    grouped: typing.Dict[str, typing.Dict[str, typing.Dict[str, Operation]]] = {}

    for operation in self.operations.values():
        target_type: str = operation.module_cls.get_target_value_type()  # type: ignore
        module_config = operation.module_config
        source_type = module_config["source_type"]
        source_profile = module_config["source_profile"]

        # Create the intermediate levels on demand, then register the operation.
        by_source_type = grouped.setdefault(target_type, {})
        by_source_profile = by_source_type.setdefault(source_type, {})
        by_source_profile[source_profile] = operation

    return grouped