Skip to content

kiara.operations.data_import

DataImportModule

create_input_schema(self)

Abstract method to be implemented by child classes; returns a description of the input schema of this module.

If returning a dictionary of dictionaries, the format of the return value is as follows (items with '*' are optional):

{ "[input_field_name]": { "type": "[value_type]", "doc*": "[a description of this input]", "optional*": [boolean whether this input is optional or required (defaults to 'False')] }, "[other_input_field_name]": { "type": ... }, ... }

Source code in kiara/operations/data_import.py
def create_input_schema(
    self,
) -> typing.Mapping[
    str, typing.Union[ValueSchema, typing.Mapping[str, typing.Any]]
]:
    """Describe the single input field of this import module.

    The field is named after the configured 'source_profile' value and is typed
    with the configured 'source_type' value.

    Returns:
        a one-item dictionary mapping the input field name to its schema
        ('type' and 'doc' keys)
    """

    profile = self.get_config_value("source_profile")
    src_type = self.get_config_value("source_type")

    field_schema: typing.Dict[str, typing.Any] = {
        "type": src_type,
        "doc": f"A {profile} '{src_type}' value.",
    }

    # NOTE: optional 'save' and 'aliases' inputs (driven by the
    # 'allow_save_input', 'save_default', 'allow_aliases_input' and
    # 'aliases_default' config values) are currently disabled, so the source
    # value is the only input of this module.
    return {profile: field_schema}

create_output_schema(self)

Abstract method to be implemented by child classes; returns a description of the output schema of this module.

If returning a dictionary of dictionaries, the format of the return value is as follows (items with '*' are optional):

{ "[output_field_name]": { "type": "[value_type]", "doc*": "[a description of this output]" }, "[other_output_field_name]": { "type": ... }, ... }

Source code in kiara/operations/data_import.py
def create_output_schema(
    self,
) -> typing.Mapping[
    str, typing.Union[ValueSchema, typing.Mapping[str, typing.Any]]
]:
    """Describe the single output field of this import module.

    The field is named after the target value type, except for the type 'any',
    where the field is called 'value_item'. The schema 'type' is always the
    target value type itself.

    Returns:
        a one-item dictionary mapping the output field name to its schema
        ('type' and 'doc' keys)
    """

    target_type = self.get_target_value_type()
    # 'any' is replaced by a dedicated field name; the schema type stays 'any'
    field_name = "value_item" if target_type == "any" else target_type

    schema: typing.Mapping[str, typing.Any] = {
        field_name: {
            "type": target_type,
            "doc": f"The imported {target_type} value.",
        },
    }
    return schema

retrieve_module_profiles(kiara) classmethod

Retrieve a collection of profiles (pre-set module configs) for this kiara module type.

This is used to automatically create generally useful operations (incl. their ids).

Source code in kiara/operations/data_import.py
@classmethod
def retrieve_module_profiles(
    cls, kiara: Kiara
) -> typing.Mapping[str, typing.Union[typing.Mapping[str, typing.Any], Operation]]:
    """Create one operation config per 'import_from__*' attribute of this class.

    Attribute names are expected to follow the pattern
    'import_from__[source_profile]__[source_type]'. Attributes whose name can't
    be split into those two tokens are logged and skipped.

    Arguments:
        kiara: the kiara context, used to check whether the target value type
            of this module is available

    Returns:
        a dictionary with the operation id
        ('import.[target_type].from.[source_profile]') as key and the operation
        config as value; empty if the target value type is not available
    """

    all_metadata_profiles: typing.Dict[str, typing.Dict[str, typing.Any]] = {}

    sup_type = cls.get_target_value_type()
    if sup_type not in kiara.type_mgmt.value_type_names:
        log_message(
            f"Ignoring data import operation for type '{sup_type}': type not available"
        )
        return {}

    prefix = "import_from__"
    for attr in dir(cls):
        if not attr.startswith(prefix):
            continue

        tokens = attr[len(prefix) :].rsplit("__", maxsplit=1)
        if len(tokens) != 2:
            log_message(
                f"Can't determine source name and type from string in module {cls._module_type_id}, ignoring method: {attr}"  # type: ignore
            )
            # actually skip the attribute: without this, the unpacking below
            # raises a ValueError for names that lack the '__' separator
            continue

        source_profile, source_type = tokens

        op_config = {
            "module_type": cls._module_type_id,  # type: ignore
            "module_config": {
                "source_profile": source_profile,
                "source_type": source_type,
            },
            "doc": f"Import data of type '{sup_type}' from a {source_profile} {source_type} and save it to the kiara data store.",
        }
        # NOTE: the id does not contain the source type, so two attributes that
        # share a source profile but differ in source type end up under the
        # same key (the one processed last wins).
        all_metadata_profiles[
            f"import.{sup_type}.from.{source_profile}"
        ] = op_config

    return all_metadata_profiles

DataImportModuleConfig pydantic-model

source_profile: str pydantic-field required

The name of the source profile. Used to distinguish different input categories for the same input type.

source_type: str pydantic-field required

The type of the source to import from.

FileBundleImportModule

Import a file bundle, optionally saving it to the data store.

FileImportModule

Import a file, optionally saving it to the data store.

ImportDataOperationType

Import data into kiara.

Operations of this type take external data, and register it into kiara. External data is different in that it usually does not come with any metadata on how it was created, who created it, when, etc.

Import operations are created by implementing a class that inherits from DataImportModule; kiara will then register it under an operation id following this template:

<IMPORTED_DATA_TYPE>.import_from.<IMPORT_PROFILE>.<INPUT_TYPE>

The meaning of the templated fields is:

  • IMPORTED_DATA_TYPE: the data type of the imported value
  • IMPORT_PROFILE: a short, free-form description of where from (or how) the data is imported
  • INPUT_TYPE: the data type of the user input that points to the data (like a file path, url, query, etc.) -- in most cases this will be some form of a string or uri

There are two main scenarios when an operation of this type is used:

  • 'onboard' data that was created by a 3rd party, or using external processes
  • 're-import' data that was created in kiara, then exported to be transformed in an external process, and then imported again into kiara

In both of those scenarios, we'll need a way to add metadata to fill the 'holes' in the metadata 'cold chain'. We don't have a concept yet for how to do that, but it is planned for the future.

get_import_operations_for_target_type(self, value_type)

Return all available import operations that produce data of the specified type.

Source code in kiara/operations/data_import.py
def get_import_operations_for_target_type(
    self, value_type: str
) -> typing.Dict[str, typing.Dict[str, Operation]]:
    """Look up the import operations that produce values of the given type.

    Arguments:
        value_type: the target value type to look up

    Returns:
        the matching entry of 'get_import_operations_per_target_type()', or an
        empty dictionary if there is none for this type
    """

    ops_by_target = self.get_import_operations_per_target_type()
    if value_type in ops_by_target:
        return ops_by_target[value_type]
    return {}

get_import_operations_per_target_type(self)

Return all available import operations per value type.

The result dictionary uses the target value type as first level key, the source type as second level key, the source profile as third level key, and the Operation object as value.

Source code in kiara/operations/data_import.py
def get_import_operations_per_target_type(
    self,
) -> typing.Dict[str, typing.Dict[str, typing.Dict[str, Operation]]]:
    """Group all available import operations by the value type they produce.

    The result dictionary uses the target value type as first level key, the
    source type as second level key, the source profile as third level key,
    and the Operation object as value.
    """

    grouped: typing.Dict[str, typing.Dict[str, typing.Dict[str, Operation]]] = {}

    for operation in self.operations.values():

        produced_type: str = operation.module_cls.get_target_value_type()  # type: ignore

        src_type = operation.module_config["source_type"]
        src_profile = operation.module_config["source_profile"]

        # create the intermediate levels on demand, then register the operation
        by_source_type = grouped.setdefault(produced_type, {})
        by_profile = by_source_type.setdefault(src_type, {})
        by_profile[src_profile] = operation

    return grouped