Skip to content

file_bundle

DefaultFileBundleImportModule (FileBundleImportModule)

Import a file bundle into the kiara data store.

This module will support multiple source types and profiles in the future, but at the moment only import from local folder is supported. Thus, requiring the config value 'local' for 'source_profile', and 'folder_path' for 'source_type'.

Source code in core/file_bundle.py
class DefaultFileBundleImportModule(FileBundleImportModule):
    """Import a file bundle into the kiara data store.

    This module will support multiple source types and profiles in the future, but at the moment only import from
    local folder is supported. Thus, requiring the config value 'local' for 'source_profile', and 'folder_path' for 'source_type'.
    """

    _module_type_name = "import"

    def import_from__folder_path__string(self, source: str) -> KiaraFileBundle:

        file_bundle_model = KiaraFileBundle.import_folder(source)
        return file_bundle_model

LoadFileBundleModule (KiaraModule)

Load a file bundle and its metadata.

This module does not read or load the content of all included files, but contains the path to the local representation/version of them so they can be read by a subsequent process.

Source code in core/file_bundle.py
class LoadFileBundleModule(KiaraModule):
    """Load a file bundle and its metadata.

    This module does not read or load the content of all included files, but contains the path to the local representation/version of them
    so they can be read by a subsequent process.
    """

    _module_type_name = "load"

    def create_input_schema(
        self,
    ) -> typing.Mapping[
        str, typing.Union[ValueSchema, typing.Mapping[str, typing.Any]]
    ]:
        return {
            "base_path": {
                "type": "string",
                "doc": "The base path where the folder lives.",
            },
            "rel_path": {
                "type": "string",
                "doc": "The relative path of the folder, within the base path location.",
            },
            "include_files": {
                "type": "list",
                "doc": "A list of strings, include all files where the filename ends with one of those strings.\n\nOnly full string matches are supported at the moment, globs and regex might be in the future.",
                "optional": True,
            },
            "exclude_dirs": {
                "type": "list",
                "doc": f"A list of strings, exclude all folders whose name ends with that string. Defaults to: {DEFAULT_EXCLUDE_DIRS}.\n\nOnly full string matches are supported at the moment, globs and regex might be in the future.",
                "default": DEFAULT_EXCLUDE_DIRS,
                "optional": True,
            },
            "exclude_files": {
                "type": "list",
                "doc": f"A list of strings, exclude all files that end with that one of those strings (takes precedence over 'include_files'). Defaults to: {DEFAULT_EXCLUDE_FILES}\n\nOnly full string matches are supported at the moment, globs and regex might be in the future.",
                "default": DEFAULT_EXCLUDE_FILES,
                "optional": True,
            },
        }

    def create_output_schema(
        self,
    ) -> typing.Mapping[
        str, typing.Union[ValueSchema, typing.Mapping[str, typing.Any]]
    ]:

        return {
            "file_bundle": {
                "type": "file_bundle",
                "doc": "The collection of files contained in the bundle.",
            }
        }

    def process(self, inputs: ValueSet, outputs: ValueSet) -> None:

        base_path = inputs.get_value_data("base_path")
        rel_path = inputs.get_value_data("rel_path")

        path = os.path.join(base_path, rel_path)

        included_files = inputs.get_value_data("include_files")
        excluded_dirs = inputs.get_value_data("exclude_dirs")
        excluded_files = inputs.get_value_data("exclude_files")

        import_config = FolderImportConfig(
            include_files=included_files,
            exclude_dirs=excluded_dirs,
            excluded_files=excluded_files,
        )

        bundle = KiaraFileBundle.import_folder(source=path, import_config=import_config)

        outputs.set_values(file_bundle=bundle)

create_input_schema(self)

Abstract method to implement by child classes, returns a description of the input schema of this module.

If returning a dictionary of dictionaries, the format of the return value is as follows (items with '*' are optional):

{ "[input_field_name]: { "type": "[value_type]", "doc*": "[a description of this input]", "optional*': [boolean whether this input is optional or required (defaults to 'False')] "[other_input_field_name]: { "type: ... ... }

Source code in core/file_bundle.py
def create_input_schema(
    self,
) -> typing.Mapping[
    str, typing.Union[ValueSchema, typing.Mapping[str, typing.Any]]
]:
    return {
        "base_path": {
            "type": "string",
            "doc": "The base path where the folder lives.",
        },
        "rel_path": {
            "type": "string",
            "doc": "The relative path of the folder, within the base path location.",
        },
        "include_files": {
            "type": "list",
            "doc": "A list of strings, include all files where the filename ends with one of those strings.\n\nOnly full string matches are supported at the moment, globs and regex might be in the future.",
            "optional": True,
        },
        "exclude_dirs": {
            "type": "list",
            "doc": f"A list of strings, exclude all folders whose name ends with that string. Defaults to: {DEFAULT_EXCLUDE_DIRS}.\n\nOnly full string matches are supported at the moment, globs and regex might be in the future.",
            "default": DEFAULT_EXCLUDE_DIRS,
            "optional": True,
        },
        "exclude_files": {
            "type": "list",
            "doc": f"A list of strings, exclude all files that end with that one of those strings (takes precedence over 'include_files'). Defaults to: {DEFAULT_EXCLUDE_FILES}\n\nOnly full string matches are supported at the moment, globs and regex might be in the future.",
            "default": DEFAULT_EXCLUDE_FILES,
            "optional": True,
        },
    }

create_output_schema(self)

Abstract method to implement by child classes, returns a description of the output schema of this module.

If returning a dictionary of dictionaries, the format of the return value is as follows (items with '*' are optional):

{ "[output_field_name]: { "type": "[value_type]", "doc*": "[a description of this output]" "[other_input_field_name]: { "type: ... ... }

Source code in core/file_bundle.py
def create_output_schema(
    self,
) -> typing.Mapping[
    str, typing.Union[ValueSchema, typing.Mapping[str, typing.Any]]
]:

    return {
        "file_bundle": {
            "type": "file_bundle",
            "doc": "The collection of files contained in the bundle.",
        }
    }

StoreFileBundleType (StoreValueTypeModule)

Save a file bundle to disk.

Source code in core/file_bundle.py
class StoreFileBundleType(StoreValueTypeModule):
    """Save a file bundle to disk."""

    _module_type_name = "store"

    @classmethod
    def retrieve_supported_types(cls) -> typing.Union[str, typing.Iterable[str]]:
        return "file_bundle"

    def store_value(
        self, value: Value, base_path: str
    ) -> typing.Tuple[typing.Dict[str, typing.Any], typing.Any]:

        bundle: KiaraFileBundle = value.get_value_data()
        rel_path = bundle.bundle_name

        target_path = os.path.join(base_path, rel_path)
        fb = bundle.copy_bundle(target_path, is_onboarded=True)

        # # the following changes the input value, which is usually not allowed, but the file_bundle type is a special case
        # bundle.included_files = fb.included_files
        # bundle.is_onboarded = True
        # bundle.path = fb.path
        # for path, f in bundle.included_files.items():
        #     f.is_onboarded = True

        load_config = {
            "module_type": "file_bundle.load",
            "base_path_input_name": "base_path",
            "inputs": {"base_path": base_path, "rel_path": rel_path},
            "output_name": "file_bundle",
        }

        return (load_config, fb)

store_value(self, value, base_path)

Save the value, and return the load config needed to load it again.

Source code in core/file_bundle.py
def store_value(
    self, value: Value, base_path: str
) -> typing.Tuple[typing.Dict[str, typing.Any], typing.Any]:

    bundle: KiaraFileBundle = value.get_value_data()
    rel_path = bundle.bundle_name

    target_path = os.path.join(base_path, rel_path)
    fb = bundle.copy_bundle(target_path, is_onboarded=True)

    # # the following changes the input value, which is usually not allowed, but the file_bundle type is a special case
    # bundle.included_files = fb.included_files
    # bundle.is_onboarded = True
    # bundle.path = fb.path
    # for path, f in bundle.included_files.items():
    #     f.is_onboarded = True

    load_config = {
        "module_type": "file_bundle.load",
        "base_path_input_name": "base_path",
        "inputs": {"base_path": base_path, "rel_path": rel_path},
        "output_name": "file_bundle",
    }

    return (load_config, fb)