Skip to content

zenodo

Classes

ZenodoDownloadConfig

Bases: KiaraModuleConfig

Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
15
16
17
18
19
class ZenodoDownloadConfig(KiaraModuleConfig):
    # Configuration model referenced by `ZenodoDownload._config_cls`.

    # Name of the file the record metadata is written to when the module's
    # 'include_metadata' input is set.
    metadata_filename: str = Field(
        default="metadata.json",
        description="The filename for the zenodo metadata.",
    )

Attributes

metadata_filename: str = Field(description='The filename for the zenodo metadata.', default='metadata.json') instance-attribute class-attribute

ZenodoDownload

Bases: KiaraModule

Download a dataset from zenodo.org.

Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
class ZenodoDownload(KiaraModule):
    """Download a dataset from zenodo.org."""

    _module_type_name = "onboard.zenodo_record"
    _config_cls = ZenodoDownloadConfig

    def create_inputs_schema(
        self,
    ) -> ValueMapSchema:
        """Return the input schema: the record 'doi' and an 'include_metadata' flag.

        The documentation of 'include_metadata' mentions the configured
        'metadata_filename', so the schema depends on the module config.
        """

        metadata_filename = self.get_config_value("metadata_filename")
        return {
            "doi": {"type": "string", "doc": "The doi of the record"},
            "include_metadata": {
                "type": "boolean",
                "doc": f"Whether to write the record metadata to a file '{metadata_filename}' and include it in the resulting file bundle.",
                "default": True,
            },
        }

    def create_outputs_schema(
        self,
    ) -> ValueMapSchema:
        """Return the output schema: a single 'file_bundle' value."""

        return {
            "file_bundle": {
                "type": "file_bundle",
            }
        }

    def download_file(self, file_data: Mapping[str, Any], target_path: Path):
        """Download one file of a record into a folder and verify its md5 checksum.

        Args:
            file_data: a single entry of the record's 'files' list. The items
                read are ``file_data["links"]["self"]`` (download url),
                ``file_data["key"]`` (file name) and ``file_data["checksum"]``.
            target_path: the directory the file is written into.

        Returns:
            The path of the downloaded file (``target_path / file_data["key"]``).

        Raises:
            KiaraProcessingException: if the file name points outside of
                'target_path', the target file already exists, the server
                answers with an error status, or the checksum does not match.
        """

        import httpx

        url = file_data["links"]["self"]
        file_name = file_data["key"]

        # The checksum is expected in the form 'md5:<hexdigest>' (assumption,
        # TODO confirm against the Zenodo API). Only strip the prefix when it is
        # actually present, so a bare digest does not get truncated.
        raw_checksum = file_data["checksum"]
        checksum = raw_checksum[4:] if raw_checksum.startswith("md5:") else raw_checksum

        target_file = target_path / file_name

        # 'key' comes from remote metadata: refuse names (absolute paths, '..')
        # that would make us write outside of the target directory.
        if target_path.resolve() not in target_file.resolve().parents:
            raise KiaraProcessingException(
                f"Can't download file, invalid file name: '{file_name}'."
            )

        if target_file.exists():
            raise KiaraProcessingException(
                f"Can't download file, target path already exists: {target_file.as_posix()}."
            )

        # A key may contain sub-directories; make sure they exist.
        target_file.parent.mkdir(parents=True, exist_ok=True)

        hash_md5 = hashlib.md5()  # noqa

        try:
            with httpx.Client() as client, client.stream("GET", url) as resp:
                # Fail early instead of storing an error page and reporting it
                # as a checksum mismatch.
                if resp.status_code >= 400:
                    raise KiaraProcessingException(
                        f"Can't download file '{file_name}', server responded with status code {resp.status_code}."
                    )

                with open(target_file, "wb") as file2:
                    for chunk in resp.iter_bytes():
                        hash_md5.update(chunk)
                        file2.write(chunk)

            if checksum != hash_md5.hexdigest():
                raise KiaraProcessingException(
                    f"Can't download file '{file_name}', invalid checksum: {checksum} != {hash_md5.hexdigest()}"
                )
        except Exception:
            # Don't leave a partial or corrupt file behind: it would be picked
            # up by the bundle import and block a retry via the exists-check.
            target_file.unlink(missing_ok=True)
            raise

        return target_file

    def process(self, inputs: ValueMap, outputs: ValueMap):
        """Download all files of the record with the given doi into a file bundle.

        Reads the 'doi' and 'include_metadata' inputs, downloads every entry of
        the record's 'files' list into a temporary folder, optionally writes the
        record metadata as json next to them, and sets the imported folder as
        the 'file_bundle' output.
        """

        import pyzenodo3

        include_metadata = inputs.get_value_data("include_metadata")

        doi = inputs.get_value_data("doi")
        zen = pyzenodo3.Zenodo()

        record = zen.find_record_by_doi(doi)

        # Start from an empty folder: whatever 'create_tmp_dir' returned is
        # removed (if present) and re-created.
        path = KiaraFileBundle.create_tmp_dir()
        shutil.rmtree(path, ignore_errors=True)
        path.mkdir()
        for file_data in record.data["files"]:
            self.download_file(file_data, path)

        if include_metadata:
            metadata_filename = self.get_config_value("metadata_filename")
            metadata_file = path / metadata_filename
            metadata_file.write_bytes(orjson.dumps(record.data))

        bundle = KiaraFileBundle.import_folder(path.as_posix())
        outputs.set_value("file_bundle", bundle)

Attributes

_config_cls = ZenodoDownloadConfig instance-attribute class-attribute

Functions

create_inputs_schema() -> ValueMapSchema
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
28
29
30
31
32
33
34
35
36
37
38
39
40
def create_inputs_schema(self) -> ValueMapSchema:
    """Describe the module inputs.

    Two inputs are declared: the 'doi' of the record, and the boolean
    'include_metadata' (default: True) whose documentation names the
    configured 'metadata_filename'.
    """
    filename = self.get_config_value("metadata_filename")

    doi_field = {"type": "string", "doc": "The doi of the record"}
    include_metadata_field = {
        "type": "boolean",
        "doc": f"Whether to write the record metadata to a file '{filename}' and include it in the resulting file bundle.",
        "default": True,
    }

    return {"doi": doi_field, "include_metadata": include_metadata_field}
create_outputs_schema() -> ValueMapSchema
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
42
43
44
45
46
47
48
49
50
def create_outputs_schema(self) -> ValueMapSchema:
    """Describe the module outputs: a single 'file_bundle' of type 'file_bundle'."""
    bundle_field = {"type": "file_bundle"}
    return {"file_bundle": bundle_field}
download_file(file_data: Mapping[str, Any], target_path: Path)
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def download_file(self, file_data: Mapping[str, Any], target_path: Path):
    """Download one file of a record into a folder and verify its md5 checksum.

    Args:
        file_data: a single entry of the record's 'files' list. The items
            read are ``file_data["links"]["self"]`` (download url),
            ``file_data["key"]`` (file name) and ``file_data["checksum"]``.
        target_path: the directory the file is written into.

    Returns:
        The path of the downloaded file (``target_path / file_data["key"]``).

    Raises:
        KiaraProcessingException: if the file name points outside of
            'target_path', the target file already exists, the server
            answers with an error status, or the checksum does not match.
    """

    import httpx

    url = file_data["links"]["self"]
    file_name = file_data["key"]

    # The checksum is expected in the form 'md5:<hexdigest>' (assumption,
    # TODO confirm against the Zenodo API). Only strip the prefix when it is
    # actually present, so a bare digest does not get truncated.
    raw_checksum = file_data["checksum"]
    checksum = raw_checksum[4:] if raw_checksum.startswith("md5:") else raw_checksum

    target_file = target_path / file_name

    # 'key' comes from remote metadata: refuse names (absolute paths, '..')
    # that would make us write outside of the target directory.
    if target_path.resolve() not in target_file.resolve().parents:
        raise KiaraProcessingException(
            f"Can't download file, invalid file name: '{file_name}'."
        )

    if target_file.exists():
        raise KiaraProcessingException(
            f"Can't download file, target path already exists: {target_file.as_posix()}."
        )

    # A key may contain sub-directories; make sure they exist.
    target_file.parent.mkdir(parents=True, exist_ok=True)

    hash_md5 = hashlib.md5()  # noqa

    try:
        with httpx.Client() as client, client.stream("GET", url) as resp:
            # Fail early instead of storing an error page and reporting it
            # as a checksum mismatch.
            if resp.status_code >= 400:
                raise KiaraProcessingException(
                    f"Can't download file '{file_name}', server responded with status code {resp.status_code}."
                )

            with open(target_file, "wb") as file2:
                for chunk in resp.iter_bytes():
                    hash_md5.update(chunk)
                    file2.write(chunk)

        if checksum != hash_md5.hexdigest():
            raise KiaraProcessingException(
                f"Can't download file '{file_name}', invalid checksum: {checksum} != {hash_md5.hexdigest()}"
            )
    except Exception:
        # Don't leave a partial or corrupt file behind: it would block a
        # retry via the exists-check above.
        target_file.unlink(missing_ok=True)
        raise

    return target_file
process(inputs: ValueMap, outputs: ValueMap)
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def process(self, inputs: ValueMap, outputs: ValueMap):
    """Fetch the record for the 'doi' input and expose its files as a file bundle.

    Every entry of the record's 'files' list is downloaded into a freshly
    created temporary folder. If the 'include_metadata' input is set, the
    record data is additionally dumped as json into the configured
    'metadata_filename'. The folder is then imported and set as the
    'file_bundle' output.
    """
    import pyzenodo3

    with_metadata = inputs.get_value_data("include_metadata")
    record_doi = inputs.get_value_data("doi")

    zenodo_record = pyzenodo3.Zenodo().find_record_by_doi(record_doi)

    # Whatever 'create_tmp_dir' returned is wiped (if present) and re-created,
    # so the downloads always land in an empty folder.
    bundle_dir = KiaraFileBundle.create_tmp_dir()
    shutil.rmtree(bundle_dir, ignore_errors=True)
    bundle_dir.mkdir()

    for entry in zenodo_record.data["files"]:
        self.download_file(entry, bundle_dir)

    if with_metadata:
        metadata_target = bundle_dir / self.get_config_value("metadata_filename")
        metadata_target.write_bytes(orjson.dumps(zenodo_record.data))

    outputs.set_value(
        "file_bundle", KiaraFileBundle.import_folder(bundle_dir.as_posix())
    )