Bases: KiaraModule
  
      Download a dataset from zenodo.org.
        
          Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
          |  22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106 | class ZenodoDownload(KiaraModule):
    """Download a dataset from zenodo.org."""
    _module_type_name = "onboard.zenodo_record"
    _config_cls = ZenodoDownloadConfig
    def create_inputs_schema(self) -> ValueMapSchema:
        """Describe the inputs this module accepts.

        Returns:
            A schema mapping with two fields: ``doi`` (a string) and
            ``include_metadata`` (a boolean that defaults to ``True``).
        """
        # The configured file name is only used to make the field doc concrete.
        metadata_filename = self.get_config_value("metadata_filename")

        doi_field = {"type": "string", "doc": "The doi of the record"}
        include_metadata_field = {
            "type": "boolean",
            "doc": f"Whether to write the record metadata to a file '{metadata_filename}' and include it in the resulting file bundle.",
            "default": True,
        }
        return {"doi": doi_field, "include_metadata": include_metadata_field}
    def create_outputs_schema(self) -> ValueMapSchema:
        """Describe the single output this module produces.

        Returns:
            A schema mapping with one field, ``file_bundle``, whose type is
            ``file_bundle``.
        """
        bundle_field = {"type": "file_bundle"}
        return {"file_bundle": bundle_field}
    def download_file(self, file_data: Mapping[str, Any], target_path: Path):
        """Download one file of a record and verify its md5 checksum.

        Arguments:
            file_data: one entry of the record's file list; the keys read here
                are ``links.self`` (download url), ``key`` (file name) and
                ``checksum``.
            target_path: the folder the file is written into.

        Returns:
            The path of the downloaded file.

        Raises:
            KiaraProcessingException: if the target file already exists, if the
                server answers with an error status or the transfer fails, or
                if the downloaded content does not match the checksum.
        """
        import httpx

        url = file_data["links"]["self"]
        file_name = file_data["key"]

        # The checksum is expected as 'md5:<hex digest>'. Only strip the prefix
        # when it is really there instead of cutting four characters blindly.
        checksum = file_data["checksum"]
        if checksum.startswith("md5:"):
            checksum = checksum[4:]

        # NOTE(review): 'key' comes from the remote record metadata and is
        # joined to the target folder unchecked -- confirm it can never contain
        # path separators or '..'.
        target_file = target_path / file_name
        if target_file.exists():
            # Report the file that is in the way, not just its parent folder.
            raise KiaraProcessingException(
                f"Can't download file, target path already exists: {target_file.as_posix()}."
            )

        hash_md5 = hashlib.md5()  # noqa
        try:
            # The file is known not to exist, so plain write mode is sufficient.
            with open(target_file, "wb") as file2, httpx.Client() as client:
                with client.stream("GET", url) as resp:
                    # Without this check an error page would be stored and only
                    # show up later as a misleading checksum mismatch.
                    resp.raise_for_status()
                    for chunk in resp.iter_bytes():
                        hash_md5.update(chunk)
                        file2.write(chunk)
        except httpx.HTTPError as e:
            # Don't leave a partial download behind in the target folder.
            target_file.unlink()
            raise KiaraProcessingException(
                f"Can't download file '{file_name}' from '{url}': {e}"
            ) from e

        digest = hash_md5.hexdigest()
        if checksum != digest:
            # A file that failed verification must not end up in the bundle.
            target_file.unlink()
            raise KiaraProcessingException(
                f"Can't download file '{file_name}', invalid checksum: {checksum} != {digest}"
            )
        return target_file
    def process(self, inputs: ValueMap, outputs: ValueMap):
        """Download all files of a record and publish them as a file bundle.

        Reads the ``doi`` and ``include_metadata`` inputs, looks the record up
        via ``pyzenodo3``, downloads every entry of the record's ``files`` list
        into a temporary folder and, if requested, also writes the record data
        as JSON into the configured metadata file. The folder is then imported
        as a ``KiaraFileBundle`` and set as the ``file_bundle`` output.

        Arguments:
            inputs: the module inputs (``doi``, ``include_metadata``).
            outputs: the module outputs; ``file_bundle`` is set here.
        """
        import pyzenodo3

        include_metadata = inputs.get_value_data("include_metadata")
        doi = inputs.get_value_data("doi")

        record = pyzenodo3.Zenodo().find_record_by_doi(doi)

        # Whatever `create_tmp_dir` returned is removed and re-created, so the
        # downloads always start from an empty folder.
        download_dir = KiaraFileBundle.create_tmp_dir()
        shutil.rmtree(download_dir, ignore_errors=True)
        download_dir.mkdir()

        for file_entry in record.data["files"]:
            self.download_file(file_entry, download_dir)

        if include_metadata:
            metadata_target = download_dir / self.get_config_value("metadata_filename")
            metadata_target.write_bytes(orjson.dumps(record.data))

        outputs.set_value(
            "file_bundle", KiaraFileBundle.import_folder(download_dir.as_posix())
        )
 | 
 
  
  
Attributes
_config_cls = ZenodoDownloadConfig
  
  
      instance-attribute
      class-attribute
  
  
  
 
Functions
  
      
        Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
        | 28
29
30
31
32
33
34
35
36
37
38
39
def create_inputs_schema(self) -> ValueMapSchema:
    """Describe the inputs this module accepts.

    Returns:
        A schema mapping with two fields: ``doi`` (a string) and
        ``include_metadata`` (a boolean that defaults to ``True``).
    """
    # The configured file name is only used to make the field doc concrete.
    metadata_filename = self.get_config_value("metadata_filename")

    doi_field = {"type": "string", "doc": "The doi of the record"}
    include_metadata_field = {
        "type": "boolean",
        "doc": f"Whether to write the record metadata to a file '{metadata_filename}' and include it in the resulting file bundle.",
        "default": True,
    }
    return {"doi": doi_field, "include_metadata": include_metadata_field}
 | 
 
   
 
create_outputs_schema() -> ValueMapSchema
  
      
        Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
        | 42
43
44
45
46
47
48
49
def create_outputs_schema(self) -> ValueMapSchema:
    """Describe the single output this module produces.

    Returns:
        A schema mapping with one field, ``file_bundle``, whose type is
        ``file_bundle``.
    """
    bundle_field = {"type": "file_bundle"}
    return {"file_bundle": bundle_field}
 | 
 
   
 
download_file(file_data: Mapping[str, Any], target_path: Path)
  
      
        Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
        | 52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def download_file(self, file_data: Mapping[str, Any], target_path: Path):
    """Download one file of a record and verify its md5 checksum.

    Arguments:
        file_data: one entry of the record's file list; the keys read here
            are ``links.self`` (download url), ``key`` (file name) and
            ``checksum``.
        target_path: the folder the file is written into.

    Returns:
        The path of the downloaded file.

    Raises:
        KiaraProcessingException: if the target file already exists, if the
            server answers with an error status or the transfer fails, or
            if the downloaded content does not match the checksum.
    """
    import httpx

    url = file_data["links"]["self"]
    file_name = file_data["key"]

    # The checksum is expected as 'md5:<hex digest>'. Only strip the prefix
    # when it is really there instead of cutting four characters blindly.
    checksum = file_data["checksum"]
    if checksum.startswith("md5:"):
        checksum = checksum[4:]

    # NOTE(review): 'key' comes from the remote record metadata and is
    # joined to the target folder unchecked -- confirm it can never contain
    # path separators or '..'.
    target_file = target_path / file_name
    if target_file.exists():
        # Report the file that is in the way, not just its parent folder.
        raise KiaraProcessingException(
            f"Can't download file, target path already exists: {target_file.as_posix()}."
        )

    hash_md5 = hashlib.md5()  # noqa
    try:
        # The file is known not to exist, so plain write mode is sufficient.
        with open(target_file, "wb") as file2, httpx.Client() as client:
            with client.stream("GET", url) as resp:
                # Without this check an error page would be stored and only
                # show up later as a misleading checksum mismatch.
                resp.raise_for_status()
                for chunk in resp.iter_bytes():
                    hash_md5.update(chunk)
                    file2.write(chunk)
    except httpx.HTTPError as e:
        # Don't leave a partial download behind in the target folder.
        target_file.unlink()
        raise KiaraProcessingException(
            f"Can't download file '{file_name}' from '{url}': {e}"
        ) from e

    digest = hash_md5.hexdigest()
    if checksum != digest:
        # A file that failed verification must not end up in the bundle.
        target_file.unlink()
        raise KiaraProcessingException(
            f"Can't download file '{file_name}', invalid checksum: {checksum} != {digest}"
        )
    return target_file
 | 
 
   
 
process(inputs: ValueMap, outputs: ValueMap)
  
      
        Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
        |  83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def process(self, inputs: ValueMap, outputs: ValueMap):
    """Download all files of a record and publish them as a file bundle.

    Reads the ``doi`` and ``include_metadata`` inputs, looks the record up
    via ``pyzenodo3``, downloads every entry of the record's ``files`` list
    into a temporary folder and, if requested, also writes the record data
    as JSON into the configured metadata file. The folder is then imported
    as a ``KiaraFileBundle`` and set as the ``file_bundle`` output.

    Arguments:
        inputs: the module inputs (``doi``, ``include_metadata``).
        outputs: the module outputs; ``file_bundle`` is set here.
    """
    import pyzenodo3

    include_metadata = inputs.get_value_data("include_metadata")
    doi = inputs.get_value_data("doi")

    record = pyzenodo3.Zenodo().find_record_by_doi(doi)

    # Whatever `create_tmp_dir` returned is removed and re-created, so the
    # downloads always start from an empty folder.
    download_dir = KiaraFileBundle.create_tmp_dir()
    shutil.rmtree(download_dir, ignore_errors=True)
    download_dir.mkdir()

    for file_entry in record.data["files"]:
        self.download_file(file_entry, download_dir)

    if include_metadata:
        metadata_target = download_dir / self.get_config_value("metadata_filename")
        metadata_target.write_bytes(orjson.dumps(record.data))

    outputs.set_value(
        "file_bundle", KiaraFileBundle.import_folder(download_dir.as_posix())
    )
 |