Bases: KiaraModule
Download a dataset from zenodo.org.
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
class ZenodoDownload(KiaraModule):
    """Download a dataset from zenodo.org.

    Inputs are the DOI of a record and a flag that controls whether the record
    metadata is written next to the downloaded files. The single output,
    ``file_bundle``, is the imported download folder.
    """

    # NOTE(review): presumably the name under which kiara registers this module -- confirm.
    _module_type_name = "onboard.zenodo_record"
    # NOTE(review): the 'metadata_filename' value read via get_config_value() is
    # presumably declared on this config class -- confirm.
    _config_cls = ZenodoDownloadConfig

    def create_inputs_schema(
        self,
    ) -> ValueMapSchema:
        """Return the input schema: the record ``doi`` and the ``include_metadata`` flag."""
        metadata_filename = self.get_config_value("metadata_filename")
        return {
            "doi": {"type": "string", "doc": "The doi of the record"},
            "include_metadata": {
                "type": "boolean",
                "doc": f"Whether to write the record metadata to a file '{metadata_filename}' and include it in the resulting file bundle.",
                "default": True,
            },
        }

    def create_outputs_schema(
        self,
    ) -> ValueMapSchema:
        """Return the output schema: a single ``file_bundle`` value."""
        return {
            "file_bundle": {
                "type": "file_bundle",
            }
        }

    def download_file(self, file_data: Mapping[str, Any], target_path: Path):
        """Download one file of a record into ``target_path`` and verify its MD5 checksum.

        Args:
            file_data: the file entry of the record. This method reads
                ``file_data["links"]["self"]`` (download url), ``file_data["key"]``
                (file name) and ``file_data["checksum"]`` (expected hex digest,
                optionally prefixed with ``md5:``).
            target_path: the directory the file is written into.

        Returns:
            The path of the downloaded file.

        Raises:
            KiaraProcessingException: if the target file already exists, the
                server answers with an error status, or the checksum of the
                downloaded content does not match the expected one.
        """
        import httpx

        url = file_data["links"]["self"]
        file_name = file_data["key"]

        # Only strip the algorithm prefix when it is actually there, instead of
        # blindly cutting the first four characters of the digest.
        checksum = file_data["checksum"]
        if checksum.startswith("md5:"):
            checksum = checksum[4:]

        target_file = target_path / file_name
        if target_file.exists():
            # Report the file that is in the way, not the folder it lives in.
            raise KiaraProcessingException(
                f"Can't download file, target path already exists: {target_file.as_posix()}."
            )

        hash_md5 = hashlib.md5()  # noqa
        verified = False
        try:
            # The file is known not to exist at this point, so plain write mode
            # is enough; append mode could only ever extend stale content.
            with open(target_file, "wb") as file2:
                with httpx.Client() as client:
                    with client.stream("GET", url) as resp:
                        # Fail with the real cause instead of storing an error
                        # page and reporting it as a checksum mismatch.
                        if resp.status_code >= 400:
                            raise KiaraProcessingException(
                                f"Can't download file '{file_name}', server returned status {resp.status_code} for url: {url}"
                            )
                        for chunk in resp.iter_bytes():
                            hash_md5.update(chunk)
                            file2.write(chunk)

            digest = hash_md5.hexdigest()
            if checksum != digest:
                raise KiaraProcessingException(
                    f"Can't download file '{file_name}', invalid checksum: {checksum} != {digest}"
                )
            verified = True
        finally:
            # Never leave a partial or corrupt file behind in the target folder.
            if not verified:
                target_file.unlink(missing_ok=True)

        return target_file

    def process(self, inputs: ValueMap, outputs: ValueMap):
        """Download all files of the record identified by the ``doi`` input.

        The files are stored in a temporary folder; if ``include_metadata`` is
        set, the record data is additionally written there as JSON. The folder
        is then imported and set as the ``file_bundle`` output.
        """
        import pyzenodo3

        include_metadata = inputs.get_value_data("include_metadata")
        doi = inputs.get_value_data("doi")

        zen = pyzenodo3.Zenodo()
        record = zen.find_record_by_doi(doi)

        # The temporary folder is removed and re-created before use.
        # NOTE(review): presumably to guarantee an empty folder -- confirm.
        path = KiaraFileBundle.create_tmp_dir()
        shutil.rmtree(path, ignore_errors=True)
        path.mkdir()

        for file_data in record.data["files"]:
            self.download_file(file_data, path)

        if include_metadata:
            metadata_filename = self.get_config_value("metadata_filename")
            metadata_file = path / metadata_filename
            metadata_file.write_bytes(orjson.dumps(record.data))

        bundle = KiaraFileBundle.import_folder(path.as_posix())
        outputs.set_value("file_bundle", bundle)
|
Attributes
_config_cls = ZenodoDownloadConfig
instance-attribute
class-attribute
Functions
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
28
29
30
31
32
33
34
35
36
37
38
39
def create_inputs_schema(
    self,
) -> ValueMapSchema:
    """Describe the module inputs: the record ``doi`` and the ``include_metadata`` flag.

    The documentation of ``include_metadata`` mentions the configured
    ``metadata_filename`` value.
    """
    metadata_name = self.get_config_value("metadata_filename")

    doi_field = {"type": "string", "doc": "The doi of the record"}
    metadata_flag = {
        "type": "boolean",
        "doc": f"Whether to write the record metadata to a file '{metadata_name}' and include it in the resulting file bundle.",
        "default": True,
    }
    return {"doi": doi_field, "include_metadata": metadata_flag}
|
create_outputs_schema() -> ValueMapSchema
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
42
43
44
45
46
47
48
49
def create_outputs_schema(
    self,
) -> ValueMapSchema:
    """Describe the module outputs: a single value named ``file_bundle``."""
    bundle_field = {"type": "file_bundle"}
    return {"file_bundle": bundle_field}
|
download_file(file_data: Mapping[str, Any], target_path: Path)
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def download_file(self, file_data: Mapping[str, Any], target_path: Path):
    """Download one file of a record into ``target_path`` and verify its MD5 checksum.

    Args:
        file_data: the file entry of the record. This method reads
            ``file_data["links"]["self"]`` (download url), ``file_data["key"]``
            (file name) and ``file_data["checksum"]`` (expected hex digest,
            optionally prefixed with ``md5:``).
        target_path: the directory the file is written into.

    Returns:
        The path of the downloaded file.

    Raises:
        KiaraProcessingException: if the target file already exists, the
            server answers with an error status, or the checksum of the
            downloaded content does not match the expected one.
    """
    import httpx

    url = file_data["links"]["self"]
    file_name = file_data["key"]

    # Only strip the algorithm prefix when it is actually there, instead of
    # blindly cutting the first four characters of the digest.
    checksum = file_data["checksum"]
    if checksum.startswith("md5:"):
        checksum = checksum[4:]

    target_file = target_path / file_name
    if target_file.exists():
        # Report the file that is in the way, not the folder it lives in.
        raise KiaraProcessingException(
            f"Can't download file, target path already exists: {target_file.as_posix()}."
        )

    hash_md5 = hashlib.md5()  # noqa
    verified = False
    try:
        # The file is known not to exist at this point, so plain write mode
        # is enough; append mode could only ever extend stale content.
        with open(target_file, "wb") as file2:
            with httpx.Client() as client:
                with client.stream("GET", url) as resp:
                    # Fail with the real cause instead of storing an error
                    # page and reporting it as a checksum mismatch.
                    if resp.status_code >= 400:
                        raise KiaraProcessingException(
                            f"Can't download file '{file_name}', server returned status {resp.status_code} for url: {url}"
                        )
                    for chunk in resp.iter_bytes():
                        hash_md5.update(chunk)
                        file2.write(chunk)

        digest = hash_md5.hexdigest()
        if checksum != digest:
            raise KiaraProcessingException(
                f"Can't download file '{file_name}', invalid checksum: {checksum} != {digest}"
            )
        verified = True
    finally:
        # Never leave a partial or corrupt file behind in the target folder.
        if not verified:
            target_file.unlink(missing_ok=True)

    return target_file
|
process(inputs: ValueMap, outputs: ValueMap)
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/modules/zenodo.py
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
def process(self, inputs: ValueMap, outputs: ValueMap):
    """Fetch every file of the record named by the ``doi`` input into a file bundle.

    The files are downloaded into a temporary folder. When the
    ``include_metadata`` input is set, the record data is also written into
    that folder as JSON, under the configured ``metadata_filename``. The
    imported folder becomes the ``file_bundle`` output.
    """
    import pyzenodo3

    with_metadata = inputs.get_value_data("include_metadata")
    record_doi = inputs.get_value_data("doi")

    zenodo_record = pyzenodo3.Zenodo().find_record_by_doi(record_doi)

    # Remove and re-create the temporary folder before downloading into it.
    download_dir = KiaraFileBundle.create_tmp_dir()
    shutil.rmtree(download_dir, ignore_errors=True)
    download_dir.mkdir()

    for entry in zenodo_record.data["files"]:
        self.download_file(entry, download_dir)

    if with_metadata:
        metadata_target = download_dir / self.get_config_value("metadata_filename")
        metadata_target.write_bytes(orjson.dumps(zenodo_record.data))

    outputs.set_value(
        "file_bundle", KiaraFileBundle.import_folder(download_dir.as_posix())
    )
|