Skip to content

download

Classes

DownloadMetadata

Bases: BaseModel

Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/utils/download.py
15
16
17
18
19
20
class DownloadMetadata(BaseModel):
    # Record describing one download request. download_file() stores the
    # dict form of this model under the "download_info" metadata key of the
    # file it returns.
    # NOTE: documented with comments rather than a class docstring, since a
    # docstring would presumably show up in the generated schema -- confirm.

    # The URL that was passed to the download function.
    url: str = Field(description="The url of the download request.")
    # One header dict per HTTP response. download_file() puts the headers of
    # the final response first, followed by the headers of every response
    # listed in the redirect history.
    response_headers: List[Dict[str, str]] = Field(
        description="The response headers of the download request."
    )
    # Timestamp as a string; download_file() fills it with the isoformat()
    # of a UTC-aware datetime.
    request_time: str = Field(description="The time the request was made.")

Attributes

url: str = Field(description='The url of the download request.') instance-attribute class-attribute
response_headers: List[Dict[str, str]] = Field(description='The response headers of the download request.') instance-attribute class-attribute
request_time: str = Field(description='The time the request was made.') instance-attribute class-attribute

DownloadBundleMetadata

Bases: DownloadMetadata

Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/utils/download.py
23
24
25
26
class DownloadBundleMetadata(DownloadMetadata):
    # Download record for archive ("bundle") downloads: the DownloadMetadata
    # fields plus the folder import configuration.

    # download_file_bundle() passes the dict form of the FolderImportConfig
    # it received, or an empty dict when no config was supplied.
    import_config: FolderImportConfig = Field(
        description="The import configuration that was used to import the files from the source bundle."
    )

Attributes

import_config: FolderImportConfig = Field(description='The import configuration that was used to import the files from the source bundle.') instance-attribute class-attribute

Functions

download_file(url: str, target: Union[str, None] = None, file_name: Union[str, None] = None, attach_metadata: bool = True, return_md5_hash: bool = False) -> Union[KiaraFile, Tuple[KiaraFile, str]]

Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/utils/download.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def download_file(
    url: str,
    target: Union[str, None] = None,
    file_name: Union[str, None] = None,
    attach_metadata: bool = True,
    return_md5_hash: bool = False,
) -> Union[KiaraFile, Tuple[KiaraFile, str]]:
    """Download ``url`` into a local file and load it as a ``KiaraFile``.

    Args:
        url: the url to fetch (redirects are followed).
        target: path to write the download to; missing parent folders are
            created. If empty, a temporary file is created (and not removed).
        file_name: the file name handed to ``KiaraFile.load_file``; defaults
            to the last ``/``-separated segment of ``url``.
        attach_metadata: whether to store a ``DownloadMetadata`` record (and
            its schema) under the ``"download_info"`` key of the result.
        return_md5_hash: whether to also return the md5 hex digest of the
            downloaded bytes.

    Returns:
        The loaded file, or a ``(file, md5_hexdigest)`` tuple if
        ``return_md5_hash`` is set.
    """

    import hashlib

    import httpx
    import pytz

    if not target:
        # Only the path of the temp file is needed. Close the handle right
        # away (the download re-opens the file by name) so no file
        # descriptor is leaked.
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            _target = Path(tmp_file.name)
    else:
        _target = Path(target)
        _target.parent.mkdir(parents=True, exist_ok=True)

    # md5 is only used as a content checksum here, not for security.
    hash_md5 = hashlib.md5() if return_md5_hash else None  # noqa

    # Record the time *before* the request is sent, so the metadata reflects
    # when the request was made rather than when the download finished.
    request_time = datetime.now(tz=pytz.utc).isoformat()

    history = []
    # NOTE(review): the HTTP status is not checked, so the body of an error
    # response (4xx/5xx) is written to the target like any other content.
    with open(_target, "wb") as f, httpx.stream(
        "GET", url, follow_redirects=True
    ) as r:
        # Headers of the final response first, then those of the redirects.
        history.append(dict(r.headers))
        history.extend(dict(h.headers) for h in r.history)
        for data in r.iter_bytes():
            if hash_md5 is not None:
                hash_md5.update(data)
            f.write(data)

    if not file_name:
        # TODO: make this smarter, using content-disposition headers if available
        file_name = url.split("/")[-1]

    result_file = KiaraFile.load_file(_target.as_posix(), file_name)

    if attach_metadata:
        _metadata = DownloadMetadata(
            url=url,
            response_headers=history,
            request_time=request_time,
        )
        result_file.metadata["download_info"] = _metadata.dict()
        result_file.metadata_schemas["download_info"] = DownloadMetadata.schema_json()

    if hash_md5 is not None:
        return result_file, hash_md5.hexdigest()
    return result_file

download_file_bundle(url: str, attach_metadata: bool = True, import_config: Union[FolderImportConfig, None] = None) -> KiaraFileBundle

Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/onboarding/utils/download.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def download_file_bundle(
    url: str,
    attach_metadata: bool = True,
    import_config: Union[FolderImportConfig, None] = None,
) -> KiaraFileBundle:
    """Download an archive from ``url``, extract it and import it as a bundle.

    The archive is written to a temporary file, unpacked into a temporary
    folder (``shutil.unpack_archive`` first, ``patoolib`` as a fallback) and
    the folder is imported via ``KiaraFileBundle.import_folder``.

    Args:
        url: the url of the archive (redirects are followed).
        attach_metadata: whether to store a ``DownloadBundleMetadata`` record
            (and its schema) under the ``"download_info"`` key of the bundle.
        import_config: passed through to ``KiaraFileBundle.import_folder``
            and recorded in the metadata.

    Returns:
        The imported file bundle.

    Raises:
        KiaraException: if neither extraction method can unpack the archive.
    """

    import shutil
    from datetime import datetime
    from pathlib import PurePosixPath
    from urllib.parse import urlparse

    import httpx
    import pytz

    # 'shutil.unpack_archive' picks the archive format by checking what the
    # file name ends with, so carry over the *whole* extension (e.g.
    # '.tar.gz', not just '.gz'). Best effort: fall back to no suffix.
    try:
        remote_name = os.path.basename(urlparse(url).path)
        suffix = "".join(PurePosixPath(remote_name).suffixes)
    except Exception:
        suffix = None
    if not suffix:
        suffix = ""

    # Only the path is needed; close the handle immediately instead of
    # keeping it open (and registering an exit hook) for every call.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        archive_path = tmp_file.name

    # Record the time *before* the request is sent, so the metadata reflects
    # when the request was made rather than when the import finished.
    request_time = datetime.now(tz=pytz.utc).isoformat()

    history = []
    try:
        # NOTE(review): the HTTP status is not checked, an error response
        # would only surface as an extraction failure below.
        with open(archive_path, "wb") as f, httpx.stream(
            "GET", url, follow_redirects=True
        ) as r:
            # Headers of the final response first, then those of the redirects.
            history.append(dict(r.headers))
            history.extend(dict(h.headers) for h in r.history)
            for data in r.iter_bytes():
                f.write(data)

        out_dir = tempfile.mkdtemp()

        def del_out_dir():
            shutil.rmtree(out_dir, ignore_errors=True)

        # The extracted folder is kept until interpreter exit -- presumably
        # the returned bundle still refers to the files in it (confirm).
        atexit.register(del_out_dir)

        try:
            shutil.unpack_archive(archive_path, out_dir)
        except Exception:
            # try patool, maybe we're lucky
            try:
                import patoolib

                patoolib.extract_archive(archive_path, outdir=out_dir)
            except Exception as e:
                raise KiaraException(msg=f"Could not extract archive: {e}.") from e
    finally:
        # The downloaded archive is not needed once extraction has been
        # attempted; removing it is best effort and must not mask an error.
        try:
            os.unlink(archive_path)
        except OSError:
            pass

    bundle = KiaraFileBundle.import_folder(out_dir, import_config=import_config)

    if attach_metadata:
        if import_config is None:
            ic_dict = {}
        elif isinstance(import_config, FolderImportConfig):
            ic_dict = import_config.dict()
        else:
            ic_dict = import_config

        _metadata = DownloadBundleMetadata(
            url=url,
            response_headers=history,
            request_time=request_time,
            import_config=ic_dict,
        )
        bundle.metadata["download_info"] = _metadata.dict()
        # The schema has to match the stored record, which includes
        # 'import_config' (i.e. the bundle model, not the base model).
        bundle.metadata_schemas[
            "download_info"
        ] = DownloadBundleMetadata.schema_json()

    return bundle