Bases: KiaraModel
A class to manage array-like data.
Internally, this uses an Apache Arrow Array to handle the data in memory and on disk.
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/tabular/models/array.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class KiaraArray(KiaraModel):
    """A class to manage array-like data.

    Internally, this uses an [Apache Arrow Array](https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array) to handle the data in memory and on disk.

    Instances are either created in memory via `create_array`, or they point
    to a file via `data_path`, in which case the data is read the first time
    `arrow_array` is accessed and cached afterwards.
    """

    # @classmethod
    # def create_in_temp_dir(cls, ):
    #
    #     temp_f = tempfile.mkdtemp()
    #     file_path = os.path.join(temp_f, "array.feather")
    #
    #     def cleanup():
    #         shutil.rmtree(file_path, ignore_errors=True)
    #
    #     atexit.register(cleanup)
    #
    #     array_obj = cls(feather_path=file_path)
    #     return array_obj

    @classmethod
    def create_array(cls, data: Any) -> "KiaraArray":
        """Wrap the provided data in a `KiaraArray`.

        Arguments:
            data: a `KiaraArray` (returned unchanged), an Arrow `Array` or
                `ChunkedArray`, a single-column Arrow `Table`, or any other
                value that `pa.array` accepts.

        Returns:
            an array object whose data is held as an Arrow `ChunkedArray`

        Raises:
            Exception: if a table with more or fewer than one column is
                provided, or if the data can't be converted by `pa.array`
        """
        if isinstance(data, KiaraArray):
            return data

        if isinstance(data, (pa.Array, pa.ChunkedArray)):
            array_obj = data
        elif isinstance(data, pa.Table):
            if len(data.columns) != 1:
                raise Exception(
                    f"Invalid type, only Arrow Arrays or single-column Tables allowed. This value is a table with {len(data.columns)} columns."
                )
            array_obj = data.column(0)
        else:
            try:
                array_obj = pa.array(data)
            except Exception as err:
                # Keep the conversion error as the cause instead of silently
                # dropping it, so the reason for the failure stays visible.
                raise Exception(
                    f"Can't create array, invalid source data type: {type(data)}."
                ) from err

        if not isinstance(array_obj, pa.ChunkedArray):
            # Normalize to a chunked array with a single chunk.
            array_obj = pa.chunked_array([array_obj])

        # Use `cls` so that subclasses get an instance of their own type.
        obj = cls()
        obj._array_obj = array_obj
        return obj

    data_path: Union[str, None] = Field(
        description="The path to the (feather) file backing this array.", default=None
    )

    # In-memory data; set by `create_array` or lazily by `arrow_array`.
    _array_obj: pa.Array = PrivateAttr(default=None)

    def _retrieve_data_to_hash(self) -> Any:
        raise NotImplementedError()

    def __len__(self):
        return len(self.arrow_array)

    @property
    def arrow_array(self) -> pa.Array:
        """The Arrow data of this array.

        If no data is held in memory yet, it is read from the file at
        `data_path` (which must contain a single-column table) and cached.

        Raises:
            Exception: if neither in-memory data nor a `data_path` is set, or
                if the file does not contain exactly one column
        """
        if self._array_obj is not None:
            return self._array_obj

        if not self.data_path:
            raise Exception("Can't retrieve array data, object not initialized (yet).")

        with pa.memory_map(self.data_path, "r") as source:
            table: pa.Table = pa.ipc.open_file(source).read_all()

        if len(table.columns) != 1:
            raise Exception(
                f"Invalid serialized array data, only a single-column Table is allowed. This value is a table with {len(table.columns)} columns."
            )

        self._array_obj = table.column(0)
        return self._array_obj

    def to_pylist(self):
        """Return the result of `to_pylist()` on the underlying Arrow data."""
        return self.arrow_array.to_pylist()

    def to_pandas(self):
        """Return the result of `to_pandas()` on the underlying Arrow data."""
        return self.arrow_array.to_pandas()
|
Attributes
data_path: Union[str, None] = Field(description='The path to the (feather) file backing this array.', default=None)
class-attribute
instance-attribute
arrow_array: pa.Array
property
Functions
create_array(data: Any) -> KiaraArray
classmethod
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/tabular/models/array.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
@classmethod
def create_array(cls, data: Any) -> "KiaraArray":
    """Wrap the provided data in a `KiaraArray`.

    Arguments:
        data: a `KiaraArray` (returned unchanged), an Arrow `Array` or
            `ChunkedArray`, a single-column Arrow `Table`, or any other
            value that `pa.array` accepts.

    Returns:
        an array object whose data is held as an Arrow `ChunkedArray`

    Raises:
        Exception: if a table with more or fewer than one column is
            provided, or if the data can't be converted by `pa.array`
    """
    if isinstance(data, KiaraArray):
        return data

    if isinstance(data, (pa.Array, pa.ChunkedArray)):
        array_obj = data
    elif isinstance(data, pa.Table):
        if len(data.columns) != 1:
            raise Exception(
                f"Invalid type, only Arrow Arrays or single-column Tables allowed. This value is a table with {len(data.columns)} columns."
            )
        array_obj = data.column(0)
    else:
        try:
            array_obj = pa.array(data)
        except Exception as err:
            # Keep the conversion error as the cause instead of silently
            # dropping it, so the reason for the failure stays visible.
            raise Exception(
                f"Can't create array, invalid source data type: {type(data)}."
            ) from err

    if not isinstance(array_obj, pa.ChunkedArray):
        # Normalize to a chunked array with a single chunk.
        array_obj = pa.chunked_array([array_obj])

    # Use `cls` so that subclasses get an instance of their own type.
    obj = cls()
    obj._array_obj = array_obj
    return obj
|
to_pylist()
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/tabular/models/array.py
def to_pylist(self):
    """Return the result of `to_pylist()` on the underlying Arrow data."""
    arrow_data = self.arrow_array
    return arrow_data.to_pylist()
|
to_pandas()
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/tabular/models/array.py
def to_pandas(self):
    """Return the result of `to_pandas()` on the underlying Arrow data."""
    arrow_data = self.arrow_array
    return arrow_data.to_pandas()
|