table
Classes¶
KiaraTable (KiaraModel)
pydantic-model
¶
A wrapper class to manage tabular data in a memory-efficient way.
Source code in tabular/models/table.py
class KiaraTable(KiaraModel):
    """A wrapper class to manage tabular data in a memory-efficient way.

    The table data is either held directly as an Apache Arrow table (when the
    instance was built via `create_table`), or loaded on first access of
    `arrow_table` by memory-mapping the file at `data_path`.
    """

    @classmethod
    def create_table(cls, data: Any) -> "KiaraTable":
        """Create a `KiaraTable` instance from an Apache Arrow Table, or dict of lists.

        Args:
            data: an existing `KiaraTable` (returned unchanged), a `pa.Table`
                (wrapped as-is), or anything `pa.table(...)` can convert.

        Returns:
            A `KiaraTable` wrapping the (converted) Arrow table.

        Raises:
            Exception: if `data` can't be converted into an Arrow table; the
                underlying conversion error is attached as the cause.
        """
        if isinstance(data, KiaraTable):
            return data

        if isinstance(data, pa.Table):
            table_obj = data
        else:
            try:
                table_obj = pa.table(data)
            except Exception as e:
                # Keep the original error as the cause instead of silently
                # discarding it, so the real conversion problem stays visible.
                raise Exception(
                    f"Can't create table, invalid source data type: {type(data)}."
                ) from e

        obj = KiaraTable()
        obj._table_obj = table_obj
        return obj

    data_path: Union[None, str] = Field(
        description="The path to the (feather) file backing this array.", default=None
    )
    """The path where the table object is stored (for internal or read-only use)."""

    # Cached Arrow table; `None` until set by `create_table` or loaded lazily
    # by the `arrow_table` property.
    _table_obj: pa.Table = PrivateAttr(default=None)

    def _retrieve_data_to_hash(self) -> Any:
        raise NotImplementedError()

    @property
    def arrow_table(self) -> pa.Table:
        """Return the data as an Apache Arrow Table instance.

        The table is read from `data_path` on first access and cached on the
        instance afterwards.

        Raises:
            Exception: if no table object is set and `data_path` is empty.
        """
        if self._table_obj is not None:
            return self._table_obj

        if not self.data_path:
            raise Exception("Can't retrieve table data, object not initialized (yet).")

        with pa.memory_map(self.data_path, "r") as source:
            table: pa.Table = pa.ipc.open_file(source).read_all()

        self._table_obj = table
        return self._table_obj

    @property
    def column_names(self) -> Iterable[str]:
        """Retrieve the names of all the columns of this table."""
        return self.arrow_table.column_names

    @property
    def num_rows(self) -> int:
        """Return the number of rows in this table."""
        return self.arrow_table.num_rows

    def to_pydict(self):
        """Convert and return the table data as a dictionary of lists.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pydict()

    def to_pylist(self):
        """Convert and return the table data as a list of rows/dictionaries.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pylist()

    def to_pandas(self):
        """Convert and return the table data to a Pandas dataframe.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pandas()
Attributes¶
arrow_table: Table
property
readonly
¶
Return the data as an Apache Arrow Table instance.
column_names: Iterable[str]
property
readonly
¶
Retrieve the names of all the columns of this table.
data_path: str
pydantic-field
¶
The path to the (feather) file backing this array.
num_rows: int
property
readonly
¶
Return the number of rows in this table.
Methods¶
create_table(data)
classmethod
¶
Create a KiaraTable instance from an Apache Arrow Table, or a dict of lists.
Source code in tabular/models/table.py
@classmethod
def create_table(cls, data: Any) -> "KiaraTable":
    """Create a `KiaraTable` instance from an Apache Arrow Table, or dict of lists.

    Args:
        data: an existing `KiaraTable` (returned unchanged), a `pa.Table`
            (wrapped as-is), or anything `pa.table(...)` can convert.

    Returns:
        A `KiaraTable` wrapping the (converted) Arrow table.

    Raises:
        Exception: if `data` can't be converted into an Arrow table; the
            underlying conversion error is attached as the cause.
    """
    if isinstance(data, KiaraTable):
        return data

    if isinstance(data, pa.Table):
        table_obj = data
    else:
        try:
            table_obj = pa.table(data)
        except Exception as e:
            # Keep the original error as the cause instead of silently
            # discarding it, so the real conversion problem stays visible.
            raise Exception(
                f"Can't create table, invalid source data type: {type(data)}."
            ) from e

    obj = KiaraTable()
    obj._table_obj = table_obj
    return obj
to_pandas(self)
¶
Convert and return the table data to a Pandas dataframe.
This will load all data into memory, so you might or might not want to do that.
Source code in tabular/models/table.py
def to_pandas(self):
    """Return the content of this table as a Pandas dataframe.

    The conversion is delegated to the wrapped Arrow table. Be aware that
    this pulls the whole table into memory.
    """
    arrow_data = self.arrow_table
    return arrow_data.to_pandas()
to_pydict(self)
¶
Convert and return the table data as a dictionary of lists.
This will load all data into memory, so you might or might not want to do that.
Source code in tabular/models/table.py
def to_pydict(self):
    """Return the content of this table as a dictionary of lists.

    The conversion is delegated to the wrapped Arrow table. Be aware that
    this pulls the whole table into memory.
    """
    arrow_data = self.arrow_table
    return arrow_data.to_pydict()
to_pylist(self)
¶
Convert and return the table data as a list of rows/dictionaries.
This will load all data into memory, so you might or might not want to do that.
Source code in tabular/models/table.py
def to_pylist(self):
    """Return the content of this table as a list of rows (dictionaries).

    The conversion is delegated to the wrapped Arrow table. Be aware that
    this pulls the whole table into memory.
    """
    arrow_data = self.arrow_table
    return arrow_data.to_pylist()
KiaraTableMetadata (ValueMetadata)
pydantic-model
¶
Metadata describing a table: its column names, per-column schema, number of rows and size.
Source code in tabular/models/table.py
class KiaraTableMetadata(ValueMetadata):
    """Metadata describing a table: column names, per-column schema, row count and size."""

    _metadata_key = "table"

    @classmethod
    def retrieve_supported_data_types(cls) -> Iterable[str]:
        """Return the data type names this metadata model applies to (only `table`)."""
        return ["table"]

    @classmethod
    def create_value_metadata(cls, value: "Value") -> "KiaraTableMetadata":
        """Assemble the table metadata for the provided value.

        Args:
            value: a value whose `data` attribute is a `KiaraTable`.

        Returns:
            A `KiaraTableMetadata` instance whose `table` attribute holds the
            column names, the per-column schema, the row count (`rows`) and
            the size (`size`, taken from the Arrow table's `nbytes`).
        """
        kiara_table: KiaraTable = value.data
        table: pa.Table = kiara_table.arrow_table

        table_schema = {}
        # Walk the schema's fields directly; a by-name lookup per column is
        # redundant and breaks if a column name occurs more than once.
        for field in table.schema:
            field_md = field.metadata
            if not field_md:
                # No metadata attached to the field: record the Arrow type id.
                field_md = {
                    "arrow_type_id": field.type.id,
                }
            table_schema[field.name] = {
                "type_name": str(field.type),
                "metadata": field_md,
            }

        schema = {
            "column_names": table.column_names,
            "column_schema": table_schema,
            "rows": table.num_rows,
            "size": table.nbytes,
        }

        table_md = TableMetadata.construct(**schema)
        return KiaraTableMetadata.construct(table=table_md)

    table: TableMetadata = Field(description="The table schema.")
Attributes¶
table: TableMetadata
pydantic-field
required
¶
The table schema.
create_value_metadata(value)
classmethod
¶
Source code in tabular/models/table.py
@classmethod
def create_value_metadata(cls, value: "Value") -> "KiaraTableMetadata":
    """Assemble the table metadata for the provided value.

    Args:
        value: a value whose `data` attribute is a `KiaraTable`.

    Returns:
        A `KiaraTableMetadata` instance whose `table` attribute holds the
        column names, the per-column schema, the row count (`rows`) and
        the size (`size`, taken from the Arrow table's `nbytes`).
    """
    kiara_table: KiaraTable = value.data
    table: pa.Table = kiara_table.arrow_table

    table_schema = {}
    # Walk the schema's fields directly; a by-name lookup per column is
    # redundant and breaks if a column name occurs more than once.
    for field in table.schema:
        field_md = field.metadata
        if not field_md:
            # No metadata attached to the field: record the Arrow type id.
            field_md = {
                "arrow_type_id": field.type.id,
            }
        table_schema[field.name] = {
            "type_name": str(field.type),
            "metadata": field_md,
        }

    schema = {
        "column_names": table.column_names,
        "column_schema": table_schema,
        "rows": table.num_rows,
        "size": table.nbytes,
    }

    table_md = TableMetadata.construct(**schema)
    return KiaraTableMetadata.construct(table=table_md)
retrieve_supported_data_types()
classmethod
¶
Source code in tabular/models/table.py
@classmethod
def retrieve_supported_data_types(cls) -> Iterable[str]:
    """Return the data type names this metadata model applies to (only `table`)."""
    supported_types = ["table"]
    return supported_types