models
This module contains the metadata (and other) models that are used in the kiara_plugin.tabular
package.
Those models are convenience wrappers that make it easier for kiara to find, create, manage and version metadata -- but also other types of models -- that are attached to data, as well as kiara modules.
Metadata models must be a sub-class of [kiara.metadata.MetadataModel][]. Other models usually sub-class a pydantic BaseModel or implement custom base classes.
Classes¶
ColumnSchema (BaseModel)
pydantic-model
¶
Describes properties of a single column of the 'table' data type.
Source code in tabular/models/__init__.py
class ColumnSchema(BaseModel):
    """Describes properties of a single column of the 'table' data type."""

    # Backend-specific type identifier (e.g. an Arrow type name).
    type_name: str = Field(description="The type name of the column (backend-specific).")
    # Arbitrary extra per-column metadata; defaults to an empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Other metadata for the column.")
TableMetadata (KiaraModel)
pydantic-model
¶
Describes properties for the 'table' data type.
Source code in tabular/models/__init__.py
class TableMetadata(KiaraModel):
    """Describes properties for the 'table' data type."""

    column_names: List[str] = Field(description="The name of the columns of the table.")
    column_schema: Dict[str, ColumnSchema] = Field(description="The schema description of the table.")
    rows: int = Field(description="The number of rows the table contains.")
    size: Union[int, None] = Field(default=None, description="The tables size in bytes.")

    def _retrieve_data_to_hash(self) -> Any:
        """Return the data that identifies this metadata instance for hashing.

        Column names are recoverable from the schema mapping keys, so only the
        per-column schemas, the row count and the size feed into the hash.
        """
        schemas = {name: col.dict() for name, col in self.column_schema.items()}
        return {
            "column_schemas": schemas,
            "rows": self.rows,
            "size": self.size,
        }
Attributes¶
column_names: List[str]
pydantic-field
required
¶
The name of the columns of the table.
column_schema: Dict[str, kiara_plugin.tabular.models.ColumnSchema]
pydantic-field
required
¶
The schema description of the table.
rows: int
pydantic-field
required
¶
The number of rows the table contains.
size: int
pydantic-field
¶
The tables size in bytes.
Modules¶
array
¶
Classes¶
KiaraArray (KiaraModel)
pydantic-model
¶
A class to manage array-like data.
Internally, this uses an Apache Arrow Array to handle the data in memory and on disk.
Source code in tabular/models/array.py
class KiaraArray(KiaraModel):
    """A class to manage array-like data.

    Internally, this uses an [Apache Arrow Array](https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array) to handle the data in memory and on disk.
    """

    @classmethod
    def create_array(cls, data: Any) -> "KiaraArray":
        """Create a `KiaraArray` from supported source data.

        Accepted inputs: an existing `KiaraArray` (returned unchanged), an Arrow
        Array or ChunkedArray, a single-column Arrow Table, or anything
        `pa.array` can convert (e.g. a plain list).

        Raises:
            Exception: if the data can't be converted to an Arrow array
        """
        if isinstance(data, KiaraArray):
            return data

        array_obj = None
        if isinstance(data, (pa.Array, pa.ChunkedArray)):
            array_obj = data
        elif isinstance(data, pa.Table):
            if len(data.columns) != 1:
                raise Exception(
                    f"Invalid type, only Arrow Arrays or single-column Tables allowed. This value is a table with {len(data.columns)} columns."
                )
            array_obj = data.column(0)
        else:
            # Best-effort conversion; a failure falls through to the error below.
            try:
                array_obj = pa.array(data)
            except Exception:
                pass

        if array_obj is None:
            # BUG FIX: the message previously said "Can't create table" — a
            # copy/paste error from the table model; this method creates arrays.
            raise Exception(
                f"Can't create array, invalid source data type: {type(data)}."
            )

        obj = KiaraArray()
        # Normalize to a ChunkedArray so downstream code deals with one type only.
        if not isinstance(array_obj, pa.lib.ChunkedArray):
            array_obj = pa.chunked_array(array_obj)
        obj._array_obj = array_obj
        return obj

    data_path: Union[str, None] = Field(
        description="The path to the (feather) file backing this array.", default=None
    )
    # In-memory array; loaded lazily from 'data_path' on first access if unset.
    _array_obj: pa.Array = PrivateAttr(default=None)

    def _retrieve_data_to_hash(self) -> Any:
        # Hashing is not implemented for bare arrays.
        raise NotImplementedError()

    def __len__(self):
        return len(self.arrow_array)

    @property
    def arrow_array(self) -> pa.Array:
        """Return the underlying Arrow array, memory-mapping it from disk on first access.

        Raises:
            Exception: if neither in-memory data nor a backing file path is set,
                or the serialized file does not contain exactly one column
        """
        if self._array_obj is not None:
            return self._array_obj

        if not self.data_path:
            raise Exception("Can't retrieve array data, object not initialized (yet).")

        with pa.memory_map(self.data_path, "r") as source:
            table: pa.Table = pa.ipc.open_file(source).read_all()

        if len(table.columns) != 1:
            raise Exception(
                f"Invalid serialized array data, only a single-column Table is allowed. This value is a table with {len(table.columns)} columns."
            )

        self._array_obj = table.column(0)
        return self._array_obj

    def to_pylist(self):
        """Return the array data as a Python list (loads all data into memory)."""
        return self.arrow_array.to_pylist()

    def to_pandas(self):
        """Return the array data as a pandas object (loads all data into memory)."""
        return self.arrow_array.to_pandas()
Attributes¶
arrow_array: Array
property
readonly
¶data_path: str
pydantic-field
¶The path to the (feather) file backing this array.
create_array(data)
classmethod
¶Source code in tabular/models/array.py
@classmethod
def create_array(cls, data: Any) -> "KiaraArray":
    """Create a `KiaraArray` from supported source data.

    Accepted inputs: an existing `KiaraArray` (returned unchanged), an Arrow
    Array or ChunkedArray, a single-column Arrow Table, or anything
    `pa.array` can convert (e.g. a plain list).

    Raises:
        Exception: if the data can't be converted to an Arrow array
    """
    if isinstance(data, KiaraArray):
        return data

    array_obj = None
    if isinstance(data, (pa.Array, pa.ChunkedArray)):
        array_obj = data
    elif isinstance(data, pa.Table):
        if len(data.columns) != 1:
            raise Exception(
                f"Invalid type, only Arrow Arrays or single-column Tables allowed. This value is a table with {len(data.columns)} columns."
            )
        array_obj = data.column(0)
    else:
        # Best-effort conversion; a failure falls through to the error below.
        try:
            array_obj = pa.array(data)
        except Exception:
            pass

    if array_obj is None:
        # BUG FIX: the message previously said "Can't create table" — a
        # copy/paste error from the table model; this method creates arrays.
        raise Exception(
            f"Can't create array, invalid source data type: {type(data)}."
        )

    obj = KiaraArray()
    # Normalize to a ChunkedArray so downstream code deals with one type only.
    if not isinstance(array_obj, pa.lib.ChunkedArray):
        array_obj = pa.chunked_array(array_obj)
    obj._array_obj = array_obj
    return obj
to_pandas(self)
¶Source code in tabular/models/array.py
def to_pandas(self):
    """Convert the underlying Arrow array to a pandas object.

    Note: this loads all the data into memory.
    """
    array = self.arrow_array
    return array.to_pandas()
to_pylist(self)
¶Source code in tabular/models/array.py
def to_pylist(self):
    """Convert the underlying Arrow array to a plain Python list.

    Note: this loads all the data into memory.
    """
    array = self.arrow_array
    return array.to_pylist()
db
¶
Classes¶
DatabaseMetadata (ValueMetadata)
pydantic-model
¶
Database and table properties.
Source code in tabular/models/db.py
class DatabaseMetadata(ValueMetadata):
    """Database and table properties."""

    # Key under which this metadata is registered/attached.
    _metadata_key = "database"

    @classmethod
    def retrieve_supported_data_types(cls) -> Iterable[str]:
        # This metadata model only applies to values of the 'database' data type.
        return ["database"]

    @classmethod
    def create_value_metadata(cls, value: Value) -> "DatabaseMetadata":
        # Build per-table metadata (column schemas, row count, byte size) for
        # every table in the wrapped sqlite database.
        database: KiaraDatabase = value.data
        insp = database.get_sqlalchemy_inspector()
        mds = {}
        for table_name in insp.get_table_names():
            with database.get_sqlalchemy_engine().connect() as con:
                # NOTE(review): table_name is interpolated directly into the SQL
                # string. It comes from the inspector (not user input), so
                # injection risk is low, but quoting the identifier would be
                # safer for unusual table names — TODO confirm.
                result = con.execute(text(f"SELECT count(*) from {table_name}"))
                num_rows = result.fetchone()[0]
                try:
                    # 'dbstat' is an optional sqlite virtual table; when the
                    # sqlite build doesn't provide it this query raises and the
                    # size is recorded as unknown (None).
                    result = con.execute(
                        text(
                            f'SELECT SUM("pgsize") FROM "dbstat" WHERE name="{table_name}"'
                        )
                    )
                    size: Union[int, None] = result.fetchone()[0]
                except Exception:
                    size = None
            columns = {}
            for column in insp.get_columns(table_name=table_name):
                name = column["name"]
                _type = column["type"]
                # Map the sqlalchemy column type class to its sqlite type name.
                type_name = SQLALCHEMY_SQLITE_TYPE_MAP[type(_type)]
                columns[name] = {
                    "type_name": type_name,
                    "metadata": {
                        "nullable": column["nullable"],
                        "primary_key": True if column["primary_key"] else False,
                    },
                }
            schema = {
                "column_names": list(columns.keys()),
                "column_schema": columns,
                "rows": num_rows,
                "size": size,
            }
            md = TableMetadata(**schema)
            mds[table_name] = md
        # 'construct' skips pydantic validation; the values built above are trusted.
        return DatabaseMetadata.construct(tables=mds)

    tables: Dict[str, TableMetadata] = Field(description="The table schema.")
Attributes¶
tables: Dict[str, kiara_plugin.tabular.models.TableMetadata]
pydantic-field
required
¶The table schema.
create_value_metadata(value)
classmethod
¶Source code in tabular/models/db.py
@classmethod
def create_value_metadata(cls, value: Value) -> "DatabaseMetadata":
    # Build per-table metadata (column schemas, row count, byte size) for
    # every table in the wrapped sqlite database.
    database: KiaraDatabase = value.data
    insp = database.get_sqlalchemy_inspector()
    mds = {}
    for table_name in insp.get_table_names():
        with database.get_sqlalchemy_engine().connect() as con:
            # NOTE(review): table_name is interpolated directly into the SQL
            # string. It comes from the inspector (not user input), so the
            # injection risk is low, but quoting the identifier would be
            # safer for unusual table names — TODO confirm.
            result = con.execute(text(f"SELECT count(*) from {table_name}"))
            num_rows = result.fetchone()[0]
            try:
                # 'dbstat' is an optional sqlite virtual table; when the
                # sqlite build doesn't provide it this query raises and the
                # size is recorded as unknown (None).
                result = con.execute(
                    text(
                        f'SELECT SUM("pgsize") FROM "dbstat" WHERE name="{table_name}"'
                    )
                )
                size: Union[int, None] = result.fetchone()[0]
            except Exception:
                size = None
        columns = {}
        for column in insp.get_columns(table_name=table_name):
            name = column["name"]
            _type = column["type"]
            # Map the sqlalchemy column type class to its sqlite type name.
            type_name = SQLALCHEMY_SQLITE_TYPE_MAP[type(_type)]
            columns[name] = {
                "type_name": type_name,
                "metadata": {
                    "nullable": column["nullable"],
                    "primary_key": True if column["primary_key"] else False,
                },
            }
        schema = {
            "column_names": list(columns.keys()),
            "column_schema": columns,
            "rows": num_rows,
            "size": size,
        }
        md = TableMetadata(**schema)
        mds[table_name] = md
    # 'construct' skips pydantic validation; the values built above are trusted.
    return DatabaseMetadata.construct(tables=mds)
retrieve_supported_data_types()
classmethod
¶Source code in tabular/models/db.py
@classmethod
def retrieve_supported_data_types(cls) -> Iterable[str]:
    """Return the names of the data types this metadata model supports."""
    supported = ["database"]
    return supported
KiaraDatabase (KiaraModel)
pydantic-model
¶
A wrapper class to manage a sqlite database.
Source code in tabular/models/db.py
class KiaraDatabase(KiaraModel):
    """A wrapper class to manage a sqlite database."""

    @classmethod
    def create_in_temp_dir(
        cls,
        init_statement: Union[None, str, "TextClause"] = None,
        init_data: Union[Mapping[str, Any], None] = None,
    ):
        """Create a new sqlite database file in a temporary directory.

        Arguments:
            init_statement: (optional) an sql statement to run against the fresh database
            init_data: (optional) data to bind to the init statement

        Returns:
            the new (locked) database wrapper
        """
        temp_f = tempfile.mkdtemp()
        db_path = os.path.join(temp_f, "db.sqlite")

        def cleanup():
            # BUG FIX: remove the temporary *directory*. The previous code
            # called rmtree on the file path, which raises NotADirectoryError;
            # that error was silently swallowed by ignore_errors, so the temp
            # dir (and the db file) leaked on every run.
            shutil.rmtree(temp_f, ignore_errors=True)

        atexit.register(cleanup)

        db = cls(db_file_path=db_path)
        db.create_if_not_exists()

        if init_statement:
            db._unlock_db()
            db.execute_sql(statement=init_statement, data=init_data, invalidate=True)
            db._lock_db()
        return db

    db_file_path: str = Field(description="The path to the sqlite database file.")

    # Lazily-created sqlalchemy objects and cached values; reset by _invalidate().
    _cached_engine = PrivateAttr(default=None)
    _cached_inspector = PrivateAttr(default=None)
    _table_names = PrivateAttr(default=None)
    _tables: Dict[str, Table] = PrivateAttr(default_factory=dict)
    _metadata_obj: Union[MetaData, None] = PrivateAttr(default=None)
    _file_cid: Union[CID, None] = PrivateAttr(default=None)
    # While locked, new connections are put into query-only mode (see
    # get_sqlalchemy_engine); an immutable db can never be unlocked.
    _lock: bool = PrivateAttr(default=True)
    _immutable: bool = PrivateAttr(default=None)

    def _retrieve_id(self) -> str:
        return str(self.file_cid)

    def _retrieve_data_to_hash(self) -> Any:
        # The content id of the backing file identifies the database content.
        return self.file_cid

    @validator("db_file_path", allow_reuse=True)
    def ensure_absolute_path(cls, path: str):
        """Normalize the db file path to an absolute path; its parent folder must exist."""
        path = os.path.abspath(path)
        if not os.path.exists(os.path.dirname(path)):
            raise ValueError(f"Parent folder for database file does not exist: {path}")
        return path

    @property
    def db_url(self) -> str:
        """The sqlalchemy connection url for the sqlite file."""
        return f"sqlite:///{self.db_file_path}"

    @property
    def file_cid(self) -> CID:
        """The content id (cid) of the database file, computed lazily and cached."""
        if self._file_cid is not None:
            return self._file_cid

        self._file_cid = compute_cid_from_file(file=self.db_file_path, codec="raw")
        return self._file_cid

    def get_sqlalchemy_engine(self) -> "Engine":
        """Return the (cached) sqlalchemy engine for this database.

        If the database is locked, every new connection is switched into
        query-only mode via a sqlite PRAGMA.
        """
        if self._cached_engine is not None:
            return self._cached_engine

        def _pragma_on_connect(dbapi_con, con_record):
            dbapi_con.execute("PRAGMA query_only = ON")

        self._cached_engine = create_engine(self.db_url, future=True)
        if self._lock:
            event.listen(self._cached_engine, "connect", _pragma_on_connect)
        return self._cached_engine

    def _lock_db(self):
        # Locking changes engine behavior, so cached objects must be rebuilt.
        self._lock = True
        self._invalidate()

    def _unlock_db(self):
        if self._immutable:
            raise Exception("Can't unlock db, it's immutable.")
        self._lock = False
        self._invalidate()

    def create_if_not_exists(self):
        """Create the database file if it does not exist yet."""
        from sqlalchemy_utils import create_database, database_exists

        if not database_exists(self.db_url):
            create_database(self.db_url)

    def execute_sql(
        self,
        statement: Union[str, "TextClause"],
        data: Union[Mapping[str, Any], None] = None,
        invalidate: bool = False,
    ):
        """Execute an sql script.

        Arguments:
            statement: the sql statement
            data: (optional) data, to be bound to the statement
            invalidate: whether to invalidate cached values within this object
        """
        if isinstance(statement, str):
            statement = text(statement)
        if data:
            # BUG FIX: bindparams returns a *new* statement object; the old code
            # discarded the result, so the data was never actually bound.
            statement = statement.bindparams(**data)

        # NOTE(review): with future=True engines there is no autocommit; it
        # looks like writes here may need an explicit con.commit() — confirm
        # against the sqlalchemy version in use.
        with self.get_sqlalchemy_engine().connect() as con:
            con.execute(statement)
        if invalidate:
            self._invalidate()

    def _invalidate(self):
        # Drop all cached sqlalchemy state; everything is recreated lazily.
        self._cached_engine = None
        self._cached_inspector = None
        self._table_names = None
        self._metadata_obj = None
        self._tables.clear()

    def _invalidate_other(self):
        pass

    def get_sqlalchemy_metadata(self) -> MetaData:
        """Return the sqlalchemy Metadata object for the underlying database.

        This is used internally, you typically don't need to access this attribute.
        """
        if self._metadata_obj is None:
            self._metadata_obj = MetaData()
        return self._metadata_obj

    def copy_database_file(self, target: str):
        """Copy the database file to 'target' and return a new wrapper for the copy."""
        # BUG FIX: don't fail when the target directory already exists.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        shutil.copy2(self.db_file_path, target)
        new_db = KiaraDatabase(db_file_path=target)
        return new_db

    def get_sqlalchemy_inspector(self) -> Inspector:
        """Return the (cached) sqlalchemy inspector for this database."""
        if self._cached_inspector is not None:
            return self._cached_inspector

        self._cached_inspector = inspect(self.get_sqlalchemy_engine())
        return self._cached_inspector

    @property
    def table_names(self) -> Iterable[str]:
        """The names of all tables in this database (cached)."""
        if self._table_names is not None:
            return self._table_names

        self._table_names = self.get_sqlalchemy_inspector().get_table_names()
        return self._table_names

    def get_sqlalchemy_table(self, table_name: str) -> Table:
        """Return the (cached, reflected) sqlalchemy table object for the given table name."""
        if table_name in self._tables.keys():
            return self._tables[table_name]

        table = Table(
            table_name,
            self.get_sqlalchemy_metadata(),
            autoload_with=self.get_sqlalchemy_engine(),
        )
        self._tables[table_name] = table
        return table
Attributes¶
db_file_path: str
pydantic-field
required
¶The path to the sqlite database file.
db_url: str
property
readonly
¶file_cid: CID
property
readonly
¶table_names: Iterable[str]
property
readonly
¶Methods¶
copy_database_file(self, target)
¶Source code in tabular/models/db.py
def copy_database_file(self, target: str):
    """Copy the underlying database file to 'target' and wrap the copy in a new KiaraDatabase.

    Arguments:
        target: the path of the copied database file (parent dirs are created)

    Returns:
        a new KiaraDatabase instance for the copied file
    """
    # BUG FIX: tolerate a pre-existing target directory; without exist_ok the
    # call raises FileExistsError when the parent folder is already there.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    shutil.copy2(self.db_file_path, target)
    new_db = KiaraDatabase(db_file_path=target)
    return new_db
create_if_not_exists(self)
¶Source code in tabular/models/db.py
def create_if_not_exists(self):
    """Create the database file if it does not exist yet; otherwise do nothing."""
    # Imported locally so sqlalchemy_utils is only required when this is called.
    from sqlalchemy_utils import create_database, database_exists

    if database_exists(self.db_url):
        return
    create_database(self.db_url)
create_in_temp_dir(init_statement=None, init_data=None)
classmethod
¶Source code in tabular/models/db.py
@classmethod
def create_in_temp_dir(
    cls,
    init_statement: Union[None, str, "TextClause"] = None,
    init_data: Union[Mapping[str, Any], None] = None,
):
    """Create a new sqlite database file in a temporary directory.

    Arguments:
        init_statement: (optional) an sql statement to run against the fresh database
        init_data: (optional) data to bind to the init statement

    Returns:
        the new (locked) database wrapper
    """
    temp_f = tempfile.mkdtemp()
    db_path = os.path.join(temp_f, "db.sqlite")

    def cleanup():
        # BUG FIX: remove the temporary *directory*. The previous code called
        # rmtree on the file path, which raises NotADirectoryError; that error
        # was silently swallowed by ignore_errors, so the temp dir leaked.
        shutil.rmtree(temp_f, ignore_errors=True)

    atexit.register(cleanup)

    db = cls(db_file_path=db_path)
    db.create_if_not_exists()

    if init_statement:
        db._unlock_db()
        db.execute_sql(statement=init_statement, data=init_data, invalidate=True)
        db._lock_db()
    return db
ensure_absolute_path(path)
classmethod
¶Source code in tabular/models/db.py
@validator("db_file_path", allow_reuse=True)
def ensure_absolute_path(cls, path: str):
    """Normalize the configured db file path to an absolute path.

    Raises:
        ValueError: if the parent folder of the path does not exist
    """
    absolute = os.path.abspath(path)
    parent = os.path.dirname(absolute)
    if not os.path.exists(parent):
        raise ValueError(f"Parent folder for database file does not exist: {absolute}")
    return absolute
execute_sql(self, statement, data=None, invalidate=False)
¶Execute an sql script.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
statement |
Union[str, TextClause] |
the sql statement |
required |
data |
Optional[Mapping[str, Any]] |
(optional) data, to be bound to the statement |
None |
invalidate |
bool |
whether to invalidate cached values within this object |
False |
Source code in tabular/models/db.py
def execute_sql(
    self,
    statement: Union[str, "TextClause"],
    data: Union[Mapping[str, Any], None] = None,
    invalidate: bool = False,
):
    """Execute an sql script.

    Arguments:
        statement: the sql statement
        data: (optional) data, to be bound to the statement
        invalidate: whether to invalidate cached values within this object
    """
    if isinstance(statement, str):
        statement = text(statement)
    if data:
        # BUG FIX: bindparams returns a *new* statement object; the old code
        # discarded the result, so the data was never actually bound.
        statement = statement.bindparams(**data)

    # NOTE(review): with future=True engines there is no autocommit; it looks
    # like writes here may need an explicit con.commit() — confirm against the
    # sqlalchemy version in use.
    with self.get_sqlalchemy_engine().connect() as con:
        con.execute(statement)
    if invalidate:
        self._invalidate()
get_sqlalchemy_engine(self)
¶Source code in tabular/models/db.py
def get_sqlalchemy_engine(self) -> "Engine":
    """Return the sqlalchemy engine for this database, creating and caching it on first use.

    While the database is locked, each new connection is switched into
    query-only mode via a sqlite PRAGMA.
    """
    if self._cached_engine is None:

        def _pragma_on_connect(dbapi_con, con_record):
            dbapi_con.execute("PRAGMA query_only = ON")

        engine = create_engine(self.db_url, future=True)
        if self._lock:
            event.listen(engine, "connect", _pragma_on_connect)
        self._cached_engine = engine

    return self._cached_engine
get_sqlalchemy_inspector(self)
¶Source code in tabular/models/db.py
def get_sqlalchemy_inspector(self) -> Inspector:
    """Return the sqlalchemy inspector for this database, creating and caching it on first use."""
    if self._cached_inspector is None:
        self._cached_inspector = inspect(self.get_sqlalchemy_engine())
    return self._cached_inspector
get_sqlalchemy_metadata(self)
¶Return the sqlalchemy Metadata object for the underlying database.
This is used internally, you typically don't need to access this attribute.
Source code in tabular/models/db.py
def get_sqlalchemy_metadata(self) -> MetaData:
    """Return the sqlalchemy Metadata object for the underlying database.

    This is used internally, you typically don't need to access this attribute.
    """
    meta = self._metadata_obj
    if meta is None:
        meta = MetaData()
        self._metadata_obj = meta
    return meta
get_sqlalchemy_table(self, table_name)
¶Return the (reflected) sqlalchemy table instance for the given table name.
Source code in tabular/models/db.py
def get_sqlalchemy_table(self, table_name: str) -> Table:
    """Return the (reflected) sqlalchemy table object for the given table name, caching it."""
    cached = self._tables.get(table_name)
    if cached is not None:
        return cached

    # Reflect the table structure from the live database.
    table = Table(
        table_name,
        self.get_sqlalchemy_metadata(),
        autoload_with=self.get_sqlalchemy_engine(),
    )
    self._tables[table_name] = table
    return table
SqliteTableSchema (BaseModel)
pydantic-model
¶
Source code in tabular/models/db.py
class SqliteTableSchema(BaseModel):
    """Describes the column layout and constraints of a single sqlite table."""

    columns: Dict[str, SqliteDataType] = Field(
        description="The table columns and their attributes."
    )
    index_columns: List[str] = Field(
        default_factory=list, description="The columns to index"
    )
    nullable_columns: List[str] = Field(
        default_factory=list, description="The columns that are nullable."
    )
    unique_columns: List[str] = Field(
        default_factory=list, description="The columns that should be marked 'UNIQUE'."
    )
    primary_key: Union[str, None] = Field(
        default=None, description="The primary key for this table."
    )

    def create_table_metadata(
        self,
        table_name: str,
    ) -> Tuple[MetaData, Table]:
        """Build the sqlalchemy metadata and table objects for this schema.

        Arguments:
            table_name: the name of the table

        Returns:
            a tuple of the freshly-created MetaData object and the table object
        """
        columns = [
            Column(
                name,
                SQLITE_SQLALCHEMY_TYPE_MAP[data_type],
                nullable=name in self.nullable_columns,
                primary_key=name == self.primary_key,
                index=name in self.index_columns,
                unique=name in self.unique_columns,
            )
            for name, data_type in self.columns.items()
        ]
        meta = MetaData()
        table = Table(table_name, meta, *columns)
        return meta, table

    def create_table(self, table_name: str, engine: Engine) -> Table:
        """Create the table in the database behind 'engine' and return the table object."""
        meta, table = self.create_table_metadata(table_name=table_name)
        meta.create_all(engine)
        return table
Attributes¶
columns: Dict[str, Literal['NULL', 'INTEGER', 'REAL', 'TEXT', 'BLOB']]
pydantic-field
required
¶The table columns and their attributes.
index_columns: List[str]
pydantic-field
¶The columns to index
nullable_columns: List[str]
pydantic-field
¶The columns that are nullable.
primary_key: str
pydantic-field
¶The primary key for this table.
unique_columns: List[str]
pydantic-field
¶The columns that should be marked 'UNIQUE'.
Methods¶
create_table(self, table_name, engine)
¶Source code in tabular/models/db.py
def create_table(self, table_name: str, engine: Engine) -> Table:
    """Materialize this schema as a table in the database behind 'engine' and return it."""
    metadata, table_obj = self.create_table_metadata(table_name=table_name)
    metadata.create_all(engine)
    return table_obj
create_table_metadata(self, table_name)
¶Create the sqlalchemy metadata and table objects for a table.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
column_attrs |
a map with the column name as key, and column details ('type', 'extra_column_info', 'create_index') as values |
required |
Source code in tabular/models/db.py
def create_table_metadata(
    self,
    table_name: str,
) -> Tuple[MetaData, Table]:
    """Build the sqlalchemy metadata and table objects for this schema.

    Arguments:
        table_name: the name of the table

    Returns:
        a tuple of the freshly-created MetaData object and the table object
    """
    columns = [
        Column(
            name,
            SQLITE_SQLALCHEMY_TYPE_MAP[data_type],
            nullable=name in self.nullable_columns,
            primary_key=name == self.primary_key,
            index=name in self.index_columns,
            unique=name in self.unique_columns,
        )
        for name, data_type in self.columns.items()
    ]
    meta = MetaData()
    table = Table(table_name, meta, *columns)
    return meta, table
table
¶
Classes¶
KiaraTable (KiaraModel)
pydantic-model
¶
A wrapper class to manage tabular data in a memory efficient way.
Source code in tabular/models/table.py
class KiaraTable(KiaraModel):
    """A wrapper class to manage tabular data in a memory efficient way."""

    @classmethod
    def create_table(cls, data: Any) -> "KiaraTable":
        """Create a `KiaraTable` instance from an Apache Arrow Table, or dict of lists.

        Raises:
            Exception: if the data can't be converted to an Arrow table
        """
        table_obj = None
        if isinstance(data, KiaraTable):
            return data

        if isinstance(data, pa.Table):
            table_obj = data
        else:
            # Best-effort conversion (e.g. a dict of lists); failure falls
            # through to the error below.
            try:
                table_obj = pa.table(data)
            except Exception:
                pass

        if table_obj is None:
            raise Exception(
                f"Can't create table, invalid source data type: {type(data)}."
            )

        obj = KiaraTable()
        obj._table_obj = table_obj
        return obj

    # The path where the table data is stored (for internal or read-only use).
    data_path: Union[None, str] = Field(
        # BUG FIX: description previously said "backing this array" — a
        # copy/paste from the KiaraArray model; this field backs a table.
        description="The path to the (feather) file backing this table.",
        default=None,
    )
    # In-memory table; loaded lazily from 'data_path' on first access if unset.
    _table_obj: pa.Table = PrivateAttr(default=None)

    def _retrieve_data_to_hash(self) -> Any:
        # Hashing is not implemented for bare tables.
        raise NotImplementedError()

    @property
    def arrow_table(self) -> pa.Table:
        """Return the data as an Apache Arrow Table instance."""
        if self._table_obj is not None:
            return self._table_obj

        if not self.data_path:
            raise Exception("Can't retrieve table data, object not initialized (yet).")

        with pa.memory_map(self.data_path, "r") as source:
            table: pa.Table = pa.ipc.open_file(source).read_all()

        self._table_obj = table
        return self._table_obj

    @property
    def column_names(self) -> Iterable[str]:
        """Retrieve the names of all the columns of this table."""
        return self.arrow_table.column_names

    @property
    def num_rows(self) -> int:
        """Return the number of rows in this table."""
        return self.arrow_table.num_rows

    def to_pydict(self):
        """Convert and return the table data as a dictionary of lists.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pydict()

    def to_pylist(self):
        """Convert and return the table data as a list of rows/dictionaries.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pylist()

    def to_pandas(self):
        """Convert and return the table data to a Pandas dataframe.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pandas()
Attributes¶
arrow_table: Table
property
readonly
¶Return the data as an Apache Arrow Table instance.
column_names: Iterable[str]
property
readonly
¶Retrieve the names of all the columns of this table.
data_path: str
pydantic-field
¶The path to the (feather) file backing this array.
num_rows: int
property
readonly
¶Return the number of rows in this table.
Methods¶
create_table(data)
classmethod
¶Create a KiaraTable
instance from an Apache Arrow Table, or dict of lists.
Source code in tabular/models/table.py
@classmethod
def create_table(cls, data: Any) -> "KiaraTable":
    """Create a `KiaraTable` instance from an Apache Arrow Table, or dict of lists."""
    if isinstance(data, KiaraTable):
        return data

    if isinstance(data, pa.Table):
        table_obj = data
    else:
        # Best-effort conversion; anything pa.table understands is accepted.
        try:
            table_obj = pa.table(data)
        except Exception:
            table_obj = None

    if table_obj is None:
        raise Exception(
            f"Can't create table, invalid source data type: {type(data)}."
        )

    obj = KiaraTable()
    obj._table_obj = table_obj
    return obj
to_pandas(self)
¶Convert and return the table data to a Pandas dataframe.
This will load all data into memory, so you might or might not want to do that.
Source code in tabular/models/table.py
def to_pandas(self):
    """Convert and return the table data as a Pandas dataframe.

    Note: this loads all the data into memory.
    """
    table = self.arrow_table
    return table.to_pandas()
to_pydict(self)
¶Convert and return the table data as a dictionary of lists.
This will load all data into memory, so you might or might not want to do that.
Source code in tabular/models/table.py
def to_pydict(self):
    """Convert and return the table data as a dictionary of lists.

    Note: this loads all the data into memory.
    """
    table = self.arrow_table
    return table.to_pydict()
to_pylist(self)
¶Convert and return the table data as a list of rows/dictionaries.
This will load all data into memory, so you might or might not want to do that.
Source code in tabular/models/table.py
def to_pylist(self):
    """Convert and return the table data as a list of rows/dictionaries.

    Note: this loads all the data into memory.
    """
    table = self.arrow_table
    return table.to_pylist()
KiaraTableMetadata (ValueMetadata)
pydantic-model
¶
Table properties (column schema, row count, size).
Source code in tabular/models/table.py
class KiaraTableMetadata(ValueMetadata):
    """Table properties (column schema, row count, size in bytes)."""

    # Key under which this metadata is registered/attached.
    _metadata_key = "table"

    @classmethod
    def retrieve_supported_data_types(cls) -> Iterable[str]:
        # This metadata model only applies to values of the 'table' data type.
        return ["table"]

    @classmethod
    def create_value_metadata(cls, value: "Value") -> "KiaraTableMetadata":
        # Derive a TableMetadata model from the Arrow schema of the wrapped table.
        kiara_table: KiaraTable = value.data
        table: pa.Table = kiara_table.arrow_table
        table_schema = {}
        for name in table.schema.names:
            field = table.schema.field(name)
            md = field.metadata
            _type = field.type
            if not md:
                # No field-level metadata attached; record the arrow type id instead.
                md = {
                    "arrow_type_id": _type.id,
                }
            _d = {
                "type_name": str(_type),
                "metadata": md,
            }
            table_schema[name] = _d
        schema = {
            "column_names": table.column_names,
            "column_schema": table_schema,
            "rows": table.num_rows,
            "size": table.nbytes,
        }
        # 'construct' skips pydantic validation; the values built above are trusted.
        md = TableMetadata.construct(**schema)
        return KiaraTableMetadata.construct(table=md)

    table: TableMetadata = Field(description="The table schema.")
Attributes¶
table: TableMetadata
pydantic-field
required
¶The table schema.
create_value_metadata(value)
classmethod
¶Source code in tabular/models/table.py
@classmethod
def create_value_metadata(cls, value: "Value") -> "KiaraTableMetadata":
    # Derive a TableMetadata model from the Arrow schema of the wrapped table.
    kiara_table: KiaraTable = value.data
    table: pa.Table = kiara_table.arrow_table
    table_schema = {}
    for name in table.schema.names:
        field = table.schema.field(name)
        md = field.metadata
        _type = field.type
        if not md:
            # No field-level metadata attached; record the arrow type id instead.
            md = {
                "arrow_type_id": _type.id,
            }
        _d = {
            "type_name": str(_type),
            "metadata": md,
        }
        table_schema[name] = _d
    schema = {
        "column_names": table.column_names,
        "column_schema": table_schema,
        "rows": table.num_rows,
        "size": table.nbytes,
    }
    # 'construct' skips pydantic validation; the values built above are trusted.
    md = TableMetadata.construct(**schema)
    return KiaraTableMetadata.construct(table=md)
retrieve_supported_data_types()
classmethod
¶Source code in tabular/models/table.py
@classmethod
def retrieve_supported_data_types(cls) -> Iterable[str]:
    """Return the names of the data types this metadata model supports."""
    supported = ["table"]
    return supported