models
This module contains the metadata (and other) models that are used in the kiara_plugin.tabular
package.
Those models are convenience wrappers that make it easier for kiara to find, create, manage and version metadata -- but also other types of models -- that are attached to data, as well as kiara modules.
Metadata models must be a sub-class of [kiara.metadata.MetadataModel][]. Other models usually sub-class a pydantic BaseModel or implement custom base classes.
Classes¶
ColumnSchema (BaseModel)
pydantic-model
¶
Describes properties of a single column of the 'table' data type.
Source code in tabular/models/__init__.py
class ColumnSchema(BaseModel):
    """Describes properties of a single column of the 'table' data type."""

    # Backend-specific type identifier (e.g. an Arrow type name).
    type_name: str = Field(description="The type name of the column (backend-specific).")
    # Arbitrary extra per-column metadata; defaults to an empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Other metadata for the column.")
TableMetadata (KiaraModel)
pydantic-model
¶
Describes properties for the 'table' data type.
Source code in tabular/models/__init__.py
class TableMetadata(KiaraModel):
    """Describes properties for the 'table' data type."""

    column_names: List[str] = Field(description="The name of the columns of the table.")
    column_schema: Dict[str, ColumnSchema] = Field(description="The schema description of the table.")
    rows: int = Field(description="The number of rows the table contains.")
    size: Union[int, None] = Field(default=None, description="The tables size in bytes.")

    def _retrieve_data_to_hash(self) -> Any:
        """Return the data that identifies this metadata instance for hashing.

        Column names are recoverable from the schema mapping keys, so only the
        per-column schemas, the row count and the size feed into the hash.
        """
        schemas = {name: col.dict() for name, col in self.column_schema.items()}
        return {
            "column_schemas": schemas,
            "rows": self.rows,
            "size": self.size,
        }
Attributes¶
column_names: List[str]
pydantic-field
required
¶
The name of the columns of the table.
column_schema: Dict[str, kiara_plugin.tabular.models.ColumnSchema]
pydantic-field
required
¶
The schema description of the table.
rows: int
pydantic-field
required
¶
The number of rows the table contains.
size: int
pydantic-field
¶
The tables size in bytes.
Modules¶
array
¶
Classes¶
KiaraArray (KiaraModel)
pydantic-model
¶
A class to manage array-like data.
Internally, this uses an Apache Arrow Array to handle the data in memory and on disk.
Source code in tabular/models/array.py
class KiaraArray(KiaraModel):
    """A class to manage array-like data.

    Internally, this uses an [Apache Arrow Array](https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array) to handle the data in memory and on disk.
    """

    @classmethod
    def create_array(cls, data: Any) -> "KiaraArray":
        """Create a `KiaraArray` from supported source data.

        Accepted inputs: an existing `KiaraArray` (returned unchanged), an Arrow
        Array or ChunkedArray, a single-column Arrow Table, or anything
        `pa.array` can convert (e.g. a plain list).

        Raises:
            Exception: if the data can't be converted to an Arrow array
        """
        if isinstance(data, KiaraArray):
            return data

        array_obj = None
        if isinstance(data, (pa.Array, pa.ChunkedArray)):
            array_obj = data
        elif isinstance(data, pa.Table):
            if len(data.columns) != 1:
                raise Exception(
                    f"Invalid type, only Arrow Arrays or single-column Tables allowed. This value is a table with {len(data.columns)} columns."
                )
            array_obj = data.column(0)
        else:
            # Best-effort conversion; a failure falls through to the error below.
            try:
                array_obj = pa.array(data)
            except Exception:
                pass

        if array_obj is None:
            # BUG FIX: the message previously said "Can't create table" — a
            # copy/paste error from the table model; this method creates arrays.
            raise Exception(
                f"Can't create array, invalid source data type: {type(data)}."
            )

        obj = KiaraArray()
        # Normalize to a ChunkedArray so downstream code deals with one type only.
        if not isinstance(array_obj, pa.lib.ChunkedArray):
            array_obj = pa.chunked_array(array_obj)
        obj._array_obj = array_obj
        return obj

    data_path: Union[str, None] = Field(
        description="The path to the (feather) file backing this array.", default=None
    )
    # In-memory array; loaded lazily from 'data_path' on first access if unset.
    _array_obj: pa.Array = PrivateAttr(default=None)

    def _retrieve_data_to_hash(self) -> Any:
        # Hashing is not implemented for bare arrays.
        raise NotImplementedError()

    def __len__(self):
        return len(self.arrow_array)

    @property
    def arrow_array(self) -> pa.Array:
        """Return the underlying Arrow array, memory-mapping it from disk on first access.

        Raises:
            Exception: if neither in-memory data nor a backing file path is set,
                or the serialized file does not contain exactly one column
        """
        if self._array_obj is not None:
            return self._array_obj

        if not self.data_path:
            raise Exception("Can't retrieve array data, object not initialized (yet).")

        with pa.memory_map(self.data_path, "r") as source:
            table: pa.Table = pa.ipc.open_file(source).read_all()

        if len(table.columns) != 1:
            raise Exception(
                f"Invalid serialized array data, only a single-column Table is allowed. This value is a table with {len(table.columns)} columns."
            )

        self._array_obj = table.column(0)
        return self._array_obj

    def to_pylist(self):
        """Return the array data as a Python list (loads all data into memory)."""
        return self.arrow_array.to_pylist()

    def to_pandas(self):
        """Return the array data as a pandas object (loads all data into memory)."""
        return self.arrow_array.to_pandas()
Attributes¶
arrow_array: Array
property
readonly
¶data_path: str
pydantic-field
¶The path to the (feather) file backing this array.
create_array(data)
classmethod
¶Source code in tabular/models/array.py
@classmethod
def create_array(cls, data: Any) -> "KiaraArray":
    """Create a `KiaraArray` from supported source data.

    Accepted inputs: an existing `KiaraArray` (returned unchanged), an Arrow
    Array or ChunkedArray, a single-column Arrow Table, or anything
    `pa.array` can convert (e.g. a plain list).

    Raises:
        Exception: if the data can't be converted to an Arrow array
    """
    if isinstance(data, KiaraArray):
        return data

    array_obj = None
    if isinstance(data, (pa.Array, pa.ChunkedArray)):
        array_obj = data
    elif isinstance(data, pa.Table):
        if len(data.columns) != 1:
            raise Exception(
                f"Invalid type, only Arrow Arrays or single-column Tables allowed. This value is a table with {len(data.columns)} columns."
            )
        array_obj = data.column(0)
    else:
        # Best-effort conversion; a failure falls through to the error below.
        try:
            array_obj = pa.array(data)
        except Exception:
            pass

    if array_obj is None:
        # BUG FIX: the message previously said "Can't create table" — a
        # copy/paste error from the table model; this method creates arrays.
        raise Exception(
            f"Can't create array, invalid source data type: {type(data)}."
        )

    obj = KiaraArray()
    # Normalize to a ChunkedArray so downstream code deals with one type only.
    if not isinstance(array_obj, pa.lib.ChunkedArray):
        array_obj = pa.chunked_array(array_obj)
    obj._array_obj = array_obj
    return obj
to_pandas(self)
¶Source code in tabular/models/array.py
def to_pandas(self):
    """Convert the underlying Arrow array to a pandas object.

    Note: this loads all the data into memory.
    """
    array = self.arrow_array
    return array.to_pandas()
to_pylist(self)
¶Source code in tabular/models/array.py
def to_pylist(self):
    """Convert the underlying Arrow array to a plain Python list.

    Note: this loads all the data into memory.
    """
    array = self.arrow_array
    return array.to_pylist()
db
¶
Classes¶
DatabaseMetadata (ValueMetadata)
pydantic-model
¶
Database and table properties.
Source code in tabular/models/db.py
class DatabaseMetadata(ValueMetadata):
    """Database and table properties."""

    # Key under which this metadata is registered/attached.
    _metadata_key = "database"

    @classmethod
    def retrieve_supported_data_types(cls) -> Iterable[str]:
        # This metadata model only applies to values of the 'database' data type.
        return ["database"]

    @classmethod
    def create_value_metadata(cls, value: Value) -> "DatabaseMetadata":
        # Build per-table metadata (column schemas, row count, byte size) for
        # every table in the wrapped sqlite database.
        database: KiaraDatabase = value.data
        insp = database.get_sqlalchemy_inspector()
        mds = {}
        for table_name in insp.get_table_names():
            with database.get_sqlalchemy_engine().connect() as con:
                # NOTE(review): table_name is interpolated directly into the SQL
                # string. It comes from the inspector (not user input), so
                # injection risk is low, but quoting the identifier would be
                # safer for unusual table names — TODO confirm.
                result = con.execute(text(f"SELECT count(*) from {table_name}"))
                num_rows = result.fetchone()[0]
                try:
                    # 'dbstat' is an optional sqlite virtual table; when the
                    # sqlite build doesn't provide it this query raises and the
                    # size is recorded as unknown (None).
                    result = con.execute(
                        text(
                            f'SELECT SUM("pgsize") FROM "dbstat" WHERE name="{table_name}"'
                        )
                    )
                    size: Union[int, None] = result.fetchone()[0]
                except Exception:
                    size = None
            columns = {}
            for column in insp.get_columns(table_name=table_name):
                name = column["name"]
                _type = column["type"]
                # Map the sqlalchemy column type class to its sqlite type name.
                type_name = SQLALCHEMY_SQLITE_TYPE_MAP[type(_type)]
                columns[name] = {
                    "type_name": type_name,
                    "metadata": {
                        "nullable": column["nullable"],
                        "primary_key": True if column["primary_key"] else False,
                    },
                }
            schema = {
                "column_names": list(columns.keys()),
                "column_schema": columns,
                "rows": num_rows,
                "size": size,
            }
            md = TableMetadata(**schema)
            mds[table_name] = md
        # 'construct' skips pydantic validation; the values built above are trusted.
        return DatabaseMetadata.construct(tables=mds)

    tables: Dict[str, TableMetadata] = Field(description="The table schema.")
Attributes¶
tables: Dict[str, kiara_plugin.tabular.models.TableMetadata]
pydantic-field
required
¶The table schema.
create_value_metadata(value)
classmethod
¶Source code in tabular/models/db.py
@classmethod
def create_value_metadata(cls, value: Value) -> "DatabaseMetadata":
    # Build per-table metadata (column schemas, row count, byte size) for
    # every table in the wrapped sqlite database.
    database: KiaraDatabase = value.data
    insp = database.get_sqlalchemy_inspector()
    mds = {}
    for table_name in insp.get_table_names():
        with database.get_sqlalchemy_engine().connect() as con:
            # NOTE(review): table_name is interpolated directly into the SQL
            # string. It comes from the inspector (not user input), so the
            # injection risk is low, but quoting the identifier would be
            # safer for unusual table names — TODO confirm.
            result = con.execute(text(f"SELECT count(*) from {table_name}"))
            num_rows = result.fetchone()[0]
            try:
                # 'dbstat' is an optional sqlite virtual table; when the
                # sqlite build doesn't provide it this query raises and the
                # size is recorded as unknown (None).
                result = con.execute(
                    text(
                        f'SELECT SUM("pgsize") FROM "dbstat" WHERE name="{table_name}"'
                    )
                )
                size: Union[int, None] = result.fetchone()[0]
            except Exception:
                size = None
        columns = {}
        for column in insp.get_columns(table_name=table_name):
            name = column["name"]
            _type = column["type"]
            # Map the sqlalchemy column type class to its sqlite type name.
            type_name = SQLALCHEMY_SQLITE_TYPE_MAP[type(_type)]
            columns[name] = {
                "type_name": type_name,
                "metadata": {
                    "nullable": column["nullable"],
                    "primary_key": True if column["primary_key"] else False,
                },
            }
        schema = {
            "column_names": list(columns.keys()),
            "column_schema": columns,
            "rows": num_rows,
            "size": size,
        }
        md = TableMetadata(**schema)
        mds[table_name] = md
    # 'construct' skips pydantic validation; the values built above are trusted.
    return DatabaseMetadata.construct(tables=mds)
retrieve_supported_data_types()
classmethod
¶Source code in tabular/models/db.py
@classmethod
def retrieve_supported_data_types(cls) -> Iterable[str]:
    """Return the names of the data types this metadata model supports."""
    supported = ["database"]
    return supported
KiaraDatabase (KiaraModel)
pydantic-model
¶
A wrapper class to manage a sqlite database.
Source code in tabular/models/db.py
class KiaraDatabase(KiaraModel):
    """A wrapper class to manage a sqlite database."""

    @classmethod
    def create_in_temp_dir(
        cls,
        init_statement: Union[None, str, "TextClause"] = None,
        init_data: Union[Mapping[str, Any], None] = None,
    ):
        """Create a new sqlite database file in a temporary directory.

        Arguments:
            init_statement: (optional) an sql statement to run against the fresh database
            init_data: (optional) data to bind to the init statement

        Returns:
            the new (locked) database wrapper
        """
        temp_f = tempfile.mkdtemp()
        db_path = os.path.join(temp_f, "db.sqlite")

        def cleanup():
            # BUG FIX: remove the temporary *directory*. The previous code
            # called rmtree on the file path, which raises NotADirectoryError;
            # that error was silently swallowed by ignore_errors, so the temp
            # dir (and the db file) leaked on every run.
            shutil.rmtree(temp_f, ignore_errors=True)

        atexit.register(cleanup)

        db = cls(db_file_path=db_path)
        db.create_if_not_exists()

        if init_statement:
            db._unlock_db()
            db.execute_sql(statement=init_statement, data=init_data, invalidate=True)
            db._lock_db()
        return db

    db_file_path: str = Field(description="The path to the sqlite database file.")

    # Lazily-created sqlalchemy objects and cached values; reset by _invalidate().
    _cached_engine = PrivateAttr(default=None)
    _cached_inspector = PrivateAttr(default=None)
    _table_names = PrivateAttr(default=None)
    _tables: Dict[str, Table] = PrivateAttr(default_factory=dict)
    _metadata_obj: Union[MetaData, None] = PrivateAttr(default=None)
    _file_cid: Union[CID, None] = PrivateAttr(default=None)
    # While locked, new connections are put into query-only mode (see
    # get_sqlalchemy_engine); an immutable db can never be unlocked.
    _lock: bool = PrivateAttr(default=True)
    _immutable: bool = PrivateAttr(default=None)

    def _retrieve_id(self) -> str:
        return str(self.file_cid)

    def _retrieve_data_to_hash(self) -> Any:
        # The content id of the backing file identifies the database content.
        return self.file_cid

    @validator("db_file_path", allow_reuse=True)
    def ensure_absolute_path(cls, path: str):
        """Normalize the db file path to an absolute path; its parent folder must exist."""
        path = os.path.abspath(path)
        if not os.path.exists(os.path.dirname(path)):
            raise ValueError(f"Parent folder for database file does not exist: {path}")
        return path

    @property
    def db_url(self) -> str:
        """The sqlalchemy connection url for the sqlite file."""
        return f"sqlite:///{self.db_file_path}"

    @property
    def file_cid(self) -> CID:
        """The content id (cid) of the database file, computed lazily and cached."""
        if self._file_cid is not None:
            return self._file_cid

        self._file_cid = compute_cid_from_file(file=self.db_file_path, codec="raw")
        return self._file_cid

    def get_sqlalchemy_engine(self) -> "Engine":
        """Return the (cached) sqlalchemy engine for this database.

        If the database is locked, every new connection is switched into
        query-only mode via a sqlite PRAGMA.
        """
        if self._cached_engine is not None:
            return self._cached_engine

        def _pragma_on_connect(dbapi_con, con_record):
            dbapi_con.execute("PRAGMA query_only = ON")

        self._cached_engine = create_engine(self.db_url, future=True)
        if self._lock:
            event.listen(self._cached_engine, "connect", _pragma_on_connect)
        return self._cached_engine

    def _lock_db(self):
        # Locking changes engine behavior, so cached objects must be rebuilt.
        self._lock = True
        self._invalidate()

    def _unlock_db(self):
        if self._immutable:
            raise Exception("Can't unlock db, it's immutable.")
        self._lock = False
        self._invalidate()

    def create_if_not_exists(self):
        """Create the database file if it does not exist yet."""
        from sqlalchemy_utils import create_database, database_exists

        if not database_exists(self.db_url):
            create_database(self.db_url)

    def execute_sql(
        self,
        statement: Union[str, "TextClause"],
        data: Union[Mapping[str, Any], None] = None,
        invalidate: bool = False,
    ):
        """Execute an sql script.

        Arguments:
            statement: the sql statement
            data: (optional) data, to be bound to the statement
            invalidate: whether to invalidate cached values within this object
        """
        if isinstance(statement, str):
            statement = text(statement)
        if data:
            # BUG FIX: bindparams returns a *new* statement object; the old code
            # discarded the result, so the data was never actually bound.
            statement = statement.bindparams(**data)

        # NOTE(review): with future=True engines there is no autocommit; it
        # looks like writes here may need an explicit con.commit() — confirm
        # against the sqlalchemy version in use.
        with self.get_sqlalchemy_engine().connect() as con:
            con.execute(statement)
        if invalidate:
            self._invalidate()

    def _invalidate(self):
        # Drop all cached sqlalchemy state; everything is recreated lazily.
        self._cached_engine = None
        self._cached_inspector = None
        self._table_names = None
        self._metadata_obj = None
        self._tables.clear()

    def _invalidate_other(self):
        pass

    def get_sqlalchemy_metadata(self) -> MetaData:
        """Return the sqlalchemy Metadata object for the underlying database.

        This is used internally, you typically don't need to access this attribute.
        """
        if self._metadata_obj is None:
            self._metadata_obj = MetaData()
        return self._metadata_obj

    def copy_database_file(self, target: str):
        """Copy the database file to 'target' and return a new wrapper for the copy."""
        # BUG FIX: don't fail when the target directory already exists.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        shutil.copy2(self.db_file_path, target)
        new_db = KiaraDatabase(db_file_path=target)
        return new_db

    def get_sqlalchemy_inspector(self) -> Inspector:
        """Return the (cached) sqlalchemy inspector for this database."""
        if self._cached_inspector is not None:
            return self._cached_inspector

        self._cached_inspector = inspect(self.get_sqlalchemy_engine())
        return self._cached_inspector

    @property
    def table_names(self) -> Iterable[str]:
        """The names of all tables in this database (cached)."""
        if self._table_names is not None:
            return self._table_names

        self._table_names = self.get_sqlalchemy_inspector().get_table_names()
        return self._table_names

    def get_sqlalchemy_table(self, table_name: str) -> Table:
        """Return the (cached, reflected) sqlalchemy table object for the given table name."""
        if table_name in self._tables.keys():
            return self._tables[table_name]

        table = Table(
            table_name,
            self.get_sqlalchemy_metadata(),
            autoload_with=self.get_sqlalchemy_engine(),
        )
        self._tables[table_name] = table
        return table
Attributes¶
db_file_path: str
pydantic-field
required
¶The path to the sqlite database file.
db_url: str
property
readonly
¶file_cid: CID
property
readonly
¶table_names: Iterable[str]
property
readonly
¶Methods¶
copy_database_file(self, target)
¶Source code in tabular/models/db.py
def copy_database_file(self, target: str):
    """Copy the underlying database file to 'target' and wrap the copy in a new KiaraDatabase.

    Arguments:
        target: the path of the copied database file (parent dirs are created)

    Returns:
        a new KiaraDatabase instance for the copied file
    """
    # BUG FIX: tolerate a pre-existing target directory; without exist_ok the
    # call raises FileExistsError when the parent folder is already there.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    shutil.copy2(self.db_file_path, target)
    new_db = KiaraDatabase(db_file_path=target)
    return new_db
create_if_not_exists(self)
¶Source code in tabular/models/db.py
def create_if_not_exists(self):
    """Create the database file if it does not exist yet; otherwise do nothing."""
    # Imported locally so sqlalchemy_utils is only required when this is called.
    from sqlalchemy_utils import create_database, database_exists

    if database_exists(self.db_url):
        return
    create_database(self.db_url)
create_in_temp_dir(init_statement=None, init_data=None)
classmethod
¶Source code in tabular/models/db.py
@classmethod
def create_in_temp_dir(
    cls,
    init_statement: Union[None, str, "TextClause"] = None,
    init_data: Union[Mapping[str, Any], None] = None,
):
    """Create a new sqlite database file in a temporary directory.

    Arguments:
        init_statement: (optional) an sql statement to run against the fresh database
        init_data: (optional) data to bind to the init statement

    Returns:
        the new (locked) database wrapper
    """
    temp_f = tempfile.mkdtemp()
    db_path = os.path.join(temp_f, "db.sqlite")

    def cleanup():
        # BUG FIX: remove the temporary *directory*. The previous code called
        # rmtree on the file path, which raises NotADirectoryError; that error
        # was silently swallowed by ignore_errors, so the temp dir leaked.
        shutil.rmtree(temp_f, ignore_errors=True)

    atexit.register(cleanup)

    db = cls(db_file_path=db_path)
    db.create_if_not_exists()

    if init_statement:
        db._unlock_db()
        db.execute_sql(statement=init_statement, data=init_data, invalidate=True)
        db._lock_db()
    return db
ensure_absolute_path(path)
classmethod
¶Source code in tabular/models/db.py
@validator("db_file_path", allow_reuse=True)
def ensure_absolute_path(cls, path: str):
    """Normalize the configured db file path to an absolute path.

    Raises:
        ValueError: if the parent folder of the path does not exist
    """
    absolute = os.path.abspath(path)
    parent = os.path.dirname(absolute)
    if not os.path.exists(parent):
        raise ValueError(f"Parent folder for database file does not exist: {absolute}")
    return absolute
execute_sql(self, statement, data=None, invalidate=False)
¶Execute an sql script.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
statement |
Union[str, TextClause] |
the sql statement |
required |
data |
Optional[Mapping[str, Any]] |
(optional) data, to be bound to the statement |
None |
invalidate |
bool |
whether to invalidate cached values within this object |
False |
Source code in tabular/models/db.py
def execute_sql(
    self,
    statement: Union[str, "TextClause"],
    data: Union[Mapping[str, Any], None] = None,
    invalidate: bool = False,
):
    """Execute an sql script.

    Arguments:
        statement: the sql statement
        data: (optional) data, to be bound to the statement
        invalidate: whether to invalidate cached values within this object
    """
    if isinstance(statement, str):
        statement = text(statement)
    if data:
        # BUG FIX: bindparams returns a *new* statement object; the old code
        # discarded the result, so the data was never actually bound.
        statement = statement.bindparams(**data)

    # NOTE(review): with future=True engines there is no autocommit; it looks
    # like writes here may need an explicit con.commit() — confirm against the
    # sqlalchemy version in use.
    with self.get_sqlalchemy_engine().connect() as con:
        con.execute(statement)
    if invalidate:
        self._invalidate()
get_sqlalchemy_engine(self)
¶Source code in tabular/models/db.py
def get_sqlalchemy_engine(self) -> "Engine":
    """Return the sqlalchemy engine for this database, creating and caching it on first use.

    While the database is locked, each new connection is switched into
    query-only mode via a sqlite PRAGMA.
    """
    if self._cached_engine is None:

        def _pragma_on_connect(dbapi_con, con_record):
            dbapi_con.execute("PRAGMA query_only = ON")

        engine = create_engine(self.db_url, future=True)
        if self._lock:
            event.listen(engine, "connect", _pragma_on_connect)
        self._cached_engine = engine

    return self._cached_engine
get_sqlalchemy_inspector(self)
¶Source code in tabular/models/db.py
def get_sqlalchemy_inspector(self) -> Inspector:
    """Return the sqlalchemy inspector for this database, creating and caching it on first use."""
    if self._cached_inspector is None:
        self._cached_inspector = inspect(self.get_sqlalchemy_engine())
    return self._cached_inspector
get_sqlalchemy_metadata(self)
¶Return the sqlalchemy Metadata object for the underlying database.
This is used internally, you typically don't need to access this attribute.
Source code in tabular/models/db.py
def get_sqlalchemy_metadata(self) -> MetaData:
    """Return the sqlalchemy Metadata object for the underlying database.

    This is used internally, you typically don't need to access this attribute.
    """
    meta = self._metadata_obj
    if meta is None:
        meta = MetaData()
        self._metadata_obj = meta
    return meta
get_sqlalchemy_table(self, table_name)
¶Return the (reflected) sqlalchemy table instance for the given table name.
Source code in tabular/models/db.py
def get_sqlalchemy_table(self, table_name: str) -> Table:
    """Return the (reflected) sqlalchemy table object for the given table name, caching it."""
    cached = self._tables.get(table_name)
    if cached is not None:
        return cached

    # Reflect the table structure from the live database.
    table = Table(
        table_name,
        self.get_sqlalchemy_metadata(),
        autoload_with=self.get_sqlalchemy_engine(),
    )
    self._tables[table_name] = table
    return table
SqliteTableSchema (BaseModel)
pydantic-model
¶
Source code in tabular/models/db.py
class SqliteTableSchema(BaseModel):
    """Describes the column layout and constraints of a single sqlite table."""

    columns: Dict[str, SqliteDataType] = Field(
        description="The table columns and their attributes."
    )
    index_columns: List[str] = Field(
        default_factory=list, description="The columns to index"
    )
    nullable_columns: List[str] = Field(
        default_factory=list, description="The columns that are nullable."
    )
    unique_columns: List[str] = Field(
        default_factory=list, description="The columns that should be marked 'UNIQUE'."
    )
    primary_key: Union[str, None] = Field(
        default=None, description="The primary key for this table."
    )

    def create_table_metadata(
        self,
        table_name: str,
    ) -> Tuple[MetaData, Table]:
        """Build the sqlalchemy metadata and table objects for this schema.

        Arguments:
            table_name: the name of the table

        Returns:
            a tuple of the freshly-created MetaData object and the table object
        """
        columns = [
            Column(
                name,
                SQLITE_SQLALCHEMY_TYPE_MAP[data_type],
                nullable=name in self.nullable_columns,
                primary_key=name == self.primary_key,
                index=name in self.index_columns,
                unique=name in self.unique_columns,
            )
            for name, data_type in self.columns.items()
        ]
        meta = MetaData()
        table = Table(table_name, meta, *columns)
        return meta, table

    def create_table(self, table_name: str, engine: Engine) -> Table:
        """Create the table in the database behind 'engine' and return the table object."""
        meta, table = self.create_table_metadata(table_name=table_name)
        meta.create_all(engine)
        return table
Attributes¶
columns: Dict[str, Literal['NULL', 'INTEGER', 'REAL', 'TEXT', 'BLOB']]
pydantic-field
required
¶The table columns and their attributes.
index_columns: List[str]
pydantic-field
¶The columns to index
nullable_columns: List[str]
pydantic-field
¶The columns that are nullable.
primary_key: str
pydantic-field
¶The primary key for this table.
unique_columns: List[str]
pydantic-field
¶The columns that should be marked 'UNIQUE'.
Methods¶
create_table(self, table_name, engine)
¶Source code in tabular/models/db.py
def create_table(self, table_name: str, engine: Engine) -> Table:
    """Materialize this schema as a table in the database behind 'engine' and return it."""
    metadata, table_obj = self.create_table_metadata(table_name=table_name)
    metadata.create_all(engine)
    return table_obj
create_table_metadata(self, table_name)
¶Create the sqlalchemy metadata and table objects for a table.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
column_attrs |
a map with the column name as key, and column details ('type', 'extra_column_info', 'create_index') as values |
required |
Source code in tabular/models/db.py
def create_table_metadata(
    self,
    table_name: str,
) -> Tuple[MetaData, Table]:
    """Build the sqlalchemy metadata and table objects for this schema.

    Arguments:
        table_name: the name of the table

    Returns:
        a tuple of the freshly-created MetaData object and the table object
    """
    columns = [
        Column(
            name,
            SQLITE_SQLALCHEMY_TYPE_MAP[data_type],
            nullable=name in self.nullable_columns,
            primary_key=name == self.primary_key,
            index=name in self.index_columns,
            unique=name in self.unique_columns,
        )
        for name, data_type in self.columns.items()
    ]
    meta = MetaData()
    table = Table(table_name, meta, *columns)
    return meta, table
table
¶
Classes¶
KiaraTable (KiaraModel)
pydantic-model
¶
A wrapper class to manage tabular data in a memory efficient way.
Source code in tabular/models/table.py
class KiaraTable(KiaraModel):
    """A wrapper class to manage tabular data in a memory efficient way."""

    @classmethod
    def create_table(cls, data: Any) -> "KiaraTable":
        """Create a `KiaraTable` instance from an Apache Arrow Table, or dict of lists.

        Raises:
            Exception: if the data can't be converted to an Arrow table
        """
        table_obj = None
        if isinstance(data, KiaraTable):
            return data

        if isinstance(data, pa.Table):
            table_obj = data
        else:
            # Best-effort conversion (e.g. a dict of lists); failure falls
            # through to the error below.
            try:
                table_obj = pa.table(data)
            except Exception:
                pass

        if table_obj is None:
            raise Exception(
                f"Can't create table, invalid source data type: {type(data)}."
            )

        obj = KiaraTable()
        obj._table_obj = table_obj
        return obj

    # The path where the table data is stored (for internal or read-only use).
    data_path: Union[None, str] = Field(
        # BUG FIX: description previously said "backing this array" — a
        # copy/paste from the KiaraArray model; this field backs a table.
        description="The path to the (feather) file backing this table.",
        default=None,
    )
    # In-memory table; loaded lazily from 'data_path' on first access if unset.
    _table_obj: pa.Table = PrivateAttr(default=None)

    def _retrieve_data_to_hash(self) -> Any:
        # Hashing is not implemented for bare tables.
        raise NotImplementedError()

    @property
    def arrow_table(self) -> pa.Table:
        """Return the data as an Apache Arrow Table instance."""
        if self._table_obj is not None:
            return self._table_obj

        if not self.data_path:
            raise Exception("Can't retrieve table data, object not initialized (yet).")

        with pa.memory_map(self.data_path, "r") as source:
            table: pa.Table = pa.ipc.open_file(source).read_all()

        self._table_obj = table
        return self._table_obj

    @property
    def column_names(self) -> Iterable[str]:
        """Retrieve the names of all the columns of this table."""
        return self.arrow_table.column_names

    @property
    def num_rows(self) -> int:
        """Return the number of rows in this table."""
        return self.arrow_table.num_rows

    def to_pydict(self):
        """Convert and return the table data as a dictionary of lists.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pydict()

    def to_pylist(self):
        """Convert and return the table data as a list of rows/dictionaries.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pylist()

    def to_pandas(self):
        """Convert and return the table data to a Pandas dataframe.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pandas()
Attributes¶
arrow_table: Table
property
readonly
¶Return the data as an Apache Arrow Table instance.
column_names: Iterable[str]
property
readonly
¶Retrieve the names of all the columns of this table.
data_path: str
pydantic-field
¶The path to the (feather) file backing this array.
num_rows: int
property
readonly
¶Return the number of rows in this table.
Methods¶
create_table(data)
classmethod
¶Create a KiaraTable
instance from an Apache Arrow Table, or dict of lists.
Source code in tabular/models/table.py
@classmethod
def create_table(cls, data: Any) -> "KiaraTable":
    """Create a `KiaraTable` instance from an Apache Arrow Table, or dict of lists."""
    if isinstance(data, KiaraTable):
        return data

    if isinstance(data, pa.Table):
        table_obj = data
    else:
        # Best-effort conversion; anything pa.table understands is accepted.
        try:
            table_obj = pa.table(data)
        except Exception:
            table_obj = None

    if table_obj is None:
        raise Exception(
            f"Can't create table, invalid source data type: {type(data)}."
        )

    obj = KiaraTable()
    obj._table_obj = table_obj
    return obj
to_pandas(self)
¶Convert and return the table data to a Pandas dataframe.
This will load all data into memory, so you might or might not want to do that.
Source code in tabular/models/table.py
def to_pandas(self):
    """Convert and return the table data as a Pandas dataframe.

    Note: this loads all the data into memory.
    """
    table = self.arrow_table
    return table.to_pandas()
to_pydict(self)
¶Convert and return the table data as a dictionary of lists.
This will load all data into memory, so you might or might not want to do that.
Source code in tabular/models/table.py
def to_pydict(self):
    """Convert and return the table data as a dictionary of lists.

    Note: this loads all the data into memory.
    """
    table = self.arrow_table
    return table.to_pydict()
to_pylist(self)
¶Convert and return the table data as a list of rows/dictionaries.
This will load all data into memory, so you might or might not want to do that.
Source code in tabular/models/table.py
def to_pylist(self):
    """Convert and return the table data as a list of rows/dictionaries.

    Note: this loads all the data into memory.
    """
    table = self.arrow_table
    return table.to_pylist()
KiaraTableMetadata (ValueMetadata)
pydantic-model
¶
Table properties (column schema, row count, size).
Source code in tabular/models/table.py
class KiaraTableMetadata(ValueMetadata):
    """Table properties (column schema, row count, size in bytes)."""

    # Key under which this metadata is registered/attached.
    _metadata_key = "table"

    @classmethod
    def retrieve_supported_data_types(cls) -> Iterable[str]:
        # This metadata model only applies to values of the 'table' data type.
        return ["table"]

    @classmethod
    def create_value_metadata(cls, value: "Value") -> "KiaraTableMetadata":
        # Derive a TableMetadata model from the Arrow schema of the wrapped table.
        kiara_table: KiaraTable = value.data
        table: pa.Table = kiara_table.arrow_table
        table_schema = {}
        for name in table.schema.names:
            field = table.schema.field(name)
            md = field.metadata
            _type = field.type
            if not md:
                # No field-level metadata attached; record the arrow type id instead.
                md = {
                    "arrow_type_id": _type.id,
                }
            _d = {
                "type_name": str(_type),
                "metadata": md,
            }
            table_schema[name] = _d
        schema = {
            "column_names": table.column_names,
            "column_schema": table_schema,
            "rows": table.num_rows,
            "size": table.nbytes,
        }
        # 'construct' skips pydantic validation; the values built above are trusted.
        md = TableMetadata.construct(**schema)
        return KiaraTableMetadata.construct(table=md)

    table: TableMetadata = Field(description="The table schema.")
Attributes¶
table: TableMetadata
pydantic-field
required
¶The table schema.
create_value_metadata(value)
classmethod
¶Source code in tabular/models/table.py
@classmethod
def create_value_metadata(cls, value: "Value") -> "KiaraTableMetadata":
    # Derive a TableMetadata model from the Arrow schema of the wrapped table.
    kiara_table: KiaraTable = value.data
    table: pa.Table = kiara_table.arrow_table
    table_schema = {}
    for name in table.schema.names:
        field = table.schema.field(name)
        md = field.metadata
        _type = field.type
        if not md:
            # No field-level metadata attached; record the arrow type id instead.
            md = {
                "arrow_type_id": _type.id,
            }
        _d = {
            "type_name": str(_type),
            "metadata": md,
        }
        table_schema[name] = _d
    schema = {
        "column_names": table.column_names,
        "column_schema": table_schema,
        "rows": table.num_rows,
        "size": table.nbytes,
    }
    # 'construct' skips pydantic validation; the values built above are trusted.
    md = TableMetadata.construct(**schema)
    return KiaraTableMetadata.construct(table=md)
retrieve_supported_data_types()
classmethod
¶Source code in tabular/models/table.py
@classmethod
def retrieve_supported_data_types(cls) -> Iterable[str]:
    """Return the names of the data types this metadata model supports."""
    supported = ["table"]
    return supported