Skip to content

table

Classes

KiaraTable (KiaraModel) pydantic-model

A wrapper class to manage tabular data in a memory efficient way.

Source code in tabular/models/table.py
class KiaraTable(KiaraModel):
    """A wrapper class to manage tabular data in a memory efficient way.

    The table data is either attached directly as an Apache Arrow table (see
    `create_table`), or read on first access from the file that `data_path`
    points to.
    """

    @classmethod
    def create_table(cls, data: Any) -> "KiaraTable":
        """Create a `KiaraTable` instance from an Apache Arrow Table, or dict of lists.

        Arguments:
            data: an existing `KiaraTable` (returned as is), a `pa.Table`, or any
                other value that `pa.table` is able to convert (e.g. a dict of lists).

        Returns:
            a `KiaraTable` that wraps the Arrow table

        Raises:
            Exception: if `data` can't be converted into an Arrow table; the
                conversion error is attached as the cause of this exception.
        """

        if isinstance(data, KiaraTable):
            return data

        if isinstance(data, pa.Table):
            table_obj = data
        else:
            try:
                table_obj = pa.table(data)
            except Exception as e:
                # Chain the conversion error instead of swallowing it, so the
                # traceback shows why the data was rejected.
                raise Exception(
                    f"Can't create table, invalid source data type: {type(data)}."
                ) from e

        obj = KiaraTable()
        obj._table_obj = table_obj
        return obj

    data_path: Union[None, str] = Field(
        description="The path to the (feather) file backing this array.", default=None
    )
    """The path where the table object is stored (for internal or read-only use)."""
    _table_obj: pa.Table = PrivateAttr(default=None)

    def _retrieve_data_to_hash(self) -> Any:
        """Not implemented for tables, always raises `NotImplementedError`."""
        raise NotImplementedError()

    @property
    def arrow_table(self) -> pa.Table:
        """Return the data as an Apache Arrow Table instance.

        If no table object is attached yet, the table is read from the memory-mapped
        file at `data_path` and cached on this instance for subsequent calls.

        Raises:
            Exception: if neither a table object nor a `data_path` is available.
        """

        if self._table_obj is not None:
            return self._table_obj

        if not self.data_path:
            raise Exception("Can't retrieve table data, object not initialized (yet).")

        with pa.memory_map(self.data_path, "r") as source:
            table: pa.Table = pa.ipc.open_file(source).read_all()

        # cache the loaded table, so the file is only opened once
        self._table_obj = table
        return self._table_obj

    @property
    def column_names(self) -> Iterable[str]:
        """Retrieve the names of all the columns of this table."""
        return self.arrow_table.column_names

    @property
    def num_rows(self) -> int:
        """Return the number of rows in this table."""
        return self.arrow_table.num_rows

    def to_pydict(self):
        """Convert and return the table data as a dictionary of lists.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pydict()

    def to_pylist(self):
        """Convert and return the table data as a list of rows/dictionaries.

        This will load all data into memory, so you might or might not want to do that.
        """

        return self.arrow_table.to_pylist()

    def to_pandas(self):
        """Convert and return the table data to a Pandas dataframe.

        This will load all data into memory, so you might or might not want to do that.
        """
        return self.arrow_table.to_pandas()

Attributes

arrow_table: Table property readonly

Return the data as an Apache Arrow Table instance.

column_names: Iterable[str] property readonly

Retrieve the names of all the columns of this table.

data_path: str pydantic-field

The path to the (feather) file backing this array.

num_rows: int property readonly

Return the number of rows in this table.

Methods

create_table(data) classmethod

Create a KiaraTable instance from an Apache Arrow Table, or dict of lists.

Source code in tabular/models/table.py
@classmethod
def create_table(cls, data: Any) -> "KiaraTable":
    """Create a `KiaraTable` instance from an Apache Arrow Table, or dict of lists.

    Arguments:
        data: an existing `KiaraTable` (returned as is), a `pa.Table`, or any
            other value that `pa.table` is able to convert (e.g. a dict of lists).

    Returns:
        a `KiaraTable` that wraps the Arrow table

    Raises:
        Exception: if `data` can't be converted into an Arrow table; the
            conversion error is attached as the cause of this exception.
    """

    if isinstance(data, KiaraTable):
        return data

    if isinstance(data, pa.Table):
        table_obj = data
    else:
        try:
            table_obj = pa.table(data)
        except Exception as e:
            # Chain the conversion error instead of swallowing it, so the
            # traceback shows why the data was rejected.
            raise Exception(
                f"Can't create table, invalid source data type: {type(data)}."
            ) from e

    obj = KiaraTable()
    obj._table_obj = table_obj
    return obj
to_pandas(self)

Convert and return the table data to a Pandas dataframe.

This will load all data into memory, so you might or might not want to do that.

Source code in tabular/models/table.py
def to_pandas(self):
    """Return the content of this table as a Pandas dataframe.

    The conversion is delegated to the underlying Arrow table. Be aware that this
    loads all of the data into memory.
    """
    arrow_data = self.arrow_table
    return arrow_data.to_pandas()
to_pydict(self)

Convert and return the table data as a dictionary of lists.

This will load all data into memory, so you might or might not want to do that.

Source code in tabular/models/table.py
def to_pydict(self):
    """Return the content of this table as a dictionary of lists.

    The conversion is delegated to the underlying Arrow table. Be aware that this
    loads all of the data into memory.
    """
    arrow_data = self.arrow_table
    return arrow_data.to_pydict()
to_pylist(self)

Convert and return the table data as a list of rows/dictionaries.

This will load all data into memory, so you might or might not want to do that.

Source code in tabular/models/table.py
def to_pylist(self):
    """Return the content of this table as a list of rows/dictionaries.

    The conversion is delegated to the underlying Arrow table. Be aware that this
    loads all of the data into memory.
    """
    arrow_data = self.arrow_table
    return arrow_data.to_pylist()

KiaraTableMetadata (ValueMetadata) pydantic-model

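Table metadata: the column names, the schema of each column, the number of rows, and the size of the table in bytes.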

Source code in tabular/models/table.py
class KiaraTableMetadata(ValueMetadata):
    """Metadata for table values: column names, column schema, row count and size."""

    _metadata_key = "table"

    @classmethod
    def retrieve_supported_data_types(cls) -> Iterable[str]:
        """Return the names of the data types this metadata model supports."""
        supported = ["table"]
        return supported

    @classmethod
    def create_value_metadata(cls, value: "Value") -> "KiaraTableMetadata":
        """Assemble the table metadata for the provided value.

        Arguments:
            value: the value to describe, its `data` attribute is used as a `KiaraTable`

        Returns:
            the metadata instance, built via `construct` from the Arrow table's
            column names, per-column schema, number of rows and size in bytes
        """

        arrow_table: pa.Table = value.data.arrow_table
        arrow_schema = arrow_table.schema

        def describe_column(column_name: str) -> dict:
            # Fields without metadata of their own get the Arrow type id instead.
            column = arrow_schema.field(column_name)
            column_md = column.metadata
            column_type = column.type
            if not column_md:
                column_md = {"arrow_type_id": column_type.id}
            return {"type_name": str(column_type), "metadata": column_md}

        column_schema = {
            column_name: describe_column(column_name)
            for column_name in arrow_schema.names
        }

        table_md = TableMetadata.construct(
            column_names=arrow_table.column_names,
            column_schema=column_schema,
            rows=arrow_table.num_rows,
            size=arrow_table.nbytes,
        )
        return KiaraTableMetadata.construct(table=table_md)

    # the metadata item produced by 'create_value_metadata'
    table: TableMetadata = Field(description="The table schema.")

Attributes

table: TableMetadata pydantic-field required

The table schema.

create_value_metadata(value) classmethod
Source code in tabular/models/table.py
@classmethod
def create_value_metadata(cls, value: "Value") -> "KiaraTableMetadata":
    """Assemble the table metadata for the provided value.

    Arguments:
        value: the value to describe, its `data` attribute is used as a `KiaraTable`

    Returns:
        the metadata instance, built via `construct` from the Arrow table's
        column names, per-column schema, number of rows and size in bytes
    """

    arrow_table: pa.Table = value.data.arrow_table
    arrow_schema = arrow_table.schema

    def describe_column(column_name: str) -> dict:
        # Fields without metadata of their own get the Arrow type id instead.
        column = arrow_schema.field(column_name)
        column_md = column.metadata
        column_type = column.type
        if not column_md:
            column_md = {"arrow_type_id": column_type.id}
        return {"type_name": str(column_type), "metadata": column_md}

    column_schema = {
        column_name: describe_column(column_name)
        for column_name in arrow_schema.names
    }

    table_md = TableMetadata.construct(
        column_names=arrow_table.column_names,
        column_schema=column_schema,
        rows=arrow_table.num_rows,
        size=arrow_table.nbytes,
    )
    return KiaraTableMetadata.construct(table=table_md)
retrieve_supported_data_types() classmethod
Source code in tabular/models/table.py
@classmethod
def retrieve_supported_data_types(cls) -> Iterable[str]:
    """Return the names of the data types this metadata model supports."""
    supported = ["table"]
    return supported