Skip to content

models

This module contains the metadata (and other) models that are used in the kiara_plugin.tabular package.

These models are convenience wrappers that make it easier for kiara to find, create, manage and version metadata (as well as other types of models) that is attached to data, as well as to kiara modules.

Metadata models must be sub-classes of `kiara.metadata.MetadataModel`. Other models usually sub-class pydantic's `BaseModel` or implement custom base classes.

Classes

StorageBackend

Bases: BaseModel

Describes the storage backend type that is used, and (optionally) some backend-specific properties.

Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/tabular/models/__init__.py
21
22
23
24
25
26
27
class StorageBackend(BaseModel):
    """Describes the storage backend type that is used, and (optionally) some backend-specific properties."""

    # Identifier of the backend; no default is declared, so it must be supplied.
    name: str = Field(description="The name of the storage backend.")
    # Free-form, backend-specific settings. `default_factory=dict` gives every
    # instance its own fresh empty dict when no value is passed.
    properties: Dict[str, Any] = Field(
        default_factory=dict, description="Backend-specific properties."
    )

Attributes

name: str = Field(description='The name of the storage backend.') class-attribute instance-attribute
properties: Dict[str, Any] = Field(description='Backend-specific properties.', default_factory=dict) class-attribute instance-attribute

ColumnSchema

Bases: BaseModel

Describes properties of a single column of the 'table' data type.

Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/tabular/models/__init__.py
30
31
32
33
34
35
36
37
38
39
40
41
42
class ColumnSchema(BaseModel):
    """Describes properties of a single column of the 'table' data type."""

    # Column type as named by the storage backend; no default is declared,
    # so it must be supplied.
    type_name: str = Field(
        description="The type name of the column (backend-specific)."
    )
    # Additional per-column metadata, as a mapping of string keys to dicts.
    # Each instance gets its own empty dict when nothing is passed.
    metadata: Dict[str, Dict[str, Any]] = Field(
        default_factory=dict, description="Other metadata for the column."
    )

    def _retrieve_data_to_hash(self) -> Any:
        """Return the dict representation of this model as the data to hash."""
        hash_input = self.dict()
        return hash_input

Attributes

type_name: str = Field(description='The type name of the column (backend-specific).') class-attribute instance-attribute
metadata: Dict[str, Dict[str, Any]] = Field(description='Other metadata for the column.', default_factory=dict) class-attribute instance-attribute

TableMetadata

Bases: KiaraModel

Describes properties for the 'table' data type.

Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/tabular/models/__init__.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class TableMetadata(KiaraModel):
    """Describes properties for the 'table' data type."""

    @classmethod
    def create_from_table(cls, table: "KiaraTable") -> "TableMetadata":
        """Assemble the metadata that describes ``table``.

        Reads the Arrow table exposed via ``table.arrow_table`` and records,
        per column, the Arrow type (its string form and its type id), the
        column size in bytes and the column metadata returned by
        ``table.get_column_metadata``. Table-level row count and byte size are
        recorded as well.

        Args:
            table: the table to describe; must provide ``arrow_table``,
                ``get_column_metadata(column_name=...)``, ``column_names`` and
                ``num_rows``.

        Returns:
            A metadata instance created via ``construct()``, i.e. *without*
            pydantic validation of the assembled values.
        """
        arrow_table = table.arrow_table
        table_schema: Dict[str, ColumnSchema] = {}

        backend_properties: Dict[str, Any] = {"column_types": {}}

        for name in arrow_table.schema.names:
            field = arrow_table.schema.field(name)
            md = table.get_column_metadata(column_name=name)
            _type = field.type
            backend_properties["column_types"][name] = {
                "type_id": _type.id,
                "size": arrow_table[name].nbytes,
            }
            # `construct()` (used for the result below) neither validates nor
            # coerces nested values, so plain dicts would stay plain dicts and
            # break `_retrieve_data_to_hash`, which calls `.dict()` on every
            # column schema. Build real `ColumnSchema` objects instead; they
            # are created unvalidated too, to keep the original semantics.
            table_schema[name] = ColumnSchema.construct(
                type_name=str(_type), metadata=md
            )

        backend = StorageBackend(name="arrow", properties=backend_properties)
        schema = {
            "column_names": table.column_names,
            "column_schema": table_schema,
            "backend": backend,
            "rows": table.num_rows,
            "size": arrow_table.nbytes,
        }

        # Use `cls` rather than the hard-coded class name so that subclasses
        # calling this alternate constructor get an instance of their own type.
        return cls.construct(**schema)

    column_names: List[str] = Field(description="The name of the columns of the table.")
    column_schema: Dict[str, ColumnSchema] = Field(
        description="The schema description of the table."
    )
    backend: StorageBackend = Field(description="The storage backend that is used.")
    rows: int = Field(description="The number of rows the table contains.")
    size: Union[int, None] = Field(
        description="The tables size in bytes.", default=None
    )

    def _retrieve_data_to_hash(self) -> Any:
        """Return the data to hash: per-column schemas, row count and size.

        Column names and backend details are not part of the returned data.
        """
        return {
            "column_schemas": {k: v.dict() for k, v in self.column_schema.items()},
            "rows": self.rows,
            "size": self.size,
        }

Attributes

column_names: List[str] = Field(description='The name of the columns of the table.') class-attribute instance-attribute
column_schema: Dict[str, ColumnSchema] = Field(description='The schema description of the table.') class-attribute instance-attribute
backend: StorageBackend = Field(description='The storage backend that is used.') class-attribute instance-attribute
rows: int = Field(description='The number of rows the table contains.') class-attribute instance-attribute
size: Union[int, None] = Field(description='The tables size in bytes.', default=None) class-attribute instance-attribute

Functions

create_from_table(table: KiaraTable) -> TableMetadata classmethod
Source code in /opt/hostedtoolcache/Python/3.11.4/x64/lib/python3.11/site-packages/kiara_plugin/tabular/models/__init__.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
@classmethod
def create_from_table(cls, table: "KiaraTable") -> "TableMetadata":
    """Assemble the metadata that describes ``table``.

    Reads the Arrow table exposed via ``table.arrow_table`` and records, per
    column, the Arrow type (its string form and its type id), the column size
    in bytes and the column metadata returned by
    ``table.get_column_metadata``. Table-level row count and byte size are
    recorded as well.

    Args:
        table: the table to describe; must provide ``arrow_table``,
            ``get_column_metadata(column_name=...)``, ``column_names`` and
            ``num_rows``.

    Returns:
        A metadata instance created via ``construct()``, i.e. *without*
        pydantic validation of the assembled values.
    """
    arrow_table = table.arrow_table
    table_schema: Dict[str, ColumnSchema] = {}

    backend_properties: Dict[str, Any] = {"column_types": {}}

    for name in arrow_table.schema.names:
        field = arrow_table.schema.field(name)
        md = table.get_column_metadata(column_name=name)
        _type = field.type
        backend_properties["column_types"][name] = {
            "type_id": _type.id,
            "size": arrow_table[name].nbytes,
        }
        # `construct()` (used for the result below) neither validates nor
        # coerces nested values, so plain dicts would stay plain dicts and
        # break code that calls `.dict()` on each column schema. Build real
        # `ColumnSchema` objects instead; they are created unvalidated too,
        # to keep the original semantics.
        table_schema[name] = ColumnSchema.construct(
            type_name=str(_type), metadata=md
        )

    backend = StorageBackend(name="arrow", properties=backend_properties)
    schema = {
        "column_names": table.column_names,
        "column_schema": table_schema,
        "backend": backend,
        "rows": table.num_rows,
        "size": arrow_table.nbytes,
    }

    # Use `cls` rather than the hard-coded class name so that subclasses
    # calling this alternate constructor get an instance of their own type.
    return cls.construct(**schema)