Skip to content

module_types

table.filters

                                                                                
 Documentation                                                                  
                          -- n/a --                                             
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          filter_n…   string   The name    yes                  
                                               of the                           
                                               filter.                          
                                                                                
 Python class                                                                   
                          python_class_name    TableFiltersModule               
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              filter_name: str = self.get_config_value("filt…   
                              data_type_data = self.__class__.get_supported_…   
                              data_type = data_type_data["type"]                
                              # data_type_config = data_type_data["type_conf…   
                              # TODO: ensure value is of the right type?        
                                                                                
                              source_obj = inputs.get_value_obj("value")        
                                                                                
                              func_name = f"filter__{filter_name}"              
                              if not hasattr(self, func_name):                  
                                  raise Exception(                              
                                      f"Can't apply filter '{filter_name}': …   
                                  )                                             
                                                                                
                              func = getattr(self, func_name)                   
                              # TODO: check signature?                          
                                                                                
                              filter_inputs = {}                                
                              for k, v in inputs.items():                       
                                  if k == data_type:                            
                                      continue                                  
                                  filter_inputs[k] = v.data                     
                                                                                
                              result = func(value=source_obj, filter_inputs=   
                                                                                
                              if result is None:                                
                                  outputs.set_value("value", source_obj)        
                              else:                                             
                                  outputs.set_value("value", result)            
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

render.database

                                                                                
 Documentation                                                                  
                          -- n/a --                                             
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          source_t…   string   The         yes                  
                                               (kiara)                          
                                               data type                        
                                               to be                            
                                               rendered.                        
                                                                                
                          target_t…   string   The         yes                  
                                               (kiara)                          
                                               data type                        
                                               of the                           
                                               rendered                         
                                               result.                          
                                                                                
 Python class                                                                   
                          python_class_name    RenderDatabaseModule             
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              source_type = self.get_config_value("source_ty…   
                              target_type = self.get_config_value("target_ty…   
                                                                                
                              value: Value = inputs.get_value_obj("value")      
                                                                                
                              render_scene: DictModel = inputs.get_value_dat…   
                              if render_scene:                                  
                                  rc = render_scene.dict_data                   
                              else:                                             
                                  rc = {}                                       
                                                                                
                              func_name = f"render__{source_type}__as__{targ…   
                                                                                
                              func = getattr(self, func_name)                   
                              result = func(value=value, render_config=rc)      
                              if isinstance(result, RenderValueResult):         
                                  render_scene_result: RenderValueResult = r…   
                              else:                                             
                                  render_scene_result = RenderValueResult(      
                                      value_id=value.value_id,                  
                                      render_config=rc,                         
                                      render_manifest=self.manifest.manifest…   
                                      rendered=result,                          
                                      related_scenes={},                        
                                  )                                             
                              render_scene_result.manifest_lookup[self.manif…   
                                                                                
                              outputs.set_value("render_value_result", rende…   
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

render.table

                                                                                
 Documentation                                                                  
                          -- n/a --                                             
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          source_t…   string   The         yes                  
                                               (kiara)                          
                                               data type                        
                                               to be                            
                                               rendered.                        
                                                                                
                          target_t…   string   The         yes                  
                                               (kiara)                          
                                               data type                        
                                               of the                           
                                               rendered                         
                                               result.                          
                                                                                
 Python class                                                                   
                          python_class_name    RenderTableModule                
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              source_type = self.get_config_value("source_ty…   
                              target_type = self.get_config_value("target_ty…   
                                                                                
                              value: Value = inputs.get_value_obj("value")      
                                                                                
                              render_scene: DictModel = inputs.get_value_dat…   
                              if render_scene:                                  
                                  rc = render_scene.dict_data                   
                              else:                                             
                                  rc = {}                                       
                                                                                
                              func_name = f"render__{source_type}__as__{targ…   
                                                                                
                              func = getattr(self, func_name)                   
                              result = func(value=value, render_config=rc)      
                              if isinstance(result, RenderValueResult):         
                                  render_scene_result: RenderValueResult = r…   
                              else:                                             
                                  render_scene_result = RenderValueResult(      
                                      value_id=value.value_id,                  
                                      render_config=rc,                         
                                      render_manifest=self.manifest.manifest…   
                                      rendered=result,                          
                                      related_scenes={},                        
                                  )                                             
                              render_scene_result.manifest_lookup[self.manif…   
                                                                                
                              outputs.set_value("render_value_result", rende…   
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

export.table

                                                                                
 Documentation                                                                  
                          Export table data items.                              
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          source_t…   string   The type    yes                  
                                               of the                           
                                               source                           
                                               data that                        
                                               is going                         
                                               to be                            
                                               exported.                        
                                                                                
                          target_p…   string   The name    yes                  
                                               of the                           
                                               target                           
                                               profile.                         
                                               Used to                          
                                               distingu…                        
                                               different                        
                                               target                           
                                               formats                          
                                               for the                          
                                               same data                        
                                               type.                            
                                                                                
 Python class                                                                   
                          python_class_name    ExportTableModule                
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              target_profile: str = self.get_config_value("t…   
                              source_type: str = self.get_config_value("sour…   
                                                                                
                              export_metadata = inputs.get_value_data("expor…   
                                                                                
                              source_obj = inputs.get_value_obj(source_type)    
                              source = source_obj.data                          
                                                                                
                              func_name = f"export__{source_type}__as__{targ…   
                              if not hasattr(self, func_name):                  
                                  raise Exception(                              
                                      f"Can't export '{source_type}' value: …   
                                  )                                             
                                                                                
                              base_path = inputs.get_value_data("base_path")    
                              if base_path is None:                             
                                  base_path = os.getcwd()                       
                              name = inputs.get_value_data("name")              
                              if not name:                                      
                                  name = str(source_obj.value_id)               
                                                                                
                              func = getattr(self, func_name)                   
                              # TODO: check signature?                          
                                                                                
                              base_path = os.path.abspath(base_path)            
                              os.makedirs(base_path, exist_ok=True)             
                              result = func(value=source, base_path=base_pat…   
                                                                                
                              if isinstance(result, Mapping):                   
                                  result = DataExportResult(**result)           
                              elif isinstance(result, str):                     
                                  result = DataExportResult(files=[result])     
                                                                                
                              if not isinstance(result, DataExportResult):      
                                  raise KiaraProcessingException(               
                                      f"Can't export value: invalid result t…   
                                  )                                             
                                                                                
                              if export_metadata:                               
                                  metadata_file = Path(os.path.join(base_pat…   
                                  value_info = source_obj.create_info()         
                                  value_json = value_info.json()                
                                  metadata_file.write_text(value_json)          
                                                                                
                                  result.files.append(metadata_file.as_posix…   
                                                                                
                              # schema = ValueSchema(type=self.get_target_va…   
                                                                                
                              # value_lineage = ValueLineage.from_module_and…   
                              #     module=self, output_name=output_key, inp…   
                              # )                                               
                              # value: Value = self._kiara.data_registry.reg…   
                              #     value_data=result, value_schema=schema, …   
                              # )                                               
                                                                                
                              outputs.set_value("export_details", result)       
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

load.array

                                                                                
 Documentation                                                                  
                          Deserialize array data.                               
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          serializ…   string   The name    yes                  
                                               of the                           
                                               serializ…                        
                                               profile                          
                                               used to                          
                                               serialize                        
                                               the                              
                                               source                           
                                               value.                           
                                                                                
                          target_p…   string   The         yes                  
                                               profile                          
                                               name of                          
                                               the                              
                                               de-seria…                        
                                               result                           
                                               data.                            
                                                                                
                          value_ty…   string   The value   yes                  
                                               type of                          
                                               the                              
                                               actual                           
                                               (unseria…                        
                                               value.                           
                                                                                
 Python class                                                                   
                          python_class_name    DeserializeArrayModule           
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              value_type = self.get_config_value("value_type…   
                              serialized_value = inputs.get_value_obj(value_…   
                              config = inputs.get_value_obj("deserialization…   
                                                                                
                              target_profile = self.get_config_value("target…   
                              func_name = f"to__{target_profile}"               
                              func = getattr(self, func_name)                   
                                                                                
                              if config.is_set:                                 
                                  _config = config.data                         
                              else:                                             
                                  _config = {}                                  
                                                                                
                              result: Any = func(data=serialized_value.seria…   
                              outputs.set_value("python_object", result)        
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

load.database

                                                                                
 Documentation                                                                  
                          -- n/a --                                             
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          serializ…   string   The name    yes                  
                                               of the                           
                                               serializ…                        
                                               profile                          
                                               used to                          
                                               serialize                        
                                               the                              
                                               source                           
                                               value.                           
                                                                                
                          target_p…   string   The         yes                  
                                               profile                          
                                               name of                          
                                               the                              
                                               de-seria…                        
                                               result                           
                                               data.                            
                                                                                
                          value_ty…   string   The value   yes                  
                                               type of                          
                                               the                              
                                               actual                           
                                               (unseria…                        
                                               value.                           
                                                                                
 Python class                                                                   
                          python_class_name    LoadDatabaseFromDiskModule       
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              value_type = self.get_config_value("value_type…   
                              serialized_value = inputs.get_value_obj(value_…   
                              config = inputs.get_value_obj("deserialization…   
                                                                                
                              target_profile = self.get_config_value("target…   
                              func_name = f"to__{target_profile}"               
                              func = getattr(self, func_name)                   
                                                                                
                              if config.is_set:                                 
                                  _config = config.data                         
                              else:                                             
                                  _config = {}                                  
                                                                                
                              result: Any = func(data=serialized_value.seria…   
                              outputs.set_value("python_object", result)        
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

load.table

                                                                                
 Documentation                                                                  
                          -- n/a --                                             
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          serializ…   string   The name    yes                  
                                               of the                           
                                               serializ…                        
                                               profile                          
                                               used to                          
                                               serialize                        
                                               the                              
                                               source                           
                                               value.                           
                                                                                
                          target_p…   string   The         yes                  
                                               profile                          
                                               name of                          
                                               the                              
                                               de-seria…                        
                                               result                           
                                               data.                            
                                                                                
                          value_ty…   string   The value   yes                  
                                               type of                          
                                               the                              
                                               actual                           
                                               (unseria…                        
                                               value.                           
                                                                                
 Python class                                                                   
                          python_class_name    DeserializeTableModule           
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              value_type = self.get_config_value("value_type…   
                              serialized_value = inputs.get_value_obj(value_…   
                              config = inputs.get_value_obj("deserialization…   
                                                                                
                              target_profile = self.get_config_value("target…   
                              func_name = f"to__{target_profile}"               
                              func = getattr(self, func_name)                   
                                                                                
                              if config.is_set:                                 
                                  _config = config.data                         
                              else:                                             
                                  _config = {}                                  
                                                                                
                              result: Any = func(data=serialized_value.seria…   
                              outputs.set_value("python_object", result)        
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

parse.date_array

                                                                                
 Documentation                                                                  
                          Create an array of date objects from an array of      
                          strings.                                              
                                                                                
                          This module is very simplistic at the moment, more    
                          functionality and options will be added in the        
                          future.                                               
                                                                                
                          At its core, this module uses the standard parser     
                          from the dateutil package to parse strings into       
                          dates. As this parser can't handle complex strings,   
                          the input strings can be pre-processed in the         
                          following ways:                                       
                                                                                
                           • 'cut' non-relevant parts of the string (using   
                             'min_index' & 'max_index' input/config options)    
                           • remove matching tokens from the string, and     
                             replace them with a single whitespace (using the   
                             'remove_tokens' option)                            
                                                                                
                          By default, if an input string can't be parsed this   
                          module will raise an exception. This can be           
                          prevented by setting this module's 'force_non_null'  
                          config option or input to 'False', in which case      
                          un-parsable strings will appear as 'NULL' value in    
                          the resulting array.                                  
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field      Type      Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          add_inp…   boolean   If set to   no         true      
                                               'True',                          
                                               parse                            
                                               options                          
                                               will be                          
                                               available                        
                                               as                               
                                               inputs.                          
                                                                                
                          constan…   object    Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults   object    Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          force_n…   boolean   If set to   no         true      
                                               'True',                          
                                               raise an                         
                                               error if                         
                                               any of                           
                                               the                              
                                               strings                          
                                               in the                           
                                               array                            
                                               can't be                         
                                               parsed.                          
                                                                                
                          input_f…   array     If not      no                   
                                               empty,                           
                                               only add                         
                                               the                              
                                               fields                           
                                               specified                        
                                               in here                          
                                               to the                           
                                               module                           
                                               inputs                           
                                               schema.                          
                                                                                
                          max_ind…   integer   The         no                   
                                               maximum                          
                                               index                            
                                               until                            
                                               which to                         
                                               parse the                        
                                               string(s…                        
                                                                                
                          min_ind…   integer   The         no                   
                                               minimum                          
                                               index                            
                                               from                             
                                               where to                         
                                               start                            
                                               parsing                          
                                               the                              
                                               string(s…                        
                                                                                
                          remove_…   array     A list of   no                   
                                               tokens/c…                        
                                               to                               
                                               replace                          
                                               with a                           
                                               single                           
                                               white-sp…                        
                                               before                           
                                               parsing                          
                                               the                              
                                               input.                           
                                                                                
 Python class                                                                   
                          python_class_name    ExtractDateModule                
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              import polars as pl                               
                              import pyarrow as pa                              
                              from dateutil import parser                       
                                                                                
                              force_non_null: bool = self.get_data_for_field(   
                                  field_name="force_non_null", inputs=inputs    
                              )                                                 
                              min_pos: Union[None, int] = self.get_data_for_…   
                                  field_name="min_index", inputs=inputs         
                              )                                                 
                              if min_pos is None:                               
                                  min_pos = 0                                   
                              max_pos: Union[None, int] = self.get_data_for_…   
                                  field_name="max_index", inputs=inputs         
                              )                                                 
                              remove_tokens: Iterable[str] = self.get_data_f…   
                                  field_name="remove_tokens", inputs=inputs     
                              )                                                 
                                                                                
                              def parse_date(_text: str):                       
                                                                                
                                  text = _text                                  
                                  if min_pos:                                   
                                      try:                                      
                                          text = text[min_pos:]  # type: ign…   
                                      except Exception:                         
                                          return None                           
                                  if max_pos:                                   
                                      try:                                      
                                          text = text[0 : max_pos - min_pos]…   
                                      except Exception:                         
                                          pass                                  
                                                                                
                                  if remove_tokens:                             
                                      for t in remove_tokens:                   
                                          text = text.replace(t, " ")           
                                                                                
                                  try:                                          
                                      d_obj = parser.parse(text, fuzzy=True)    
                                  except Exception as e:                        
                                      if force_non_null:                        
                                          raise KiaraProcessingException(e)     
                                      return None                               
                                                                                
                                  if d_obj is None:                             
                                      if force_non_null:                        
                                          raise KiaraProcessingException(       
                                              f"Can't parse date from string…   
                                          )                                     
                                      return None                               
                                                                                
                                  return d_obj                                  
                                                                                
                              value = inputs.get_value_obj("array")             
                              array: KiaraArray = value.data                    
                                                                                
                              series = pl.Series(name="tokens", values=array…   
                              job_log.add_log(f"start parsing date for {len(…   
                              result = series.apply(parse_date)                 
                              job_log.add_log(f"finished parsing date for {l…   
                              result_array = result.to_arrow()                  
                                                                                
                              # TODO: remove this cast once the array data t…   
                              chunked = pa.chunked_array(result_array)          
                              outputs.set_values(date_array=chunked)            
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

create.database

                                                                                
 Documentation                                                                  
                          -- n/a --                                             
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type      Descrip…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object    Value      no                   
                                                constan…                        
                                                for this                        
                                                module.                         
                                                                                
                          defaults    object    Value      no                   
                                                defaults                        
                                                for this                        
                                                module.                         
                                                                                
                          ignore_e…   boolean   Whether    no         false     
                                                to                              
                                                ignore                          
                                                convert                         
                                                errors                          
                                                and omit                        
                                                the                             
                                                failed                          
                                                items.                          
                                                                                
                          include_…   boolean   When       no         false     
                                                includi…                        
                                                source                          
                                                metadat…                        
                                                whether                         
                                                to also                         
                                                include                         
                                                the                             
                                                original                        
                                                raw                             
                                                (string)                        
                                                content.                        
                                                                                
                          include_…   boolean   Whether    no                   
                                                to                              
                                                include                         
                                                a table                         
                                                with                            
                                                metadata                        
                                                about                           
                                                the                             
                                                source                          
                                                files.                          
                                                                                
                          merge_in…   boolean   Whether    no         false     
                                                to merge                        
                                                all csv                         
                                                files                           
                                                into a                          
                                                single                          
                                                table.                          
                                                                                
                          source_t…   string    The        yes                  
                                                value                           
                                                type of                         
                                                the                             
                                                source                          
                                                value.                          
                                                                                
                          target_t…   string    The        yes                  
                                                value                           
                                                type of                         
                                                the                             
                                                target.                         
                                                                                
 Python class                                                                   
                          python_class_name    CreateDatabaseModule             
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              source_type = self.get_config_value("source_ty…   
                              target_type = self.get_config_value("target_ty…   
                                                                                
                              func_name = f"create__{target_type}__from__{so…   
                              func = getattr(self, func_name)                   
                                                                                
                              source_value = inputs.get_value_obj(source_typ…   
                                                                                
                              signature = inspect.signature(func)               
                              if "optional" in signature.parameters:            
                                  optional: Dict[str, Value] = {}               
                                  op_schemas = {}                               
                                  for field, schema in self.inputs_schema.it…   
                                      if field == source_type:                  
                                          continue                              
                                      optional[field] = inputs.get_value_obj…   
                                      op_schemas[field] = schema                
                                  result = func(                                
                                      source_value=source_value,                
                                      optional=ValueMapReadOnly(                
                                          value_items=optional, values_schem…   
                                      ),                                        
                                  )                                             
                              else:                                             
                                  result = func(source_value=source_value)      
                              outputs.set_value(target_type, result)            
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

create.table

                                                                                
 Documentation                                                                  
                          -- n/a --                                             
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type      Descrip…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object    Value      no                   
                                                constan…                        
                                                for this                        
                                                module.                         
                                                                                
                          defaults    object    Value      no                   
                                                defaults                        
                                                for this                        
                                                module.                         
                                                                                
                          ignore_e…   boolean   Whether    no         false     
                                                to                              
                                                ignore                          
                                                convert                         
                                                errors                          
                                                and omit                        
                                                the                             
                                                failed                          
                                                items.                          
                                                                                
                          source_t…   string    The        yes                  
                                                value                           
                                                type of                         
                                                the                             
                                                source                          
                                                value.                          
                                                                                
                          target_t…   string    The        yes                  
                                                value                           
                                                type of                         
                                                the                             
                                                target.                         
                                                                                
 Python class                                                                   
                          python_class_name    CreateTableModule                
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              source_type = self.get_config_value("source_ty…   
                              target_type = self.get_config_value("target_ty…   
                                                                                
                              func_name = f"create__{target_type}__from__{so…   
                              func = getattr(self, func_name)                   
                                                                                
                              source_value = inputs.get_value_obj(source_typ…   
                                                                                
                              signature = inspect.signature(func)               
                              if "optional" in signature.parameters:            
                                  optional: Dict[str, Value] = {}               
                                  op_schemas = {}                               
                                  for field, schema in self.inputs_schema.it…   
                                      if field == source_type:                  
                                          continue                              
                                      optional[field] = inputs.get_value_obj…   
                                      op_schemas[field] = schema                
                                  result = func(                                
                                      source_value=source_value,                
                                      optional=ValueMapReadOnly(                
                                          value_items=optional, values_schem…   
                                      ),                                        
                                  )                                             
                              else:                                             
                                  result = func(source_value=source_value)      
                              outputs.set_value(target_type, result)            
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

query.database

                                                                                
 Documentation                                                                  
                          Execute a SQL query against a (SQLite) database.      
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          query       string   The         no                   
                                               query.                           
                                                                                
 Python class                                                                   
                          python_class_name    QueryDatabaseModule              
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              import pyarrow as pa                              
                                                                                
                              database: KiaraDatabase = inputs.get_value_dat…   
                              query = self.get_config_value("query")            
                              if query is None:                                 
                                  query = inputs.get_value_data("query")        
                                                                                
                              # TODO: make this memory efficient                
                                                                                
                              result_columns: Dict[str, List[Any]] = {}         
                              with database.get_sqlalchemy_engine().connect(…   
                                  result = con.execute(text(query))             
                                  for r in result:                              
                                      for k, v in dict(r).items():              
                                          result_columns.setdefault(k, []).a…   
                                                                                
                              table = pa.Table.from_pydict(result_columns)      
                              outputs.set_value("query_result", table)          
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

table.cut_column

                                                                                
 Documentation                                                                  
                          Cut off one column from a table, returning an         
                          array.                                                
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
 Python class                                                                   
                          python_class_name    CutColumnModule                  
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              import pyarrow as pa                              
                                                                                
                              column_name: str = inputs.get_value_data("colu…   
                                                                                
                              table_value: Value = inputs.get_value_obj("tab…   
                              table_metadata: KiaraTableMetadata = table_val…   
                                  "metadata.table"                              
                              )                                                 
                                                                                
                              available = table_metadata.table.column_names     
                                                                                
                              if column_name not in available:                  
                                  raise KiaraProcessingException(               
                                      f"Invalid column name '{column_name}'.…   
                                  )                                             
                                                                                
                              table: pa.Table = table_value.data.arrow_table    
                              column = table.column(column_name)                
                                                                                
                              outputs.set_value("array", column)                
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

table.merge

                                                                                
 Documentation                                                                  
                          Create a table from other tables and/or arrays.       
                                                                                
                          This module needs configuration to be set (for        
                          now). It's currently not possible to merge an         
                          arbitrary number of tables/arrays, all tables to be   
                          merged must be specified in the module                
                          configuration.                                        
                                                                                
                          Column names of the resulting table can be            
                          controlled by the 'column_map' configuration, which   
                          takes the desired column name as key, and a           
                          field-name in the following format as value:          
                                                                                
                           • '[inputs_schema_key]' for inputs of type 'array'   
                           • '[inputs_schema_key].orig_column_name' for         
                             inputs of type 'table'                             
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          column_m…   object   A map       no                   
                                               describi…                        
                                                                                
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          inputs_s…   object   A dict      yes                  
                                               describi…                        
                                               the                              
                                               inputs                           
                                               for this                         
                                               merge                            
                                               process.                         
                                                                                
 Python class                                                                   
                          python_class_name    MergeTableModule                 
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              import pyarrow as pa                              
                                                                                
                              inputs_schema: Dict[str, Any] = self.get_confi…   
                              column_map: Dict[str, str] = self.get_config_v…   
                                                                                
                              sources = {}                                      
                              for field_name in inputs_schema.keys():           
                                  sources[field_name] = inputs.get_value_dat…   
                                                                                
                              len_dict = {}                                     
                              arrays = {}                                       
                                                                                
                              column_map_final = dict(column_map)               
                                                                                
                              for source_key, table_or_array in sources.item…   
                                                                                
                                  if isinstance(table_or_array, KiaraTable):    
                                      rows = table_or_array.num_rows            
                                      for name in table_or_array.column_name…   
                                          array_name = f"{source_key}.{name}"   
                                          if column_map and array_name not i…   
                                              job_log.add_log(                  
                                                  f"Ignoring column '{name}'…   
                                              )                                 
                                              continue                          
                                                                                
                                          column = table_or_array.arrow_tabl…   
                                          arrays[array_name] = column           
                                          if not column_map:                    
                                              if name in column_map_final:      
                                                  raise Exception(              
                                                      f"Can't merge table, d…   
                                                  )                             
                                              column_map_final[name] = array…   
                                                                                
                                  elif isinstance(table_or_array, KiaraArray…   
                                                                                
                                      if column_map and source_key not in co…   
                                          job_log.add_log(                      
                                              f"Ignoring array '{source_key}…   
                                          )                                     
                                          continue                              
                                                                                
                                      rows = len(table_or_array)                
                                      arrays[source_key] = table_or_array.ar…   
                                                                                
                                      if not column_map:                        
                                          if source_key in column_map_final.…   
                                              raise Exception(                  
                                                  f"Can't merge table, dupli…   
                                              )                                 
                                          column_map_final[source_key] = sou…   
                                                                                
                                  else:                                         
                                      raise KiaraProcessingException(           
                                          f"Can't merge table: invalid type …   
                                      )                                         
                                                                                
                                  len_dict[source_key] = rows                   
                                                                                
                              all_rows = None                                   
                              for source_key, rows in len_dict.items():         
                                  if all_rows is None:                          
                                      all_rows = rows                           
                                  else:                                         
                                      if all_rows != rows:                      
                                          all_rows = None                       
                                          break                                 
                                                                                
                              if all_rows is None:                              
                                  len_str = ""                                  
                                  for name, rows in len_dict.items():           
                                      len_str = f" {name} ({rows})"             
                                                                                
                                  raise KiaraProcessingException(               
                                      f"Can't merge table, sources have diff…   
                                  )                                             
                                                                                
                              column_names = []                                 
                              columns = []                                      
                              for column_name, ref in column_map_final.items…   
                                  column_names.append(column_name)              
                                  column = arrays[ref]                          
                                  columns.append(column)                        
                                                                                
                              table = pa.Table.from_arrays(arrays=columns, n…   
                                                                                
                              outputs.set_value("table", table)                 
                                                                                
                         ─────────────────────────────────────────────────────  
                                                                                

query.table

                                                                                
 Documentation                                                                  
                          Execute a SQL query against an (Arrow) table.         
                                                                                
                          The default relation name for the sql query is        
                          'data', but can be modified by the 'relation_name'    
                          config option/input.                                  
                                                                                
                          If the 'query' module config option is not set,       
                          users can provide their own query; otherwise the      
                          pre-set one will be used.                             
                                                                                
 Author(s)                                                                      
                          Markus Binsteiner   markus@frkl.io                    
                                                                                
 Context                                                                        
                          Tags         tabular                                  
                          Labels       package: kiara_plugin.tabular            
                          References   source_repo:                             
                                       https://github.com/DHARPA-Project/kia…   
                                       documentation:                           
                                       https://DHARPA-Project.github.io/kiar…   
                                                                                
 Module config schema                                                           
                          Field       Type     Descript…   Required   Default   
                         ─────────────────────────────────────────────────────  
                          constants   object   Value       no                   
                                               constants                        
                                               for this                         
                                               module.                          
                                                                                
                          defaults    object   Value       no                   
                                               defaults                         
                                               for this                         
                                               module.                          
                                                                                
                          query       string   The query   no                   
                                               to                               
                                               execute.                         
                                               If not                           
                                               specifie…                        
                                               the user                         
                                               will be                          
                                               able to                          
                                               provide                          
                                               their                            
                                               own.                             
                                                                                
                          relation…   string   The name    no         "data"    
                                               the table                        
                                               is                               
                                               referred                         
                                               to in the                        
                                               sql                              
                                               query. If                        
                                               not                              
                                               specifie…                        
                                               the user                         
                                               will be                          
                                               able to                          
                                               provide                          
                                               their                            
                                               own.                             
                                                                                
 Python class                                                                   
                          python_class_name    QueryTableSQL                    
                          python_module_name   kiara_plugin.tabular.modules.…   
                          full_name            kiara_plugin.tabular.modules.…   
                                                                                
 Processing source code  ─────────────────────────────────────────────────────  
                          def process(self, inputs: ValueMap, outputs: Value…   
                                                                                
                              import duckdb                                     
                                                                                
                              if self.get_config_value("query") is None:        
                                  _query: str = inputs.get_value_data("query…   
                                  _relation_name: str = inputs.get_value_dat…   
                              else:                                             
                                  _query = self.get_config_value("query")       
                                  _relation_name = self.get_config_value("re…   
                                                                                
                              if _relation_name.upper() in RESERVED_SQL_KEYW…   
                                  raise KiaraProcessingException(               
                                      f"Invalid relation name '{_relation_na…   
                                  )                                             
                                                                                
                              _table: KiaraTable = inputs.get_value_data("ta…   
                              rel_from_arrow = duckdb.arrow(_table.arrow_tab…   
                              result: duckdb.DuckDBPyRelation = rel_from_arr…   
                                                                                
                              outputs.set_value("query_result", result.arrow…   
                                                                                
                         ─────────────────────────────────────────────────────