dor zazon
01/10/2023, 9:56 AM
datajoely
01/10/2023, 10:00 AM
dor zazon
01/10/2023, 10:03 AM
datajoely
01/10/2023, 10:22 AM
dor zazon
01/10/2023, 10:23 AM
datajoely
01/10/2023, 10:30 AM
│ /Users/dorzazon/Documents/workspace/egged-kedro/egged/src/egged/pipelines/data_processing/pipeli │
│ ne.py:28 in create_pipeline │
│ │
│ 25 │ │ │ )]) │
│ 26 │ pipelines = [] │
│ 27 │ # get catalog │
│ ❱ 28 │ catalog = _get_catalog() │
│ 29 │ for dataset in catalog.load('params:dataset_names'): │
│ 30 │ │ pipelines.append(pipeline(pipe=template, │
│ 31 │ │ │ │ │ │ │ │ │ │ inputs={"dataset": dataset},
dor zazon
01/10/2023, 10:32 AM
datajoely
01/10/2023, 10:32 AM
dor zazon
01/10/2023, 10:35 AM
template = pipeline(
[
node(
func=preprocess_df,
inputs=["dataset", 'params:dataset_config', 'params:col_names_config'],
outputs="preprocessed_dataset_name",
name="preprocess_df_node"
)])
pipelines = []
# get catalog
catalog = _get_catalog()
for dataset in catalog.load('params:dataset_names'):
pipelines.append(pipeline(pipe=template,
inputs={"dataset": dataset},
parameters={"params:dataset_config": f'params:{dataset}',
'params:col_names_config': 'params:col_names_config'},
outputs={"preprocessed_dataset_name": f'preprocessed_{dataset}'},
namespace=f'preprocessed_{dataset}'))
# return all pipelines
final_pipeline = pipelines[0]
for pipe in pipelines[1:]:
final_pipeline += pipe