Jonghyun Yun
12/16/2024, 7:21 PM
from kedro.pipeline import Pipeline, pipeline
from ..text_normalizing import create_pipeline as text_cleaning
# Step 1: place the shared text-normalizing pipeline under the "upsampling"
# namespace.
text_cleaning_pipe = pipeline(text_cleaning(), namespace="upsampling")

# Step 2: wrap the namespaced pipeline again and remap names.  Each key below
# is an "upsampling."-prefixed name and each value is the name it is mapped
# to in the wrapping pipeline.
upsampling_text_cleaning_pipe = pipeline(
    [text_cleaning_pipe],
    inputs={
        # The only entry whose target differs from its un-prefixed key.
        "upsampling.sceptre_320_1510": "sceptre_320_all",
        # The remaining entries map each prefixed name to its bare name.
        "upsampling.abbreviation_dict": "abbreviation_dict",
        "upsampling.llm_expanded_abbreviation": "llm_expanded_abbreviation",
        "upsampling.frequency_dictionary_en": "frequency_dictionary_en",
        "upsampling.augmented_term_frequency": "augmented_term_frequency",
    },
    parameters={
        # Same idea for the parameter: prefixed name -> bare name.
        "params:upsampling.sceptre_processing_options.sub_sample_ratio": (
            "params:sceptre_processing_options.sub_sample_ratio"
        ),
    },
)
Hall
12/16/2024, 7:21 PM
Ravi Kumar Pilla
12/16/2024, 7:54 PMdef create_upsampling_text_cleaning_pipeline() -> Pipeline:
return pipeline(
text_cleaning(),
namespace="upsampling",
inputs={
"sceptre_320_1510": "sceptre_320_all",
"abbreviation_dict": "abbreviation_dict",
"llm_expanded_abbreviation": "llm_expanded_abbreviation",
"frequency_dictionary_en": "frequency_dictionary_en",
"augmented_term_frequency": "augmented_term_frequency",
},
parameters={
"params:sceptre_processing_options.sub_sample_ratio": "params:sceptre_processing_options.sub_sample_ratio",
},
)
Jonghyun Yun
12/18/2024, 3:16 PM