Shu-Chun Wu
11/04/2024, 3:53 PM
Ankita Katiyar
11/04/2024, 4:00 PM
class_name filled in
Shu-Chun Wu
11/05/2024, 12:43 PM
Shu-Chun Wu
11/05/2024, 12:53 PM
# Dataset factory pattern: any dataset name ending in "_data" that is
# referenced by a pipeline resolves to this entry, with {class_name} taken
# from the matching part of the name (e.g. "7_RADC1700-Crack_data" reads
# data/01_raw/B4CD/7_RADC1700-Crack).
# NOTE(review): `kedro catalog list` resolves a pattern only for dataset names
# that actually appear in a registered pipeline, so class folders that no node
# references are not listed - confirm against the Kedro version in use.
"{class_name}_data":
  type: partitions.PartitionedDataset
  filepath: data/01_raw/B4CD/{class_name}
  # Each file inside the partition folder is loaded as an image.
  dataset:
    type: pillow.ImageDataset

# Pickled dict produced by the `rename_files_in_directory` node.
class_mapping:
  type: pickle.PickleDataset
  filepath: data/02_intermediate/class_mapping.pkl
In pipeline.py:
def create_pipeline(**kwargs) -> Pipeline:
    """Build the image-preparation pipeline.

    The pipeline has two nodes:

    * ``rename_files_in_directory`` reads ``params:basepath`` and writes the
      ``class_mapping`` dataset.
    * ``convert_to_np`` reads the ``7_RADC1700-Crack_data`` dataset and
      ``params:num_classes`` and writes ``images`` and ``labels``.

    Args:
        **kwargs: Accepted for compatibility with the caller; not used here.

    Returns:
        The assembled pipeline.
    """
    # NOTE(review): the two nodes share no dataset, so Kedro has no data
    # dependency forcing the rename to run before the conversion - confirm
    # whether that ordering is required.
    return pipeline(
        [
            node(
                func=rename_files_in_directory,
                inputs=["params:basepath"],
                # The function returns ONE object (a dict). Declaring the
                # output as a one-element list makes Kedro expect a
                # list/tuple return value and fail when saving, so the
                # output is named with a plain string instead.
                outputs="class_mapping",
                name="rename_files_in_directory",
            ),
            node(
                func=convert_to_np,
                inputs=["7_RADC1700-Crack_data", "params:num_classes"],
                # The function returns a 2-tuple, matching these two names.
                outputs=["images", "labels"],
                name="convert_to_np",
            ),
        ]
    )
In nodes.py:
def _prefix_files_with_class_index(folder_path, class_index):
    """Recursively rename files under *folder_path* to ``<class_index>_<name>``."""
    prefix = class_index + "_"
    # os.listdir returns a snapshot list, so renaming while looping is safe.
    for entry_name in os.listdir(folder_path):
        entry_path = os.path.join(folder_path, entry_name)
        if os.path.isdir(entry_path):
            # Nested folders belong to the same class as their parent.
            _prefix_files_with_class_index(entry_path, class_index)
        elif not entry_name.startswith(prefix):
            # Skipping already-prefixed files keeps repeated runs from
            # stacking prefixes ("7_7_img.jpg").
            os.rename(entry_path, os.path.join(folder_path, prefix + entry_name))


def rename_files_in_directory(basepath):
    """Prefix every image file with the index of its class folder.

    Each sub-folder of *basepath* is treated as one class whose index is the
    part of the folder name before the first underscore (``"7_Crack"`` ->
    ``"7"``). Every file below that folder is renamed to
    ``<class_index>_<original name>``; files that already start with that
    prefix are left alone, so the function can be run repeatedly.

    Files that sit directly in *basepath* (outside any class folder) are not
    renamed and are not part of the mapping.

    Args:
        basepath: Directory that contains one sub-folder per class.

    Returns:
        Dict mapping class index (as a string) to class folder name.

    Raises:
        OSError: If *basepath* cannot be listed or a rename fails.
    """
    class_mapping = {}
    for folder_name in os.listdir(basepath):
        folder_path = os.path.join(basepath, folder_name)
        if not os.path.isdir(folder_path):
            continue
        # The class index comes from the class folder, not from each file's
        # own name, so "a.jpg" in "7_Crack" becomes "7_a.jpg".
        class_index = folder_name.split("_", 1)[0]
        class_mapping[class_index] = folder_name
        _prefix_files_with_class_index(folder_path, class_index)
    return class_mapping
def convert_to_np(part, num_classes=7):
    """Load every partition and build image and one-hot label arrays.

    Args:
        part: Mapping of partition name to a zero-argument callable that
            returns the loaded image. The class label is read from the
            leading digits of the partition name (``"12_img"`` -> 12).
        num_classes: Number of classes passed to ``to_categorical``.

    Returns:
        Tuple ``(images, labels)``: ``images`` is an ``int64`` array built
        from the loaded images, ``labels`` is the result of
        ``to_categorical`` applied to the integer labels.

    Raises:
        ValueError: If a partition name does not start with a digit (the
            label cannot be converted to an integer).
    """
    images = []
    labels = []
    for file, func in part.items():
        images.append(func())
        # Take the whole leading run of digits instead of only the first
        # character, so class indices >= 10 are not truncated. Names with no
        # leading digit keep the old first-character behaviour and fail at
        # the integer conversion below.
        digit_count = len(file) - len(file.lstrip("0123456789"))
        labels.append(file[:digit_count] if digit_count else file[:1])
    # NOTE(review): assumes all images share one shape so they stack into a
    # single numeric array - confirm against the raw data.
    images = np.array(images, dtype=np.int64)
    labels = np.array(labels, dtype=np.int64)
    # NOTE(review): if to_categorical is the Keras helper, labels must lie in
    # [0, num_classes); a folder index of 7 with num_classes=7 would be out
    # of range - confirm the index base.
    labels = to_categorical(labels, num_classes=num_classes)
    return images, labels
But after I run kedro run --to-nodes='rename_files_in_directory',
I got this error => ValueError: Pipeline does not contain nodes named ["'rename_files_in_directory'"].
And after I run kedro catalog list,
I didn't get all the datasets. (I have 7 folders.)