Adrien
04/29/2024, 4:38 PMdatajoely
04/29/2024, 5:21 PM
PartitionedDataset?
Adrien
04/29/2024, 5:27 PMNok Lam Chan
04/29/2024, 6:31 PMAdrien
04/30/2024, 7:50 AM
# Here is what my loader is doing
# Loader: open `files` as a WebDataset (node splitting delegated to
# `self.split_by_node`) and chain the `wds.torch_audio` decoder onto it.
dataset = wds.WebDataset(files, nodesplitter=self.split_by_node).decode(wds.torch_audio)
def preprocess(sample):
    """Per-sample preprocessing hook applied to the dataset.

    Placeholder implementation: the sample is handed back unchanged. Real
    transformations belong where the comment below sits.

    Args:
        sample: One sample as yielded by the dataset.

    Returns:
        The same ``sample`` object that was passed in.
    """
    # Do some operations
    return sample
# Here is my node code
def compute_node(dataset):
    """Node: run ``preprocess`` over every sample of ``dataset``.

    Args:
        dataset: Object exposing a ``map(fn)`` method (the loader above builds
            a ``wds.WebDataset``).

    Returns:
        Whatever ``dataset.map(preprocess)`` returns, i.e. the mapped dataset.
    """
    # Return the result of ``map`` instead of discarding it: a fluent ``map``
    # that returns a new pipeline (as webdataset's does -- confirm for the
    # installed version) would otherwise leave ``dataset`` un-preprocessed.
    return dataset.map(preprocess)
# Here is the target node output
def compute_node_and_split(dataset):
    """Target node: preprocess ``dataset`` and hand it back as two splits.

    NOTE(review): still a sketch. ``split1`` and ``split2`` are never assigned
    in this snippet, so unless they are defined elsewhere the ``return`` raises
    ``NameError`` until the splitting logic is written.

    Args:
        dataset: Object exposing a ``map(fn)`` method (the loader above builds
            a ``wds.WebDataset``).

    Returns:
        A ``(split1, split2)`` tuple.
    """
    # Keep the result of ``map``; calling it only for its side effect drops the
    # preprocessing when ``map`` returns a new pipeline (as webdataset's fluent
    # interface does -- confirm for the installed version). The two splits
    # should be derived from this mapped dataset.
    dataset = dataset.map(preprocess)
    # How to split webdataset with kedro without increasing complexity
    return split1, split2
Adrien
04/30/2024, 7:52 AMNok Lam Chan
04/30/2024, 8:32 AMNok Lam Chan
04/30/2024, 8:34 AMAdrien
04/30/2024, 9:09 AMAdrien
04/30/2024, 9:09 AMNok Lam Chan
04/30/2024, 9:13 AMAdrien
04/30/2024, 10:09 AMAdrien
# 04/30/2024, 10:11 AM
def compute_node_and_split(dataset):
    """Generator node: route each preprocessed sample to one of two outputs.

    For every sample a 2-tuple is yielded; the slot that does not receive the
    sample is filled with an empty dict.

    NOTE(review): ``condition`` is not defined in this snippet; presumably it
    stands for a per-sample predicate -- confirm and replace before use.

    Args:
        dataset: Iterable object exposing a ``map(fn)`` method (the loader in
            this thread builds a ``wds.WebDataset``).

    Yields:
        ``({}, sample)`` when ``condition`` is truthy, ``(sample, {})``
        otherwise.
    """
    # Iterate the result of ``map`` rather than the original object, so the
    # samples seen here are preprocessed even when ``map`` returns a new
    # pipeline instead of mutating ``dataset`` in place.
    # How to split webdataset with kedro without increasing complexity
    for sample in dataset.map(preprocess):
        if condition:
            yield {}, sample
        else:
            yield sample, {}
Nok Lam Chan
04/30/2024, 10:19 AMAdrien
04/30/2024, 10:20 AMAdrien
04/30/2024, 10:20 AMNok Lam Chan
04/30/2024, 10:21 AMNok Lam Chan
04/30/2024, 10:23 AMAdrien
04/30/2024, 10:30 AMAdrien
04/30/2024, 10:34 AMNok Lam Chan
04/30/2024, 10:40 AMAdrien
04/30/2024, 11:23 AMNok Lam Chan
04/30/2024, 11:59 AMAdrien
04/30/2024, 11:59 AMNok Lam Chan
04/30/2024, 12:02 PMNok Lam Chan
04/30/2024, 12:03 PMAdrien
04/30/2024, 12:04 PMAdrien
04/30/2024, 12:05 PMNok Lam Chan
04/30/2024, 12:11 PMAdrien
04/30/2024, 12:12 PMNok Lam Chan
04/30/2024, 12:15 PMAdrien
04/30/2024, 12:19 PMAdrien
04/30/2024, 12:20 PMNok Lam Chan
04/30/2024, 12:29 PMAdrien
04/30/2024, 1:36 PM