Lukas Innig
05/28/2024, 12:48 PMdatajoely
05/28/2024, 12:48 PMdatajoely
05/28/2024, 12:49 PM
node.inputs
You can then cross-reference that against catalog.list().
datajoely
05/28/2024, 12:49 PMparam:
datajoely
05/28/2024, 12:51 PM
[node.inputs for node in pipeline.nodes]
<- filter these down to the inputs prefixed by "params:"
Then reduce this into a set of all parameters referenced by all nodes (referenced_params).
set([x for x in catalog.list() if x.startswith('params:')]) - referenced_params
Lukas Innig
05/28/2024, 12:52 PMLukas Innig
05/28/2024, 1:00 PMdatajoely
05/28/2024, 1:00 PM
catalog.datasets.__dict__ may be helpful
Nok Lam Chan
05/28/2024, 1:04 PMdatajoely
05/28/2024, 1:05 PMNok Lam Chan
05/28/2024, 1:06 PMNok Lam Chan
05/28/2024, 1:06 PMLukas Innig
# Chat timestamp from the pasted log: 05/28/2024, 1:29 PM
def parse_params(param_list):
    """Parse the parameter list into a nested dictionary structure.

    Each entry is expected to look like ``"params:a.b.c"``. Everything up to
    and including the first ``:`` is discarded, the remainder is split on
    ``.``, and each segment becomes a key in a tree of nested dictionaries.
    An entry without any ``:`` is treated as a bare dotted name.

    Args:
        param_list: Iterable of parameter-name strings.

    Returns:
        A nested dict keyed by path segment. Leaves are empty dicts, so
        ``["params:a.b", "params:a.c"]`` yields ``{"a": {"b": {}, "c": {}}}``.
    """
    param_dict = {}
    for param in param_list:
        # partition() splits on the FIRST colon only, so a name that itself
        # contains a colon is kept intact, and a name with no colon at all
        # does not raise IndexError the way ``split(':')[1]`` would.
        prefix, sep, dotted = param.partition(':')
        if not sep:
            dotted = prefix
        current_level = param_dict
        for part in dotted.split('.'):
            # Reuse the existing child if present, otherwise create it.
            current_level = current_level.setdefault(part, {})
    return param_dict
def mark_used_params(used, catalog):
    """Flag catalog parameters as used when their top-level key is referenced.

    Only the top-level keys of ``used`` are compared with ``catalog``. For
    every key present in both, the whole corresponding ``catalog`` subtree is
    flagged through ``set_used_recursively``. ``catalog`` is modified in
    place and nothing is returned.

    Args:
        used: Nested dict of referenced parameters; only its top-level keys
            are read.
        catalog: Nested dict of defined parameters to be flagged.
    """
    # NOTE(review): matching is deliberately left at top-level granularity,
    # as in the original. A reference to one nested entry (e.g. "a.b")
    # therefore flags every sibling under "a" as used too - confirm that this
    # coarse behaviour is what callers want.
    for key in used:
        if key in catalog:
            set_used_recursively(catalog[key])
def set_used_recursively(node):
    """Set the '_used' flag on ``node`` and on every nested dict below it.

    The dict is modified in place; values that are not dicts are left
    untouched.

    Args:
        node: Dict to flag. Its dict-valued entries are flagged recursively.

    Returns:
        None.
    """
    node['_used'] = True
    # The flag just written is a bool, so the isinstance check skips it.
    for sub_node in node.values():
        if isinstance(sub_node, dict):
            set_used_recursively(sub_node)
def is_any_parent_used(catalog, path):
    """Check if any parent node in the path is marked as used.

    Walks ``catalog`` along every element of ``path`` except the last one and
    reports whether one of the dicts reached that way contains the ``'_used'``
    key. ``catalog`` itself is not inspected for the flag.

    Args:
        catalog: Nested dict to walk, starting at its top level.
        path: Sequence of keys; the final element names the node whose
            parents are being checked and is not looked up.

    Returns:
        True if a parent dict on the path carries ``'_used'``; False if none
        does, or if the path cannot be followed (missing key or a non-dict
        value along the way).
    """
    current_level = catalog
    for part in path[:-1]:  # Examine all but the last part of the path
        # Type check first: a membership test on a non-dict would either
        # raise or silently do a substring/element search.
        if not isinstance(current_level, dict) or part not in current_level:
            return False
        current_level = current_level[part]
        if isinstance(current_level, dict) and '_used' in current_level:
            return True
    return False
def find_unused_params(catalog, path=None):
    """Recursively find unused parameters.

    A dict-valued entry counts as unused when neither it nor any dict above
    it (within ``catalog``) carries the ``'_used'`` key. Values that are not
    dicts - including the ``'_used'`` flag itself - are ignored.

    Args:
        catalog: Nested dict of parameters, possibly flagged with ``'_used'``.
        path: Keys leading to ``catalog`` from the root, used only to build
            the dotted names in the result. Defaults to an empty path.

    Returns:
        List of dotted parameter names; within each subtree the children are
        listed before their parent.
    """
    # ``None`` instead of a mutable ``[]`` default; copy so the caller's
    # sequence is never shared with the recursion.
    prefix = [] if path is None else list(path)
    unused = []
    for key, value in catalog.items():
        if not isinstance(value, dict):
            continue  # Skip non-dictionary items to avoid type errors
        if '_used' in value:
            # A flagged node covers its whole subtree, so there is nothing
            # below it to report. Skipping here replaces the earlier ancestor
            # lookup, which was handed the current sub-dict together with a
            # root-relative path and therefore walked the wrong tree.
            continue
        new_path = prefix + [key]  # Update the path to include the current key
        unused.extend(find_unused_params(value, new_path))
        unused.append('.'.join(new_path))
    return unused
# Driver script: report parameters that are defined but not referenced by any
# pipeline node.
# NOTE(review): `pipelines` and `catalog` are not defined in this snippet -
# presumably they come from an interactive Kedro session; confirm before
# running this as a standalone module.

# Every node input that carries the "params:" prefix, across all pipelines.
all_inputs = []
for pipeline in pipelines.values():
    all_inputs.extend(
        i for node in pipeline.nodes for i in node.inputs if i.startswith("params:")
    )
all_used_inputs = set(all_inputs)

# Every catalog entry that carries the "params:" prefix.
all_defined_params = {c for c in catalog.list() if c.startswith("params:")}

# Build both trees, flag the referenced entries, then collect what is left.
used_param_dict = parse_params(all_used_inputs)
catalog_param_dict = parse_params(all_defined_params)
mark_used_params(used_param_dict, catalog_param_dict)
unused_params = find_unused_params(catalog_param_dict)
unused_params
Lukas Innig
05/28/2024, 1:30 PM