Gavin Lou
11/26/2023, 6:42 AM
kedro run --nodes=preprocess_companies_node
I keep getting an error saying pandas.CSVDataset is not found, although I have re-installed and verified that pandas and kedro are installed. The error message is included below.
[11/26/23 14:40:34] INFO Kedro project spaceflights session.py:365
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/io/core.py:4 │
│ 01 in parse_dataset_definition │
│ │
│ 398 │ │ │
│ 399 │ │ trials = (_load_obj(class_path) for class_path in class_paths) │
│ 400 │ │ try: │
│ ❱ 401 │ │ │ class_obj = next(obj for obj in trials if obj is not None) │
│ 402 │ │ except StopIteration as exc: │
│ 403 │ │ │ raise DatasetError( │
│ 404 │ │ │ │ f"Class '{class_obj}' not found or one of its dependen │
╰──────────────────────────────────────────────────────────────────────────────╯
StopIteration
The above exception was the direct cause of the following exception:
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/io/core.py:1 │
│ 52 in from_config │
│ │
│ 149 │ │ │
│ 150 │ │ """ │
│ 151 │ │ try: │
│ ❱ 152 │ │ │ class_obj, config = parse_dataset_definition( │
│ 153 │ │ │ │ config, load_version, save_version │
│ 154 │ │ │ ) │
│ 155 │ │ except Exception as exc: │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/io/core.py:4 │
│ 03 in parse_dataset_definition │
│ │
│ 400 │ │ try: │
│ 401 │ │ │ class_obj = next(obj for obj in trials if obj is not None) │
│ 402 │ │ except StopIteration as exc: │
│ ❱ 403 │ │ │ raise DatasetError( │
│ 404 │ │ │ │ f"Class '{class_obj}' not found or one of its dependen │
│ 405 │ │ │ │ f"has not been installed." │
│ 406 │ │ │ ) from exc │
╰──────────────────────────────────────────────────────────────────────────────╯
DatasetError: Class 'pandas.CSVDataset' not found or one of its dependencies has
not been installed.
The above exception was the direct cause of the following exception:
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /Users/gavinlou/opt/anaconda3/bin/kedro:8 in <module> │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/framework/cl │
│ i/cli.py:211 in main │
│ │
│ 208 │ """ │
│ 209 │ _init_plugins() │
│ 210 │ cli_collection = KedroCLI(project_path=Path.cwd()) │
│ ❱ 211 │ cli_collection() │
│ 212 │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/click/core.py:1157 │
│ in __call__ │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/framework/cl │
│ i/cli.py:139 in main │
│ │
│ 136 │ │ ) │
│ 137 │ │ │
│ 138 │ │ try: │
│ ❱ 139 │ │ │ super().main( │
│ 140 │ │ │ │ args=args, │
│ 141 │ │ │ │ prog_name=prog_name, │
│ 142 │ │ │ │ complete_var=complete_var, │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/click/core.py:1078 │
│ in main │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/click/core.py:1688 │
│ in invoke │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/click/core.py:1434 │
│ in invoke │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/click/core.py:783 │
│ in invoke │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/framework/cl │
│ i/project.py:453 in run │
│ │
│ 450 │ with KedroSession.create( │
│ 451 │ │ env=env, conf_source=conf_source, extra_params=params │
│ 452 │ ) as session: │
│ ❱ 453 │ │ session.run( │
│ 454 │ │ │ tags=tag, │
│ 455 │ │ │ runner=runner(is_async=is_async), │
│ 456 │ │ │ node_names=node_names, │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/framework/se │
│ ssion/session.py:418 in run │
│ │
│ 415 │ │ │ "runner": getattr(runner, "__name__", str(runner)), │
│ 416 │ │ } │
│ 417 │ │ │
│ ❱ 418 │ │ catalog = context._get_catalog( # noqa: protected-access │
│ 419 │ │ │ save_version=save_version, │
│ 420 │ │ │ load_versions=load_versions, │
│ 421 │ │ ) │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/framework/co │
│ ntext/context.py:277 in _get_catalog │
│ │
│ 274 │ │ ) │
│ 275 │ │ conf_creds = self._get_config_credentials() │
│ 276 │ │ │
│ ❱ 277 │ │ catalog = settings.DATA_CATALOG_CLASS.from_config( │
│ 278 │ │ │ catalog=conf_catalog, │
│ 279 │ │ │ credentials=conf_creds, │
│ 280 │ │ │ load_versions=load_versions, │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/io/data_cata │
│ log.py:311 in from_config │
│ │
│ 308 │ │ │ │ ds_layer = ds_config.pop("layer", None) │
│ 309 │ │ │ │ if ds_layer is not None: │
│ 310 │ │ │ │ │ layers[ds_layer].add(ds_name) │
│ ❱ 311 │ │ │ │ data_sets[ds_name] = AbstractDataset.from_config( │
│ 312 │ │ │ │ │ ds_name, ds_config, load_versions.get(ds_name), sa │
│ 313 │ │ │ │ ) │
│ 314 │ │ dataset_layers = layers or None │
│ │
│ /Users/gavinlou/opt/anaconda3/lib/python3.8/site-packages/kedro/io/core.py:1 │
│ 56 in from_config │
│ │
│ 153 │ │ │ │ config, load_version, save_version │
│ 154 │ │ │ ) │
│ 155 │ │ except Exception as exc: │
│ ❱ 156 │ │ │ raise DatasetError( │
│ 157 │ │ │ │ f"An exception occurred when parsing config " │
│ 158 │ │ │ │ f"for dataset '{name}':\n{str(exc)}" │
│ 159 │ │ │ ) from exc │
╰──────────────────────────────────────────────────────────────────────────────╯
DatasetError: An exception occurred when parsing config for dataset 'companies':
Class 'pandas.CSVDataset' not found or one of its dependencies has not been
installed.
Please offer some ideas as to what is happening here. I would really like to go through this tutorial.
William Caicedo
11/26/2023, 7:00 AM
pip install kedro[pandas.CSVDataSet]?
William Caicedo
11/26/2023, 7:02 AM
kedro-datasets[pandas.CSVDataSet] in your requirements.txt? If not, that might be the cause of it.
Gavin Lou
11/26/2023, 12:34 PM