Leslie Wu
07/13/2023, 12:51 PM
kedro.io.core.DataSetError: Failed while loading data from data set ExcelDataSet(filepath=my/s3/path/file.xlsx, load_args={'engine': openpyxl, 'sheet_name': Sheet1}, protocol=s3, save_args={'index': False}, writer_args={'engine': xlsxwriter}).
my/s3/path/file.xlsx
I have no issues with other formats (parquet / CSV / PDF). Has anyone seen this before, or does anyone have insight into where I am going wrong?
FYI, I am using kedro=0.17.7
Traceback (most recent call last):
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/s3fs/core.py", line 529, in info
out = self._call_s3(self.s3.head_object, kwargs, Bucket=bucket,
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/s3fs/core.py", line 200, in _call_s3
return method(**additional_kwargs)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/botocore/client.py", line 530, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/botocore/client.py", line 964, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (404) when calling the HeadObject operation: Not Found
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/io/core.py", line 182, in load
return self._load()
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/extras/datasets/pandas/excel_dataset.py", line 161, in _load
with self._fs.open(load_path, **self._fs_open_args_load) as fs_file:
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/fsspec/spec.py", line 1009, in open
f = self._open(
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/s3fs/core.py", line 375, in _open
return S3File(self, path, mode, block_size=block_size, acl=acl,
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/s3fs/core.py", line 1096, in __init__
super().__init__(s3, path, mode, block_size, autocommit=autocommit,
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/fsspec/spec.py", line 1351, in __init__
self.size = self.details["size"]
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/fsspec/spec.py", line 1364, in details
self._details = self.fs.info(self.path)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/s3fs/core.py", line 546, in info
return super(S3FileSystem, self).info(path)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/fsspec/spec.py", line 599, in info
raise FileNotFoundError(path)
FileNotFoundError: my/s3/path/file.xlsx
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/opt/conda/envs/delisting/bin/kedro", line 8, in <module>
sys.exit(main())
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/framework/cli/cli.py", line 246, in main
cli_collection()
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/framework/cli/cli.py", line 184, in main
super().main(
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/framework/cli/project.py", line 390, in run
session.run(
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/framework/session/session.py", line 391, in run
run_result = runner.run(filtered_pipeline, catalog, run_id)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/runner/runner.py", line 79, in run
self._run(pipeline, catalog, run_id)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/runner/sequential_runner.py", line 63, in _run
run_node(node, catalog, self._is_async, run_id)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/runner/runner.py", line 192, in run_node
node = _run_node_sequential(node, catalog, run_id)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/runner/runner.py", line 269, in _run_node_sequential
inputs[name] = catalog.load(name)
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/io/data_catalog.py", line 397, in load
result = func()
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/io/core.py", line 595, in load
return super().load()
File "/opt/conda/envs/delisting/lib/python3.8/site-packages/kedro/io/core.py", line 191, in load
raise DataSetError(message) from exc
kedro.io.core.DataSetError: Failed while loading data from data set ExcelDataSet(filepath=my/s3/path/file.xlsx, load_args={'engine': openpyxl, 'sheet_name': Sheet1}, protocol=s3, save_args={'index': False}, writer_args={'engine': xlsxwriter}).
my/s3/path/file.xlsx
FileNotFoundError: my/s3/path/file.xlsx
I can definitely confirm that the file exists at the specified path.
Michel van den Berg
07/13/2023, 1:00 PM
Leslie Wu
07/13/2023, 1:01 PM
Michel van den Berg
07/13/2023, 1:03 PM