Dustin
01/25/2023, 12:26 AM
`kedro run`
(no error, but the same console information as without hooks). Just wondering, do I need to do something to 'reload' the settings? or
Deepyaman Datta
01/25/2023, 5:26 AM
`MemoryProfilingHooks`
it's logging output instead of printing to screen. Here's a dumb one I just tested that doesn't have any extra dependencies:
from kedro.framework.hooks import hook_impl
import logging
def _normalise_mem_usage(mem_usage):
# memory_profiler < 0.56.0 returns list instead of float
return mem_usage[0] if isinstance(mem_usage, (list, tuple)) else mem_usage
class MemoryProfilingHooks:
    """Dependency-free stand-in for the memory profiling hooks.

    Uses hard-coded placeholder readings instead of measuring memory and
    prints the result, so it can be used to check that the hooks are
    actually being invoked.
    """

    def __init__(self):
        # Reading taken before each dataset load, keyed by dataset name.
        self._mem_usage = {}

    @property
    def _logger(self):
        """Logger named after this class (unused by the print-based hook)."""
        return logging.getLogger(self.__class__.__name__)

    @hook_impl
    def before_dataset_loaded(self, dataset_name: str) -> None:
        """Store the (placeholder) reading taken before ``dataset_name`` loads."""
        before_mem_usage = 99  # placeholder value, not a real measurement
        before_mem_usage = _normalise_mem_usage(before_mem_usage)
        self._mem_usage[dataset_name] = before_mem_usage

    @hook_impl
    def after_dataset_loaded(self, dataset_name: str) -> None:
        """Print the difference between the after and before readings.

        Raises:
            KeyError: If no "before" reading was stored for ``dataset_name``.
        """
        after_mem_usage = 1  # placeholder value, not a real measurement
        after_mem_usage = _normalise_mem_usage(after_mem_usage)
        # print() does not interpolate logging-style arguments, so the
        # message is formatted explicitly before printing to console.
        print(
            "Loading %s consumed %2.2fMiB memory"
            % (dataset_name, after_mem_usage - self._mem_usage[dataset_name])
        )
Dustin
01/25/2023, 5:43 AM
import sys
from typing import Any, Dict
import statsd
from kedro.framework.hooks import hook_impl
from kedro.pipeline.node import Node
from memory_profiler import memory_usage
import logging
class PipelineMonitoringHooks:
    """Kedro hooks that send node timings, input sizes and a run counter to statsd."""

    def __init__(self):
        # Running timers, keyed by the node's short name.
        self._timers = {}
        self._client = statsd.StatsClient(prefix="kedro")

    @hook_impl
    def before_node_run(self, node: Node) -> None:
        """Start a statsd timer for ``node`` and remember it."""
        timer = self._client.timer(node.name)
        timer.start()
        self._timers[node.short_name] = timer

    @hook_impl
    def after_node_run(self, node: Node, inputs: Dict[str, Any]) -> None:
        """Stop the node's timer and gauge the size of each of its inputs."""
        self._timers[node.short_name].stop()
        for name, value in inputs.items():
            metric = name + "_size"
            # sys.getsizeof is shallow: it does not follow references
            # into contained objects.
            self._client.gauge(metric, sys.getsizeof(value))

    @hook_impl
    def after_pipeline_run(self):
        """Increment the ``run`` counter."""
        self._client.incr("run")
def _normalise_mem_usage(mem_usage):
# memory_profiler < 0.56.0 returns list instead of float
return mem_usage[0] if isinstance(mem_usage, (list, tuple)) else mem_usage
class MemoryProfilingHooks:
    """Kedro hooks that log the memory difference around each dataset load."""

    def __init__(self):
        # Reading taken before each dataset load, keyed by dataset name.
        self._mem_usage = {}

    @property
    def _logger(self):
        """Logger named after this class."""
        return logging.getLogger(self.__class__.__name__)

    @staticmethod
    def _current_mem_usage():
        """Take one normalised ``memory_usage`` reading.

        Both hooks use the same call so the two readings are comparable.
        """
        reading = memory_usage(
            -1,
            interval=0.1,
            max_usage=True,
            retval=True,
            include_children=True,
        )
        # memory_profiler < 0.56.0 returns list instead of float
        return _normalise_mem_usage(reading)

    @hook_impl
    def before_dataset_loaded(self, dataset_name: str) -> None:
        """Record the memory reading taken before ``dataset_name`` is loaded."""
        self._mem_usage[dataset_name] = self._current_mem_usage()

    @hook_impl
    def after_dataset_loaded(self, dataset_name: str) -> None:
        """Log the change in the memory reading since the "before" hook.

        Raises:
            KeyError: If no "before" reading was stored for ``dataset_name``.
        """
        after_mem_usage = self._current_mem_usage()
        self._logger.info(
            "Loading %s consumed %2.2fMiB memory",
            dataset_name,
            after_mem_usage - self._mem_usage[dataset_name],
        )
Deepyaman Datta
01/25/2023, 6:07 AM
`breakpoint()`
in the body of, say, the `before_dataset_loaded`
hook?
Dustin
01/25/2023, 6:15 AM
Antony Milne
01/25/2023, 10:18 AM
`def _logger`
2. Change `self._logger.info`
to `logging.getLogger(__name__).info`
`kedro run`
will look a bit different now, so if you don't mind updating that at the same time that would be amazing, thank you!
Dustin
01/25/2023, 10:42 AM
[01/25/23 21:38:23] INFO Loading data from 'example_iris_data' (CSVDataSet)... data_catalog.py:343
INFO Loading example_iris_data consumed 0.99MiB memory hooks.py:67
INFO Loading data from 'parameters' (MemoryDataSet)... data_catalog.py:343
INFO Loading parameters consumed 0.48MiB memory hooks.py:67
INFO Running node: split: split_data([example_iris_data,parameters]) -> [X_train,X_test,y_train,y_test] node.py:327
INFO Saving data to 'X_train' (MemoryDataSet)... data_catalog.py:382
INFO Saving data to 'X_test' (MemoryDataSet)... data_catalog.py:382
INFO Saving data to 'y_train' (MemoryDataSet)... data_catalog.py:382
INFO Saving data to 'y_test' (MemoryDataSet)... data_catalog.py:382
INFO Completed 1 out of 3 tasks sequential_runner.py:85
INFO Loading data from 'X_train' (MemoryDataSet)... data_catalog.py:343
INFO Loading X_train consumed 0.49MiB memory hooks.py:67
INFO Loading data from 'X_test' (MemoryDataSet)...
Antony Milne
01/25/2023, 11:04 AM
Dustin
01/25/2023, 11:05 AM
Jo Stichbury
01/25/2023, 2:14 PM