Galen Seilis
11/06/2024, 10:13 PM
"""Command line tools for manipulating a Kedro project.
Intended to be invoked via `kedro`."""
import click
from kedro.framework.cli.project import (
ASYNC_ARG_HELP,
CONFIG_FILE_HELP,
CONF_SOURCE_HELP,
FROM_INPUTS_HELP,
FROM_NODES_HELP,
LOAD_VERSION_HELP,
NODE_ARG_HELP,
PARAMS_ARG_HELP,
PIPELINE_ARG_HELP,
RUNNER_ARG_HELP,
TAG_ARG_HELP,
TO_NODES_HELP,
TO_OUTPUTS_HELP,
)
from kedro.framework.cli.utils import (
CONTEXT_SETTINGS,
_config_file_callback,
_split_params,
_split_load_versions,
env_option,
split_string,
split_node_names,
)
from kedro.framework.session import KedroSession
from kedro.utils import load_obj
# Root click group of this module; commands attach to it through the
# `@cli.command()` decorator. The docstring is left untouched because click
# displays it as the group's help text.
@click.group(name=__file__, context_settings=CONTEXT_SETTINGS)
def cli():
    """Command line tools for manipulating a Kedro project."""
# Every option below is parsed by click and handed to `run` as a keyword
# argument: either the name derived from the long flag or the explicit
# destination name given as an extra positional string (e.g. "node_names").
# Options that declare a `callback` have their raw value passed through that
# callback before `run` receives it.
@cli.command()
@click.option(
    "--from-inputs",
    type=str,
    default="",
    help=FROM_INPUTS_HELP,
    callback=split_string,
)
@click.option(
    "--to-outputs",
    type=str,
    default="",
    help=TO_OUTPUTS_HELP,
    callback=split_string,
)
@click.option(
    "--from-nodes",
    type=str,
    default="",
    help=FROM_NODES_HELP,
    callback=split_node_names,
)
@click.option(
    "--to-nodes",
    type=str,
    default="",
    help=TO_NODES_HELP,
    callback=split_node_names,
)
@click.option(
    "--nodes",
    "-n",
    "node_names",
    type=str,
    multiple=True,
    help=NODE_ARG_HELP,
)
@click.option(
    "--runner",
    "-r",
    type=str,
    default=None,
    multiple=False,
    help=RUNNER_ARG_HELP,
)
@click.option(
    "--async",
    "is_async",
    is_flag=True,
    multiple=False,
    help=ASYNC_ARG_HELP,
)
@env_option
@click.option(
    "--tags",
    "-t",
    type=str,
    multiple=True,
    help=TAG_ARG_HELP,
)
@click.option(
    "--load-versions",
    "-lv",
    type=str,
    multiple=True,
    help=LOAD_VERSION_HELP,
    callback=_split_load_versions,
)
@click.option(
    "--pipeline",
    "-p",
    type=str,
    default=None,
    help=PIPELINE_ARG_HELP,
)
@click.option(
    "--config",
    "-c",
    type=click.Path(exists=True, dir_okay=False, resolve_path=True),
    help=CONFIG_FILE_HELP,
    callback=_config_file_callback,
)
@click.option(
    "--conf-source",
    type=click.Path(exists=True, file_okay=False, resolve_path=True),
    help=CONF_SOURCE_HELP,
)
@click.option(
    "--params",
    type=click.UNPROCESSED,
    default="",
    help=PARAMS_ARG_HELP,
    callback=_split_params,
)
def run(
    tags,
    env,
    runner,
    is_async,
    node_names,
    to_nodes,
    from_nodes,
    from_inputs,
    to_outputs,
    load_versions,
    pipeline,
    config,
    conf_source,
    params,
):
    """Run the pipeline."""
    # NOTE: `config` is never read in this body; the value is only handled by
    # `_config_file_callback` while click parses the options.
    # NOTE(review): presumably that callback applies the file's contents as
    # option defaults -- confirm in kedro.framework.cli.utils.

    # Look the runner up by name inside "kedro.runner"; when no --runner was
    # given, "SequentialRunner" is used.
    runner_cls = load_obj(runner or "SequentialRunner", "kedro.runner")

    # click delivers `multiple=True` options as sequences; pass plain tuples on.
    selected_tags = tuple(tags)
    selected_nodes = tuple(node_names)

    session_options = {
        "env": env,
        "conf_source": conf_source,
        "extra_params": params,
    }
    with KedroSession.create(**session_options) as session:
        # The runner is instantiated only once the session exists, and the
        # remaining filters are forwarded exactly as click produced them.
        session.run(
            tags=selected_tags,
            runner=runner_cls(is_async=is_async),
            node_names=selected_nodes,
            from_nodes=from_nodes,
            to_nodes=to_nodes,
            from_inputs=from_inputs,
            to_outputs=to_outputs,
            load_versions=load_versions,
            pipeline_name=pipeline,
        )
Generally in Python, users are supposed to stay away from names prefixed with a single underscore; however, this example in the docs uses them. When functions like `_config_file_callback`
appear in user documentation for constructing examples, it becomes less clear what is intended for end users.
Are such methods / functions supposed to be part of the public API?
Hall
11/06/2024, 10:13 PM
Dmitry Sorokin
11/07/2024, 11:48 AM
Nok Lam Chan
11/07/2024, 1:36 PM
`kedro`, for `kedro run`.
There may be a way to ADD new behavior without importing those internal functions, but it will be much harder if you want to remove some of the existing functionality. For example, you may not need all the args
and only want to keep some of them, or you may be designing a plugin like `kedro xxx run`.
The internal imports aren't strictly needed to override the `run` command, but they are necessary if you want to replicate the existing behavior.
Nok Lam Chan
11/07/2024, 1:43 PM
`kedro run`), we keep some flexibility for ourselves in the logic that handles CLI parsing etc. (as it is less user-oriented).
With that said, I think some of those internal functions could be opened up as public since they are quite stable, but we may need to review them as a whole before we make any change.
Feel free to open an issue about this.