From 4c607894405cb196bd7bc0ec81452e88f2b41d2a Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Wed, 11 Mar 2026 22:13:20 +0000
Subject: [PATCH 1/2] cli: aggiungi filtro --years a run e validate

---
Reviewer notes (below the three-dash line; not part of the commit message):
  * Subject in English: "cli: add --years filter to run and validate".
  * The line structure of this series was restored from a whitespace-collapsed
    copy; blank context lines were inferred from the hunk line counts.
    Run `git apply --check` before applying.
  * `iter_selected_years` (toolkit/cli/common.py) splits `years_arg` on
    commas, strips each entry and keeps the first occurrence of every year
    in the order given. It raises ValueError for an empty entry, for a
    non-integer entry, for a year missing from `cfg.years`, and when both
    `year_arg` and `years_arg` are supplied. With `years_arg=None` it
    delegates to `iter_years(cfg, year_arg)`.
  * NOTE(review): the `if not requested` guard looks unreachable, because
    `str.split(",")` never returns an empty list and an empty part already
    raises inside the loop.
  * `run_cross_year_step` gains an optional `years`; when it is None the
    function falls back to `cfg.years`.
  * NOTE(review): in `run`, the years are resolved before `step` is
    validated, so a bad `--years` value raises ValueError before the
    BadParameter check on `step` is reached.
  * NOTE(review): the tests copy `Path("project-example")`, a path relative
    to the working directory at call time; presumably pytest is expected to
    run from the repository root. Confirm.
  * NOTE(review): `_copy_project_example_multi_year` depends on an exact,
    whitespace-sensitive `str.replace` match on the `years: [2022]` line. If
    dataset.yml does not contain that exact text the config stays unchanged.
    Confirm the literal against project-example/dataset.yml.

 tests/test_cli_years_filter.py | 81 ++++++++++++++++++++++++++++++++++
 toolkit/cli/cmd_run.py         | 16 ++++---
 toolkit/cli/cmd_validate.py    |  6 ++-
 toolkit/cli/common.py          | 35 +++++++++++++++
 4 files changed, 130 insertions(+), 8 deletions(-)
 create mode 100644 tests/test_cli_years_filter.py

diff --git a/tests/test_cli_years_filter.py b/tests/test_cli_years_filter.py
new file mode 100644
index 0000000..38ad919
--- /dev/null
+++ b/tests/test_cli_years_filter.py
@@ -0,0 +1,81 @@
+from __future__ import annotations
+
+import shutil
+from pathlib import Path
+
+from typer.testing import CliRunner
+
+from toolkit.cli.app import app
+
+
+def _copy_project_example_multi_year(dst: Path) -> Path:
+    src = Path("project-example")
+    shutil.copytree(src, dst)
+    shutil.rmtree(dst / "_smoke_out", ignore_errors=True)
+
+    config_path = dst / "dataset.yml"
+    config_text = config_path.read_text(encoding="utf-8")
+    config_text = config_text.replace(' years: [2022]\n', ' years: [2022, 2023]\n')
+    config_path.write_text(config_text, encoding="utf-8")
+    return config_path
+
+
+def test_cli_run_all_supports_years_filter(tmp_path: Path, monkeypatch) -> None:
+    project_dir = tmp_path / "project-example"
+    config_path = _copy_project_example_multi_year(project_dir)
+
+    monkeypatch.chdir(tmp_path)
+    runner = CliRunner()
+    result = runner.invoke(
+        app,
+        ["run", "all", "--config", str(config_path), "--years", "2023", "--strict-config"],
+    )
+
+    assert result.exit_code == 0, result.output
+
+    root = project_dir / "_smoke_out"
+    raw_2022_dir = root / "data" / "raw" / "project_example" / "2022"
+    raw_2023_dir = root / "data" / "raw" / "project_example" / "2023"
+    mart_2022_dir = root / "data" / "mart" / "project_example" / "2022"
+    mart_2023_dir = root / "data" / "mart" / "project_example" / "2023"
+
+    assert not raw_2022_dir.exists()
+    assert raw_2023_dir.exists()
+    assert not mart_2022_dir.exists()
+    assert mart_2023_dir.exists()
+
+
+def test_cli_validate_all_supports_years_filter(tmp_path: Path, monkeypatch) -> None:
+    project_dir = tmp_path / "project-example"
+    config_path = _copy_project_example_multi_year(project_dir)
+
+    monkeypatch.chdir(tmp_path)
+    runner = CliRunner()
+
+    run_result = runner.invoke(
+        app,
+        ["run", "all", "--config", str(config_path), "--years", "2023", "--strict-config"],
+    )
+    assert run_result.exit_code == 0, run_result.output
+
+    validate_result = runner.invoke(
+        app,
+        ["validate", "all", "--config", str(config_path), "--years", "2023", "--strict-config"],
+    )
+    assert validate_result.exit_code == 0, validate_result.output
+
+
+def test_cli_years_filter_rejects_unconfigured_year(tmp_path: Path, monkeypatch) -> None:
+    project_dir = tmp_path / "project-example"
+    config_path = _copy_project_example_multi_year(project_dir)
+
+    monkeypatch.chdir(tmp_path)
+    runner = CliRunner()
+    result = runner.invoke(
+        app,
+        ["run", "all", "--config", str(config_path), "--years", "2024", "--strict-config"],
+    )
+
+    assert result.exit_code != 0
+    assert result.exception is not None
+    assert "Year(s) not configured in dataset.yml: 2024" in str(result.exception)
diff --git a/toolkit/cli/cmd_run.py b/toolkit/cli/cmd_run.py
index 55a9f0d..168b334 100644
--- a/toolkit/cli/cmd_run.py
+++ b/toolkit/cli/cmd_run.py
@@ -4,7 +4,7 @@
 
 import typer
 
-from toolkit.cli.common import iter_years, load_cfg_and_logger
+from toolkit.cli.common import iter_selected_years, load_cfg_and_logger
 from toolkit.clean.run import run_clean
 from toolkit.clean.validate import run_clean_validation
 from toolkit.cross.run import run_cross_year
@@ -115,6 +115,7 @@ def _print_execution_plan(cfg, year: int, layers: list[str], context: RunContext
 def run_cross_year_step(
     cfg,
     *,
+    years: list[int] | None = None,
     dry_run: bool = False,
     logger=None,
 ) -> None:
@@ -123,13 +124,14 @@ def run_cross_year_step(
 
     _validate_execution_plan(cfg, "cross_year")
     output_dir = layer_dataset_dir(cfg.root, "cross", cfg.dataset)
+    selected_years = list(years) if years is not None else list(cfg.years)
 
     if dry_run:
         typer.echo("Execution Plan")
         typer.echo(f"dataset: {cfg.dataset}")
         typer.echo("scope: cross_year")
         typer.echo("status: DRY_RUN")
-        typer.echo(f"years: {', '.join(str(year) for year in cfg.years)}")
+        typer.echo(f"years: {', '.join(str(year) for year in selected_years)}")
         typer.echo("steps: cross_year")
         typer.echo(f"output_dir: {output_dir}")
         typer.echo("")
@@ -138,14 +140,14 @@ def run_cross_year_step(
     logger.info(
         "RUN cross_year | dataset=%s years=%s base_dir=%s effective_root=%s root_source=%s",
         cfg.dataset,
-        ",".join(str(year) for year in cfg.years),
+        ",".join(str(year) for year in selected_years),
         cfg.base_dir,
         cfg.root,
         cfg.root_source,
     )
     run_cross_year(
         cfg.dataset,
-        cfg.years,
+        selected_years,
         cfg.root,
         cfg.cross_year,
         logger,
@@ -262,6 +264,7 @@ def _execute_layer(layer_name: str, target, *args, **kwargs) -> None:
 def run(
     step: str = typer.Argument(..., help="raw | clean | mart | cross_year | all"),
     config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"),
+    years: str | None = typer.Option(None, "--years", help="Comma-separated dataset years"),
     dry_run: bool = typer.Option(False, "--dry-run", help="Print execution plan without executing"),
     strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"),
 ):
@@ -271,15 +274,16 @@ def run(
     strict_config_flag = strict_config if isinstance(strict_config, bool) else False
     cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag)
     dry_run_flag = dry_run if isinstance(dry_run, bool) else False
+    selected_years = iter_selected_years(cfg, years_arg=years)
 
     if step not in {"raw", "clean", "mart", "cross_year", "all"}:
         raise typer.BadParameter("step must be one of: raw, clean, mart, cross_year, all")
 
     if step == "cross_year":
-        run_cross_year_step(cfg, dry_run=dry_run_flag, logger=logger)
+        run_cross_year_step(cfg, years=selected_years, dry_run=dry_run_flag, logger=logger)
         return
 
-    for year in iter_years(cfg, None):
+    for year in selected_years:
         run_year(cfg, year, step=step, dry_run=dry_run_flag, logger=logger)
 
 
diff --git a/toolkit/cli/cmd_validate.py b/toolkit/cli/cmd_validate.py
index 0ba200d..f08bc1d 100644
--- a/toolkit/cli/cmd_validate.py
+++ b/toolkit/cli/cmd_validate.py
@@ -2,7 +2,7 @@
 
 import typer
 
-from toolkit.cli.common import iter_years, load_cfg_and_logger
+from toolkit.cli.common import iter_selected_years, load_cfg_and_logger
 from toolkit.clean.validate import run_clean_validation
 from toolkit.mart.validate import run_mart_validation
 
@@ -15,6 +15,7 @@ def _raise_on_failed_summary(summary: dict[str, object]) -> None:
 def validate(
     step: str = typer.Argument(..., help="clean | mart | all"),
     config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"),
+    years: str | None = typer.Option(None, "--years", help="Comma-separated dataset years"),
     strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"),
 ):
     """
@@ -25,8 +26,9 @@ def validate(
     """
     strict_config_flag = strict_config if isinstance(strict_config, bool) else False
     cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag)
+    selected_years = iter_selected_years(cfg, years_arg=years)
 
-    for year in iter_years(cfg, None):
+    for year in selected_years:
         if step == "all":
             _raise_on_failed_summary(run_clean_validation(cfg, year, logger))
             _raise_on_failed_summary(run_mart_validation(cfg, year, logger))
diff --git a/toolkit/cli/common.py b/toolkit/cli/common.py
index e56476c..5c76feb 100644
--- a/toolkit/cli/common.py
+++ b/toolkit/cli/common.py
@@ -31,3 +31,38 @@ def iter_years(cfg, year_arg: int | None = None) -> list[int]:
     if year_arg not in cfg.years:
         raise ValueError(f"Year {year_arg} is not configured in dataset.yml")
     return [year_arg]
+
+
+def iter_selected_years(
+    cfg,
+    *,
+    year_arg: int | None = None,
+    years_arg: str | None = None,
+) -> list[int]:
+    if year_arg is not None and years_arg is not None:
+        raise ValueError("Use either --year or --years, not both")
+
+    if years_arg is None:
+        return iter_years(cfg, year_arg)
+
+    requested: list[int] = []
+    for raw_part in years_arg.split(","):
+        part = raw_part.strip()
+        if not part:
+            raise ValueError("Invalid --years value: empty year entry")
+        try:
+            year = int(part)
+        except ValueError as exc:
+            raise ValueError(f"Invalid --years value: '{part}' is not an integer year") from exc
+        if year not in requested:
+            requested.append(year)
+
+    if not requested:
+        raise ValueError("Invalid --years value: no years provided")
+
+    invalid = [year for year in requested if year not in cfg.years]
+    if invalid:
+        listed = ", ".join(str(year) for year in invalid)
+        raise ValueError(f"Year(s) not configured in dataset.yml: {listed}")
+
+    return requested

From 9bfa043043d5edc09775d40365bdb0231c6e4cbd Mon Sep 17 00:00:00 2001
From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com>
Date: Wed, 11 Mar 2026 22:18:29 +0000
Subject: [PATCH 2/2] fix: gestisci OptionInfo di Typer nel filtro years

---
Reviewer notes (below the three-dash line; not part of the commit message):
  * Subject in English: "fix: handle Typer's OptionInfo in the years filter".
  * `run` and `validate` now forward `years` only when it is a `str`; any
    other value is replaced by None. This mirrors the `isinstance(..., bool)`
    guards already applied to `strict_config` (and to `dry_run` in `run`).
  * NOTE(review): going by the subject, the non-str case is presumably the
    Typer default object received when the command function is called
    directly from Python, as the new test does with
    `run_cmd(step="all", config=...)`. Confirm.
  * The new test checks that, without `--years`, raw output directories
    exist for both 2022 and 2023.

 tests/test_cli_years_filter.py | 18 ++++++++++++++++++
 toolkit/cli/cmd_run.py         |  3 ++-
 toolkit/cli/cmd_validate.py    |  3 ++-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/tests/test_cli_years_filter.py b/tests/test_cli_years_filter.py
index 38ad919..8e6699f 100644
--- a/tests/test_cli_years_filter.py
+++ b/tests/test_cli_years_filter.py
@@ -79,3 +79,21 @@ def test_cli_years_filter_rejects_unconfigured_year(tmp_path: Path, monkeypatch)
     assert result.exit_code != 0
     assert result.exception is not None
     assert "Year(s) not configured in dataset.yml: 2024" in str(result.exception)
+
+
+def test_cli_run_all_without_years_keeps_direct_python_invocation_compat(
+    tmp_path: Path,
+    monkeypatch,
+) -> None:
+    project_dir = tmp_path / "project-example"
+    config_path = _copy_project_example_multi_year(project_dir)
+
+    monkeypatch.chdir(tmp_path)
+
+    from toolkit.cli.cmd_run import run as run_cmd
+
+    run_cmd(step="all", config=str(config_path))
+
+    root = project_dir / "_smoke_out"
+    assert (root / "data" / "raw" / "project_example" / "2022").exists()
+    assert (root / "data" / "raw" / "project_example" / "2023").exists()
diff --git a/toolkit/cli/cmd_run.py b/toolkit/cli/cmd_run.py
index 168b334..87caf85 100644
--- a/toolkit/cli/cmd_run.py
+++ b/toolkit/cli/cmd_run.py
@@ -274,7 +274,8 @@ def run(
     strict_config_flag = strict_config if isinstance(strict_config, bool) else False
     cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag)
     dry_run_flag = dry_run if isinstance(dry_run, bool) else False
-    selected_years = iter_selected_years(cfg, years_arg=years)
+    years_arg = years if isinstance(years, str) else None
+    selected_years = iter_selected_years(cfg, years_arg=years_arg)
 
     if step not in {"raw", "clean", "mart", "cross_year", "all"}:
         raise typer.BadParameter("step must be one of: raw, clean, mart, cross_year, all")
diff --git a/toolkit/cli/cmd_validate.py b/toolkit/cli/cmd_validate.py
index f08bc1d..4b2c8f7 100644
--- a/toolkit/cli/cmd_validate.py
+++ b/toolkit/cli/cmd_validate.py
@@ -26,7 +26,8 @@ def validate(
     """
     strict_config_flag = strict_config if isinstance(strict_config, bool) else False
     cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag)
-    selected_years = iter_selected_years(cfg, years_arg=years)
+    years_arg = years if isinstance(years, str) else None
+    selected_years = iter_selected_years(cfg, years_arg=years_arg)
 
     for year in selected_years:
         if step == "all":