diff --git a/tests/test_cli_years_filter.py b/tests/test_cli_years_filter.py new file mode 100644 index 0000000..8e6699f --- /dev/null +++ b/tests/test_cli_years_filter.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import shutil +from pathlib import Path + +from typer.testing import CliRunner + +from toolkit.cli.app import app + + +def _copy_project_example_multi_year(dst: Path) -> Path: + src = Path("project-example") + shutil.copytree(src, dst) + shutil.rmtree(dst / "_smoke_out", ignore_errors=True) + + config_path = dst / "dataset.yml" + config_text = config_path.read_text(encoding="utf-8") + config_text = config_text.replace(' years: [2022]\n', ' years: [2022, 2023]\n') + config_path.write_text(config_text, encoding="utf-8") + return config_path + + +def test_cli_run_all_supports_years_filter(tmp_path: Path, monkeypatch) -> None: + project_dir = tmp_path / "project-example" + config_path = _copy_project_example_multi_year(project_dir) + + monkeypatch.chdir(tmp_path) + runner = CliRunner() + result = runner.invoke( + app, + ["run", "all", "--config", str(config_path), "--years", "2023", "--strict-config"], + ) + + assert result.exit_code == 0, result.output + + root = project_dir / "_smoke_out" + raw_2022_dir = root / "data" / "raw" / "project_example" / "2022" + raw_2023_dir = root / "data" / "raw" / "project_example" / "2023" + mart_2022_dir = root / "data" / "mart" / "project_example" / "2022" + mart_2023_dir = root / "data" / "mart" / "project_example" / "2023" + + assert not raw_2022_dir.exists() + assert raw_2023_dir.exists() + assert not mart_2022_dir.exists() + assert mart_2023_dir.exists() + + +def test_cli_validate_all_supports_years_filter(tmp_path: Path, monkeypatch) -> None: + project_dir = tmp_path / "project-example" + config_path = _copy_project_example_multi_year(project_dir) + + monkeypatch.chdir(tmp_path) + runner = CliRunner() + + run_result = runner.invoke( + app, + ["run", "all", "--config", str(config_path), "--years", "2023", 
"--strict-config"], + ) + assert run_result.exit_code == 0, run_result.output + + validate_result = runner.invoke( + app, + ["validate", "all", "--config", str(config_path), "--years", "2023", "--strict-config"], + ) + assert validate_result.exit_code == 0, validate_result.output + + +def test_cli_years_filter_rejects_unconfigured_year(tmp_path: Path, monkeypatch) -> None: + project_dir = tmp_path / "project-example" + config_path = _copy_project_example_multi_year(project_dir) + + monkeypatch.chdir(tmp_path) + runner = CliRunner() + result = runner.invoke( + app, + ["run", "all", "--config", str(config_path), "--years", "2024", "--strict-config"], + ) + + assert result.exit_code != 0 + assert result.exception is not None + assert "Year(s) not configured in dataset.yml: 2024" in str(result.exception) + + +def test_cli_run_all_without_years_keeps_direct_python_invocation_compat( + tmp_path: Path, + monkeypatch, +) -> None: + project_dir = tmp_path / "project-example" + config_path = _copy_project_example_multi_year(project_dir) + + monkeypatch.chdir(tmp_path) + + from toolkit.cli.cmd_run import run as run_cmd + + run_cmd(step="all", config=str(config_path)) + + root = project_dir / "_smoke_out" + assert (root / "data" / "raw" / "project_example" / "2022").exists() + assert (root / "data" / "raw" / "project_example" / "2023").exists() diff --git a/toolkit/cli/cmd_run.py b/toolkit/cli/cmd_run.py index 55a9f0d..87caf85 100644 --- a/toolkit/cli/cmd_run.py +++ b/toolkit/cli/cmd_run.py @@ -4,7 +4,7 @@ import typer -from toolkit.cli.common import iter_years, load_cfg_and_logger +from toolkit.cli.common import iter_selected_years, load_cfg_and_logger from toolkit.clean.run import run_clean from toolkit.clean.validate import run_clean_validation from toolkit.cross.run import run_cross_year @@ -115,6 +115,7 @@ def _print_execution_plan(cfg, year: int, layers: list[str], context: RunContext def run_cross_year_step( cfg, *, + years: list[int] | None = None, dry_run: bool = 
False, logger=None, ) -> None: @@ -123,13 +124,14 @@ def run_cross_year_step( _validate_execution_plan(cfg, "cross_year") output_dir = layer_dataset_dir(cfg.root, "cross", cfg.dataset) + selected_years = list(years) if years is not None else list(cfg.years) if dry_run: typer.echo("Execution Plan") typer.echo(f"dataset: {cfg.dataset}") typer.echo("scope: cross_year") typer.echo("status: DRY_RUN") - typer.echo(f"years: {', '.join(str(year) for year in cfg.years)}") + typer.echo(f"years: {', '.join(str(year) for year in selected_years)}") typer.echo("steps: cross_year") typer.echo(f"output_dir: {output_dir}") typer.echo("") @@ -138,14 +140,14 @@ def run_cross_year_step( logger.info( "RUN cross_year | dataset=%s years=%s base_dir=%s effective_root=%s root_source=%s", cfg.dataset, - ",".join(str(year) for year in cfg.years), + ",".join(str(year) for year in selected_years), cfg.base_dir, cfg.root, cfg.root_source, ) run_cross_year( cfg.dataset, - cfg.years, + selected_years, cfg.root, cfg.cross_year, logger, @@ -262,6 +264,7 @@ def _execute_layer(layer_name: str, target, *args, **kwargs) -> None: def run( step: str = typer.Argument(..., help="raw | clean | mart | cross_year | all"), config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"), + years: str | None = typer.Option(None, "--years", help="Comma-separated dataset years"), dry_run: bool = typer.Option(False, "--dry-run", help="Print execution plan without executing"), strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"), ): @@ -271,15 +274,17 @@ def run( strict_config_flag = strict_config if isinstance(strict_config, bool) else False cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag) dry_run_flag = dry_run if isinstance(dry_run, bool) else False + years_arg = years if isinstance(years, str) else None + selected_years = iter_selected_years(cfg, years_arg=years_arg) if step not in {"raw", "clean", "mart", 
"cross_year", "all"}: raise typer.BadParameter("step must be one of: raw, clean, mart, cross_year, all") if step == "cross_year": - run_cross_year_step(cfg, dry_run=dry_run_flag, logger=logger) + run_cross_year_step(cfg, years=selected_years, dry_run=dry_run_flag, logger=logger) return - for year in iter_years(cfg, None): + for year in selected_years: run_year(cfg, year, step=step, dry_run=dry_run_flag, logger=logger) diff --git a/toolkit/cli/cmd_validate.py b/toolkit/cli/cmd_validate.py index 0ba200d..4b2c8f7 100644 --- a/toolkit/cli/cmd_validate.py +++ b/toolkit/cli/cmd_validate.py @@ -2,7 +2,7 @@ import typer -from toolkit.cli.common import iter_years, load_cfg_and_logger +from toolkit.cli.common import iter_selected_years, load_cfg_and_logger from toolkit.clean.validate import run_clean_validation from toolkit.mart.validate import run_mart_validation @@ -15,6 +15,7 @@ def _raise_on_failed_summary(summary: dict[str, object]) -> None: def validate( step: str = typer.Argument(..., help="clean | mart | all"), config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"), + years: str | None = typer.Option(None, "--years", help="Comma-separated dataset years"), strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"), ): """ @@ -25,8 +26,10 @@ def validate( """ strict_config_flag = strict_config if isinstance(strict_config, bool) else False cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag) + years_arg = years if isinstance(years, str) else None + selected_years = iter_selected_years(cfg, years_arg=years_arg) - for year in iter_years(cfg, None): + for year in selected_years: if step == "all": _raise_on_failed_summary(run_clean_validation(cfg, year, logger)) _raise_on_failed_summary(run_mart_validation(cfg, year, logger)) diff --git a/toolkit/cli/common.py b/toolkit/cli/common.py index e56476c..5c76feb 100644 --- a/toolkit/cli/common.py +++ b/toolkit/cli/common.py @@ -31,3 
def iter_selected_years(
    cfg,
    *,
    year_arg: int | None = None,
    years_arg: str | None = None,
) -> list[int]:
    """Resolve which dataset years a CLI command should process.

    Args:
        cfg: Loaded dataset configuration; only ``cfg.years`` is read here.
        year_arg: Single year selector (``--year``), forwarded to
            ``iter_years`` when ``years_arg`` is not given.
        years_arg: Comma-separated year selector (``--years``), for example
            ``"2022, 2023"``. Whitespace around each entry is ignored.

    Returns:
        The requested years in the order they were given, with duplicates
        removed. When ``years_arg`` is ``None`` the result of
        ``iter_years(cfg, year_arg)`` is returned unchanged.

    Raises:
        ValueError: If both selectors are given, if an entry is empty or is
            not a plain integer, or if a requested year is not present in
            ``cfg.years``.
    """
    if year_arg is not None and years_arg is not None:
        raise ValueError("Use either --year or --years, not both")

    if years_arg is None:
        return iter_years(cfg, year_arg)

    requested: list[int] = []
    for raw_part in years_arg.split(","):
        part = raw_part.strip()
        if not part:
            raise ValueError("Invalid --years value: empty year entry")

        not_an_integer = f"Invalid --years value: '{part}' is not an integer year"
        # int() accepts digit-group underscores ("2_023" == 2023); that is
        # Python literal syntax, not something a CLI year should allow.
        if "_" in part:
            raise ValueError(not_an_integer)
        try:
            year = int(part)
        except ValueError as exc:
            raise ValueError(not_an_integer) from exc

        # Preserve first-seen order while dropping repeated years.
        if year not in requested:
            requested.append(year)

    # No emptiness check is needed: str.split always yields at least one
    # entry, and every entry either raised above or was appended.
    invalid = [year for year in requested if year not in cfg.years]
    if invalid:
        listed = ", ".join(str(year) for year in invalid)
        raise ValueError(f"Year(s) not configured in dataset.yml: {listed}")

    return requested