dataciviclab · Gabrymi93 · Mar 11, 2026 · Mar 11, 2026 · Mar 11, 2026
diff --git a/tests/test_cli_years_filter.py b/tests/test_cli_years_filter.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+import shutil
+from pathlib import Path
+
+from typer.testing import CliRunner
+
+from toolkit.cli.app import app
+
+
+def _copy_project_example_multi_year(dst: Path) -> Path:
+    """Copy ``project-example`` to *dst* with years [2022, 2023]; return its dataset.yml path."""
+    shutil.copytree(Path("project-example"), dst)  # source is resolved against the current cwd
+    shutil.rmtree(dst / "_smoke_out", ignore_errors=True)  # discard any copied output tree
+    config_path = dst / "dataset.yml"
+    single_year, multi_year = "  years: [2022]\n", "  years: [2022, 2023]\n"
+    config_text = config_path.read_text(encoding="utf-8")
+    assert single_year in config_text, "dataset.yml template lacks 'years: [2022]'; replace is a no-op"
+    config_path.write_text(config_text.replace(single_year, multi_year), encoding="utf-8")
+    return config_path
+
+
+def test_cli_run_all_supports_years_filter(tmp_path: Path, monkeypatch) -> None:
+    """``run all --years 2023`` writes raw and mart outputs for 2023 but not for 2022."""
+    workdir = tmp_path / "project-example"
+    dataset_yml = _copy_project_example_multi_year(workdir)
+    monkeypatch.chdir(tmp_path)
+
+    # Only 2023 is requested on the command line.
+    cli_args = ["run", "all", "--config", str(dataset_yml), "--years", "2023", "--strict-config"]
+    outcome = CliRunner().invoke(app, cli_args)
+
+    assert outcome.exit_code == 0, outcome.output
+
+    data_root = workdir / "_smoke_out" / "data"
+    for layer in ("raw", "mart"):
+        dataset_dir = data_root / layer / "project_example"
+        skipped_year_dir = dataset_dir / "2022"
+        selected_year_dir = dataset_dir / "2023"
+
+        # The year that was not selected must leave no directory in this layer.
+        assert not skipped_year_dir.exists()
+
+        # The selected year must have its directory.
+        assert selected_year_dir.exists()
+
+
+def test_cli_validate_all_supports_years_filter(tmp_path: Path, monkeypatch) -> None:
+    """``validate all --years 2023`` succeeds after a ``run all`` limited to the same year."""
+    workdir = tmp_path / "project-example"
+    dataset_yml = _copy_project_example_multi_year(workdir)
+
+    monkeypatch.chdir(tmp_path)
+    cli = CliRunner()
+
+    # Both invocations share every argument except the leading sub-command name.
+    shared_args = ["all", "--config", str(dataset_yml), "--years", "2023", "--strict-config"]
+
+    # Run the pipeline for 2023 first ...
+    ran = cli.invoke(app, ["run", *shared_args])
+    assert ran.exit_code == 0, ran.output
+
+    # ... then validate that same year.
+    validated = cli.invoke(app, ["validate", *shared_args])
+    assert validated.exit_code == 0, validated.output
+
+
+def test_cli_years_filter_rejects_unconfigured_year(tmp_path: Path, monkeypatch) -> None:
+    """A ``--years`` entry that the config does not list makes ``run`` fail with a clear error."""
+    workdir = tmp_path / "project-example"
+    dataset_yml = _copy_project_example_multi_year(workdir)
+    monkeypatch.chdir(tmp_path)
+
+    # 2024 is deliberately outside the configured years.
+    cli_args = ["run", "all", "--config", str(dataset_yml), "--years", "2024", "--strict-config"]
+    outcome = CliRunner().invoke(app, cli_args)
+
+    assert outcome.exit_code != 0
+    failure = outcome.exception
+    assert failure is not None
+    assert "Year(s) not configured in dataset.yml: 2024" in str(failure)
+
+
+def test_cli_run_all_without_years_keeps_direct_python_invocation_compat(
+    tmp_path: Path,
+    monkeypatch,
+) -> None:
+    """Calling ``run`` as a plain function without ``years`` still covers 2022 and 2023."""
+    workdir = tmp_path / "project-example"
+    dataset_yml = _copy_project_example_multi_year(workdir)
+    monkeypatch.chdir(tmp_path)
+
+    from toolkit.cli.cmd_run import run as run_cmd
+    # ``years`` is left at its declared default, as a direct Python caller would do.
+    run_cmd(step="all", config=str(dataset_yml))
+
+    raw_dir = workdir / "_smoke_out" / "data" / "raw" / "project_example"
+    for year in ("2022", "2023"):
+        assert (raw_dir / year).exists()
diff --git a/toolkit/cli/cmd_run.py b/toolkit/cli/cmd_run.py
@@ -4,7 +4,7 @@
 
 import typer
 
-from toolkit.cli.common import iter_years, load_cfg_and_logger
+from toolkit.cli.common import iter_selected_years, load_cfg_and_logger
 from toolkit.clean.run import run_clean
 from toolkit.clean.validate import run_clean_validation
 from toolkit.cross.run import run_cross_year
@@ -115,6 +115,7 @@ def _print_execution_plan(cfg, year: int, layers: list[str], context: RunContext
 def run_cross_year_step(
     cfg,
     *,
+    years: list[int] | None = None,
     dry_run: bool = False,
     logger=None,
 ) -> None:
@@ -123,13 +124,14 @@ def run_cross_year_step(
 
     _validate_execution_plan(cfg, "cross_year")
     output_dir = layer_dataset_dir(cfg.root, "cross", cfg.dataset)
+    selected_years = list(years) if years is not None else list(cfg.years)
 
     if dry_run:
         typer.echo("Execution Plan")
         typer.echo(f"dataset: {cfg.dataset}")
         typer.echo("scope: cross_year")
         typer.echo("status: DRY_RUN")
-        typer.echo(f"years: {', '.join(str(year) for year in cfg.years)}")
+        typer.echo(f"years: {', '.join(str(year) for year in selected_years)}")
         typer.echo("steps: cross_year")
         typer.echo(f"output_dir: {output_dir}")
         typer.echo("")
@@ -138,14 +140,14 @@ def run_cross_year_step(
     logger.info(
         "RUN cross_year | dataset=%s years=%s base_dir=%s effective_root=%s root_source=%s",
         cfg.dataset,
-        ",".join(str(year) for year in cfg.years),
+        ",".join(str(year) for year in selected_years),
         cfg.base_dir,
         cfg.root,
         cfg.root_source,
     )
     run_cross_year(
         cfg.dataset,
-        cfg.years,
+        selected_years,
         cfg.root,
         cfg.cross_year,
         logger,
@@ -262,6 +264,7 @@ def _execute_layer(layer_name: str, target, *args, **kwargs) -> None:
 def run(
     step: str = typer.Argument(..., help="raw | clean | mart | cross_year | all"),
     config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"),
+    years: str | None = typer.Option(None, "--years", help="Comma-separated dataset years"),
     dry_run: bool = typer.Option(False, "--dry-run", help="Print execution plan without executing"),
     strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"),
 ):
@@ -271,15 +274,17 @@ def run(
     strict_config_flag = strict_config if isinstance(strict_config, bool) else False
     cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag)
     dry_run_flag = dry_run if isinstance(dry_run, bool) else False
+    years_arg = years if isinstance(years, str) else None
+    selected_years = iter_selected_years(cfg, years_arg=years_arg)
 
     if step not in {"raw", "clean", "mart", "cross_year", "all"}:
         raise typer.BadParameter("step must be one of: raw, clean, mart, cross_year, all")
 
     if step == "cross_year":
-        run_cross_year_step(cfg, dry_run=dry_run_flag, logger=logger)
+        run_cross_year_step(cfg, years=selected_years, dry_run=dry_run_flag, logger=logger)
         return
 
-    for year in iter_years(cfg, None):
+    for year in selected_years:
         run_year(cfg, year, step=step, dry_run=dry_run_flag, logger=logger)
 
 

diff --git a/toolkit/cli/cmd_validate.py b/toolkit/cli/cmd_validate.py
@@ -2,7 +2,7 @@
 
 import typer
 
-from toolkit.cli.common import iter_years, load_cfg_and_logger
+from toolkit.cli.common import iter_selected_years, load_cfg_and_logger
 from toolkit.clean.validate import run_clean_validation
 from toolkit.mart.validate import run_mart_validation
 
@@ -15,6 +15,7 @@ def _raise_on_failed_summary(summary: dict[str, object]) -> None:
 def validate(
     step: str = typer.Argument(..., help="clean | mart | all"),
     config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"),
+    years: str | None = typer.Option(None, "--years", help="Comma-separated dataset years"),
     strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"),
 ):
     """
@@ -25,8 +26,10 @@ def validate(
     """
     strict_config_flag = strict_config if isinstance(strict_config, bool) else False
     cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag)
+    years_arg = years if isinstance(years, str) else None
+    selected_years = iter_selected_years(cfg, years_arg=years_arg)
 
-    for year in iter_years(cfg, None):
+    for year in selected_years:
         if step == "all":
             _raise_on_failed_summary(run_clean_validation(cfg, year, logger))
             _raise_on_failed_summary(run_mart_validation(cfg, year, logger))

diff --git a/toolkit/cli/common.py b/toolkit/cli/common.py
@@ -31,3 +31,38 @@ def iter_years(cfg, year_arg: int | None = None) -> list[int]:
     if year_arg not in cfg.years:
         raise ValueError(f"Year {year_arg} is not configured in dataset.yml")
     return [year_arg]
+
+
+def iter_selected_years(
+    cfg,
+    *,
+    year_arg: int | None = None,
+    years_arg: str | None = None,
+) -> list[int]:
+    """Resolve ``--year`` / ``--years`` into the list of configured years to process.
+
+    ``years_arg`` is comma-separated; duplicates are dropped and the given order is kept.
+    Raises ``ValueError`` on conflicting selectors or blank, non-integer, unconfigured entries.
+    """
+    if year_arg is not None and years_arg is not None:
+        raise ValueError("Use either --year or --years, not both")
+    if years_arg is None:
+        return iter_years(cfg, year_arg)
+
+    requested: list[int] = []
+    for raw_part in years_arg.split(","):
+        part = raw_part.strip()
+        if not part:
+            raise ValueError("Invalid --years value: empty year entry")
+        try:
+            year = int(part)
+        except ValueError as exc:
+            raise ValueError(f"Invalid --years value: '{part}' is not an integer year") from exc
+        if year not in requested:
+            requested.append(year)
+    # str.split always yields at least one part, so ``requested`` cannot be empty here.
+    invalid = [year for year in requested if year not in cfg.years]
+    if invalid:
+        listed = ", ".join(str(year) for year in invalid)
+        raise ValueError(f"Year(s) not configured in dataset.yml: {listed}")
+    return requested