Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions tests/test_cli_years_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from __future__ import annotations

import shutil
from pathlib import Path

from typer.testing import CliRunner

from toolkit.cli.app import app


def _copy_project_example_multi_year(dst: Path) -> Path:
    """Copy the ``project-example`` fixture to *dst* and make it multi-year.

    The fixture directory is resolved relative to the current working
    directory. Any ``_smoke_out`` directory that came along with the copy is
    removed, and the single-year ``years`` entry of ``dataset.yml`` is
    rewritten so that it lists both 2022 and 2023.

    Args:
        dst: Destination directory for the copy (``shutil.copytree`` creates it).

    Returns:
        Path of the rewritten ``dataset.yml`` inside *dst*.

    Raises:
        RuntimeError: If ``dataset.yml`` does not contain the expected
            single-year entry. Without this check ``str.replace`` would be a
            silent no-op and the callers would run against a single-year
            fixture while believing it has two years.
    """
    single_year_entry = ' years: [2022]\n'
    multi_year_entry = ' years: [2022, 2023]\n'

    shutil.copytree(Path("project-example"), dst)
    # Drop outputs of earlier local runs that were copied with the fixture.
    shutil.rmtree(dst / "_smoke_out", ignore_errors=True)

    config_path = dst / "dataset.yml"
    config_text = config_path.read_text(encoding="utf-8")
    if single_year_entry not in config_text:
        raise RuntimeError(
            f"{config_path} does not contain the expected entry "
            f"{single_year_entry!r}; cannot build the multi-year fixture"
        )
    config_path.write_text(
        config_text.replace(single_year_entry, multi_year_entry),
        encoding="utf-8",
    )
    return config_path


def test_cli_run_all_supports_years_filter(tmp_path: Path, monkeypatch) -> None:
    """``run all --years 2023`` must produce 2023 outputs and no 2022 outputs."""
    workdir = tmp_path / "project-example"
    cfg_file = _copy_project_example_multi_year(workdir)
    monkeypatch.chdir(tmp_path)

    cli_args = ["run", "all", "--config", str(cfg_file), "--years", "2023", "--strict-config"]
    outcome = CliRunner().invoke(app, cli_args)

    assert outcome.exit_code == 0, outcome.output

    # Check the raw layer first, then the mart layer: the filtered-out year
    # must be absent and the selected year must be present in each.
    data_root = workdir / "_smoke_out" / "data"
    for layer in ("raw", "mart"):
        layer_dir = data_root / layer / "project_example"
        assert not (layer_dir / "2022").exists()
        assert (layer_dir / "2023").exists()


def test_cli_validate_all_supports_years_filter(tmp_path: Path, monkeypatch) -> None:
    """``validate all --years 2023`` must succeed after a 2023-only run."""
    workdir = tmp_path / "project-example"
    cfg_file = _copy_project_example_multi_year(workdir)
    monkeypatch.chdir(tmp_path)

    cli = CliRunner()
    # Both commands receive the same config and year selection.
    shared_opts = ["--config", str(cfg_file), "--years", "2023", "--strict-config"]

    ran = cli.invoke(app, ["run", "all", *shared_opts])
    assert ran.exit_code == 0, ran.output

    validated = cli.invoke(app, ["validate", "all", *shared_opts])
    assert validated.exit_code == 0, validated.output


def test_cli_years_filter_rejects_unconfigured_year(tmp_path: Path, monkeypatch) -> None:
    """Requesting a year absent from ``dataset.yml`` must fail with a clear message."""
    workdir = tmp_path / "project-example"
    cfg_file = _copy_project_example_multi_year(workdir)
    monkeypatch.chdir(tmp_path)

    cli_args = ["run", "all", "--config", str(cfg_file), "--years", "2024", "--strict-config"]
    failure = CliRunner().invoke(app, cli_args)

    assert failure.exit_code != 0
    assert failure.exception is not None
    assert "Year(s) not configured in dataset.yml: 2024" in str(failure.exception)


def test_cli_run_all_without_years_keeps_direct_python_invocation_compat(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Calling ``run`` as a plain function without ``years`` processes every year."""
    workdir = tmp_path / "project-example"
    cfg_file = _copy_project_example_multi_year(workdir)
    monkeypatch.chdir(tmp_path)

    from toolkit.cli.cmd_run import run as run_cmd

    # Only ``step`` and ``config`` are passed; every other parameter keeps
    # its declared default, exactly as in a direct Python call.
    run_cmd(step="all", config=str(cfg_file))

    raw_dir = workdir / "_smoke_out" / "data" / "raw" / "project_example"
    for year in ("2022", "2023"):
        assert (raw_dir / year).exists()
17 changes: 11 additions & 6 deletions toolkit/cli/cmd_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import typer

from toolkit.cli.common import iter_years, load_cfg_and_logger
from toolkit.cli.common import iter_selected_years, load_cfg_and_logger
from toolkit.clean.run import run_clean
from toolkit.clean.validate import run_clean_validation
from toolkit.cross.run import run_cross_year
Expand Down Expand Up @@ -115,6 +115,7 @@ def _print_execution_plan(cfg, year: int, layers: list[str], context: RunContext
def run_cross_year_step(
cfg,
*,
years: list[int] | None = None,
dry_run: bool = False,
logger=None,
) -> None:
Expand All @@ -123,13 +124,14 @@ def run_cross_year_step(

_validate_execution_plan(cfg, "cross_year")
output_dir = layer_dataset_dir(cfg.root, "cross", cfg.dataset)
selected_years = list(years) if years is not None else list(cfg.years)

if dry_run:
typer.echo("Execution Plan")
typer.echo(f"dataset: {cfg.dataset}")
typer.echo("scope: cross_year")
typer.echo("status: DRY_RUN")
typer.echo(f"years: {', '.join(str(year) for year in cfg.years)}")
typer.echo(f"years: {', '.join(str(year) for year in selected_years)}")
typer.echo("steps: cross_year")
typer.echo(f"output_dir: {output_dir}")
typer.echo("")
Expand All @@ -138,14 +140,14 @@ def run_cross_year_step(
logger.info(
"RUN cross_year | dataset=%s years=%s base_dir=%s effective_root=%s root_source=%s",
cfg.dataset,
",".join(str(year) for year in cfg.years),
",".join(str(year) for year in selected_years),
cfg.base_dir,
cfg.root,
cfg.root_source,
)
run_cross_year(
cfg.dataset,
cfg.years,
selected_years,
cfg.root,
cfg.cross_year,
logger,
Expand Down Expand Up @@ -262,6 +264,7 @@ def _execute_layer(layer_name: str, target, *args, **kwargs) -> None:
def run(
step: str = typer.Argument(..., help="raw | clean | mart | cross_year | all"),
config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"),
years: str | None = typer.Option(None, "--years", help="Comma-separated dataset years"),
dry_run: bool = typer.Option(False, "--dry-run", help="Print execution plan without executing"),
strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"),
):
Expand All @@ -271,15 +274,17 @@ def run(
strict_config_flag = strict_config if isinstance(strict_config, bool) else False
cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag)
dry_run_flag = dry_run if isinstance(dry_run, bool) else False
years_arg = years if isinstance(years, str) else None
selected_years = iter_selected_years(cfg, years_arg=years_arg)

if step not in {"raw", "clean", "mart", "cross_year", "all"}:
raise typer.BadParameter("step must be one of: raw, clean, mart, cross_year, all")

if step == "cross_year":
run_cross_year_step(cfg, dry_run=dry_run_flag, logger=logger)
run_cross_year_step(cfg, years=selected_years, dry_run=dry_run_flag, logger=logger)
return

for year in iter_years(cfg, None):
for year in selected_years:
run_year(cfg, year, step=step, dry_run=dry_run_flag, logger=logger)


Expand Down
7 changes: 5 additions & 2 deletions toolkit/cli/cmd_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import typer

from toolkit.cli.common import iter_years, load_cfg_and_logger
from toolkit.cli.common import iter_selected_years, load_cfg_and_logger
from toolkit.clean.validate import run_clean_validation
from toolkit.mart.validate import run_mart_validation

Expand All @@ -15,6 +15,7 @@ def _raise_on_failed_summary(summary: dict[str, object]) -> None:
def validate(
step: str = typer.Argument(..., help="clean | mart | all"),
config: str = typer.Option(..., "--config", "-c", help="Path to dataset.yml"),
years: str | None = typer.Option(None, "--years", help="Comma-separated dataset years"),
strict_config: bool = typer.Option(False, "--strict-config", help="Treat deprecated config forms as errors"),
):
"""
Expand All @@ -25,8 +26,10 @@ def validate(
"""
strict_config_flag = strict_config if isinstance(strict_config, bool) else False
cfg, logger = load_cfg_and_logger(config, strict_config=strict_config_flag)
years_arg = years if isinstance(years, str) else None
selected_years = iter_selected_years(cfg, years_arg=years_arg)

for year in iter_years(cfg, None):
for year in selected_years:
if step == "all":
_raise_on_failed_summary(run_clean_validation(cfg, year, logger))
_raise_on_failed_summary(run_mart_validation(cfg, year, logger))
Expand Down
35 changes: 35 additions & 0 deletions toolkit/cli/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,38 @@ def iter_years(cfg, year_arg: int | None = None) -> list[int]:
if year_arg not in cfg.years:
raise ValueError(f"Year {year_arg} is not configured in dataset.yml")
return [year_arg]


def iter_selected_years(
cfg,
*,
year_arg: int | None = None,
years_arg: str | None = None,
) -> list[int]:
if year_arg is not None and years_arg is not None:
raise ValueError("Use either --year or --years, not both")

if years_arg is None:
return iter_years(cfg, year_arg)

requested: list[int] = []
for raw_part in years_arg.split(","):
part = raw_part.strip()
if not part:
raise ValueError("Invalid --years value: empty year entry")
try:
year = int(part)
except ValueError as exc:
raise ValueError(f"Invalid --years value: '{part}' is not an integer year") from exc
if year not in requested:
requested.append(year)

if not requested:
raise ValueError("Invalid --years value: no years provided")

invalid = [year for year in requested if year not in cfg.years]
if invalid:
listed = ", ".join(str(year) for year in invalid)
raise ValueError(f"Year(s) not configured in dataset.yml: {listed}")

return requested
Loading