diff --git a/tests/test_config.py b/tests/test_config.py index defa021..dfeadc6 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -124,6 +124,35 @@ def test_load_config_does_not_transform_non_whitelisted_path_like_fields(tmp_pat assert cfg.mart["label_path"] == "labels/mart.txt" +def test_load_config_preserves_year_template_in_raw_local_file_path(tmp_path: Path): + project_dir = tmp_path / "project" + project_dir.mkdir() + + yml = project_dir / "dataset.yml" + yml.write_text( + """ +root: "./out" +dataset: + name: demo + years: [2022, 2023] +raw: + sources: + - type: local_file + args: + path: "data/raw_{year}.csv" + filename: "raw_{year}.csv" +clean: {} +mart: {} +""".strip(), + encoding="utf-8", + ) + + cfg = load_config(yml) + + assert cfg.raw["sources"][0]["args"]["path"] == str((project_dir / "data" / "raw_{year}.csv").resolve()) + assert cfg.raw["sources"][0]["args"]["filename"] == "raw_{year}.csv" + + def test_load_config_logs_normalized_whitelist_fields(tmp_path: Path, caplog, monkeypatch): project_dir = tmp_path / "project" project_dir.mkdir() diff --git a/tests/test_smoke_tiny_e2e.py b/tests/test_smoke_tiny_e2e.py index 0e1596e..5f7d10c 100644 --- a/tests/test_smoke_tiny_e2e.py +++ b/tests/test_smoke_tiny_e2e.py @@ -253,3 +253,83 @@ def test_smoke_e2e_local_zip_extractor(tmp_path: Path) -> None: raw_dir = Path(cfg.root) / "data" / "raw" / cfg.dataset / str(year) raw_manifest = json.loads((raw_dir / "manifest.json").read_text(encoding="utf-8")) assert raw_manifest["primary_output_file"] == "zip_payload.csv" + + +def test_smoke_e2e_local_file_path_year_template(tmp_path: Path) -> None: + project_dir = tmp_path / "templated_local_project" + data_dir = project_dir / "data" + data_dir.mkdir(parents=True, exist_ok=True) + shutil.copy(FIXTURES_DIR / "it_small.csv", data_dir / "it_small_2024.csv") + + _write_text( + project_dir / "sql" / "clean.sql", + """ + SELECT + comune, + CAST(anno AS INTEGER) AS anno, + CAST(valore AS DOUBLE) AS valore + FROM raw_input + """, + ) + _write_text( + project_dir / "sql" / "mart_totali.sql", + """ + SELECT + anno, + SUM(valore) AS totale + FROM clean_input + GROUP BY anno + """, + ) + _write_text( + project_dir / "dataset.yml", + """ + schema_version: 1 + root: out + dataset: + name: tiny_csv_it_templated + years: [2024] + raw: + output_policy: overwrite + sources: + - name: csv_it + type: local_file + primary: true + args: + path: data/it_small_{year}.csv + filename: tiny_it_{year}.csv + clean: + sql: sql/clean.sql + read_mode: strict + read: + source: config_only + header: true + delim: ";" + decimal: "," + mode: explicit + include: tiny_it_2024.csv + required_columns: comune + validate: + not_null: valore + mart: + tables: + - name: mart_totali + sql: sql/mart_totali.sql + required_tables: mart_totali + validate: + table_rules: + mart_totali: + required_columns: [anno, totale] + """, + ) + + cfg = load_config(project_dir / "dataset.yml") + year = cfg.years[0] + context = run_year(cfg, year, step="all", logger=_project_logger()) + + _assert_run_success(context.path) + _assert_common_outputs(Path(cfg.root), cfg.dataset, year, ["mart_totali"]) + + raw_dir = Path(cfg.root) / "data" / "raw" / cfg.dataset / str(year) + raw_manifest = json.loads((raw_dir / "manifest.json").read_text(encoding="utf-8")) + assert raw_manifest["primary_output_file"] == "tiny_it_2024.csv" diff --git a/toolkit/core/config_models.py b/toolkit/core/config_models.py index 06275de..601a5ca 100644 --- a/toolkit/core/config_models.py +++ b/toolkit/core/config_models.py @@ -463,6 +463,13 @@ def _resolve_path_value(value: Any, *, base_dir: Path) -> Any: text = value.strip() if not text: return value + if "{year}" in text: + sentinel = "__DCL_YEAR_PLACEHOLDER__" + templated = text.replace("{year}", sentinel) + path = Path(templated).expanduser() + if path.is_absolute(): + return str(path.resolve()).replace(sentinel, "{year}") + return str((base_dir / path).resolve()).replace(sentinel, "{year}") path = Path(text).expanduser() if path.is_absolute(): return path.resolve()