Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,35 @@ def test_load_config_does_not_transform_non_whitelisted_path_like_fields(tmp_pat
assert cfg.mart["label_path"] == "labels/mart.txt"


def test_load_config_preserves_year_template_in_raw_local_file_path(tmp_path: Path):
    """Check that a ``{year}`` placeholder survives loading of a ``local_file`` source.

    A minimal ``dataset.yml`` is written under ``tmp_path / "project"`` with one
    ``local_file`` raw source whose ``path`` and ``filename`` both contain
    ``{year}``. After ``load_config``:

    * ``args["path"]`` is expected to equal the absolute, resolved path under the
      project directory, with the literal ``{year}`` still in it;
    * ``args["filename"]`` is expected to be returned unchanged.
    """
    project_dir = tmp_path / "project"
    project_dir.mkdir()

    yml = project_dir / "dataset.yml"
    # The YAML body starts at column 0 on purpose: ``.strip()`` only removes the
    # outer whitespace, so the indentation inside the literal is what defines
    # the nesting (``raw.sources[0].args``) that the assertions below rely on.
    yml.write_text(
        """
root: "./out"
dataset:
  name: demo
  years: [2022, 2023]
raw:
  sources:
    - type: local_file
      args:
        path: "data/raw_{year}.csv"
        filename: "raw_{year}.csv"
clean: {}
mart: {}
""".strip(),
        encoding="utf-8",
    )

    cfg = load_config(yml)

    source_args = cfg.raw["sources"][0]["args"]
    expected_path = str((project_dir / "data" / "raw_{year}.csv").resolve())
    assert source_args["path"] == expected_path
    assert source_args["filename"] == "raw_{year}.csv"


def test_load_config_logs_normalized_whitelist_fields(tmp_path: Path, caplog, monkeypatch):
project_dir = tmp_path / "project"
project_dir.mkdir()
Expand Down
80 changes: 80 additions & 0 deletions tests/test_smoke_tiny_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,3 +253,83 @@ def test_smoke_e2e_local_zip_extractor(tmp_path: Path) -> None:
raw_dir = Path(cfg.root) / "data" / "raw" / cfg.dataset / str(year)
raw_manifest = json.loads((raw_dir / "manifest.json").read_text(encoding="utf-8"))
assert raw_manifest["primary_output_file"] == "zip_payload.csv"


def test_smoke_e2e_local_file_path_year_template(tmp_path: Path) -> None:
    """Run the full pipeline for a ``local_file`` source with ``{year}`` templates.

    The fixture ``it_small.csv`` is copied to ``data/it_small_2024.csv`` while the
    config refers to it as ``data/it_small_{year}.csv`` and names the raw output
    ``tiny_it_{year}.csv``. The test runs every step for the single configured
    year and then checks that the run succeeded, that the common outputs for
    ``mart_totali`` exist, and that the raw manifest records
    ``tiny_it_2024.csv`` as the primary output file.
    """
    project_dir = tmp_path / "templated_local_project"
    data_dir = project_dir / "data"
    data_dir.mkdir(parents=True, exist_ok=True)
    # Only the 2024 file exists on disk; the config reaches it via the template.
    shutil.copy(FIXTURES_DIR / "it_small.csv", data_dir / "it_small_2024.csv")

    _write_text(
        project_dir / "sql" / "clean.sql",
        """
SELECT
comune,
CAST(anno AS INTEGER) AS anno,
CAST(valore AS DOUBLE) AS valore
FROM raw_input
""",
    )
    _write_text(
        project_dir / "sql" / "mart_totali.sql",
        """
SELECT
anno,
SUM(valore) AS totale
FROM clean_input
GROUP BY anno
""",
    )
    # The YAML is kept at column 0 so that its nesting does not depend on
    # whether ``_write_text`` dedents its input.
    # NOTE(review): the key placement under ``clean`` (``read`` / ``validate``)
    # is reconstructed from the key names — confirm against the config schema.
    _write_text(
        project_dir / "dataset.yml",
        """
schema_version: 1
root: out
dataset:
  name: tiny_csv_it_templated
  years: [2024]
raw:
  output_policy: overwrite
  sources:
    - name: csv_it
      type: local_file
      primary: true
      args:
        path: data/it_small_{year}.csv
        filename: tiny_it_{year}.csv
clean:
  sql: sql/clean.sql
  read_mode: strict
  read:
    source: config_only
    header: true
    delim: ";"
    decimal: ","
    mode: explicit
    include: tiny_it_2024.csv
  required_columns: comune
  validate:
    not_null: valore
mart:
  tables:
    - name: mart_totali
      sql: sql/mart_totali.sql
  required_tables: mart_totali
  validate:
    table_rules:
      mart_totali:
        required_columns: [anno, totale]
""",
    )

    cfg = load_config(project_dir / "dataset.yml")
    year = cfg.years[0]
    context = run_year(cfg, year, step="all", logger=_project_logger())

    _assert_run_success(context.path)
    _assert_common_outputs(Path(cfg.root), cfg.dataset, year, ["mart_totali"])

    # The manifest must carry the rendered file name, not the raw template.
    raw_dir = Path(cfg.root) / "data" / "raw" / cfg.dataset / str(year)
    manifest_text = (raw_dir / "manifest.json").read_text(encoding="utf-8")
    raw_manifest = json.loads(manifest_text)
    assert raw_manifest["primary_output_file"] == "tiny_it_2024.csv"
7 changes: 7 additions & 0 deletions toolkit/core/config_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,13 @@ def _resolve_path_value(value: Any, *, base_dir: Path) -> Any:
text = value.strip()
if not text:
return value
if "{year}" in text:
sentinel = "__DCL_YEAR_PLACEHOLDER__"
templated = text.replace("{year}", sentinel)
path = Path(templated).expanduser()
if path.is_absolute():
return str(path.resolve()).replace(sentinel, "{year}")
return str((base_dir / path).resolve()).replace(sentinel, "{year}")
path = Path(text).expanduser()
if path.is_absolute():
return path.resolve()
Expand Down