Skip to content

Commit bf20295

Browse files
authored
fix: preserve year templates in local raw paths (#18)
1 parent 89853ac commit bf20295

3 files changed

Lines changed: 116 additions & 0 deletions

File tree

tests/test_config.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,35 @@ def test_load_config_does_not_transform_non_whitelisted_path_like_fields(tmp_pat
124 124
assert cfg.mart["label_path"] == "labels/mart.txt"
125 125

126 126

127+
def test_load_config_preserves_year_template_in_raw_local_file_path(tmp_path: Path):
128+
project_dir = tmp_path / "project"
129+
project_dir.mkdir()
130+
131+
yml = project_dir / "dataset.yml"
132+
yml.write_text(
133+
"""
134+
root: "./out"
135+
dataset:
136+
name: demo
137+
years: [2022, 2023]
138+
raw:
139+
sources:
140+
- type: local_file
141+
args:
142+
path: "data/raw_{year}.csv"
143+
filename: "raw_{year}.csv"
144+
clean: {}
145+
mart: {}
146+
""".strip(),
147+
encoding="utf-8",
148+
)
149+
150+
cfg = load_config(yml)
151+
152+
assert cfg.raw["sources"][0]["args"]["path"] == str((project_dir / "data" / "raw_{year}.csv").resolve())
153+
assert cfg.raw["sources"][0]["args"]["filename"] == "raw_{year}.csv"
154+
155+
127 156
def test_load_config_logs_normalized_whitelist_fields(tmp_path: Path, caplog, monkeypatch):
128 157
project_dir = tmp_path / "project"
129 158
project_dir.mkdir()

tests/test_smoke_tiny_e2e.py

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -253,3 +253,83 @@ def test_smoke_e2e_local_zip_extractor(tmp_path: Path) -> None:
253 253
raw_dir = Path(cfg.root) / "data" / "raw" / cfg.dataset / str(year)
254 254
raw_manifest = json.loads((raw_dir / "manifest.json").read_text(encoding="utf-8"))
255 255
assert raw_manifest["primary_output_file"] == "zip_payload.csv"
256+
257+
258+
def test_smoke_e2e_local_file_path_year_template(tmp_path: Path) -> None:
259+
project_dir = tmp_path / "templated_local_project"
260+
data_dir = project_dir / "data"
261+
data_dir.mkdir(parents=True, exist_ok=True)
262+
shutil.copy(FIXTURES_DIR / "it_small.csv", data_dir / "it_small_2024.csv")
263+
264+
_write_text(
265+
project_dir / "sql" / "clean.sql",
266+
"""
267+
SELECT
268+
comune,
269+
CAST(anno AS INTEGER) AS anno,
270+
CAST(valore AS DOUBLE) AS valore
271+
FROM raw_input
272+
""",
273+
)
274+
_write_text(
275+
project_dir / "sql" / "mart_totali.sql",
276+
"""
277+
SELECT
278+
anno,
279+
SUM(valore) AS totale
280+
FROM clean_input
281+
GROUP BY anno
282+
""",
283+
)
284+
_write_text(
285+
project_dir / "dataset.yml",
286+
"""
287+
schema_version: 1
288+
root: out
289+
dataset:
290+
name: tiny_csv_it_templated
291+
years: [2024]
292+
raw:
293+
output_policy: overwrite
294+
sources:
295+
- name: csv_it
296+
type: local_file
297+
primary: true
298+
args:
299+
path: data/it_small_{year}.csv
300+
filename: tiny_it_{year}.csv
301+
clean:
302+
sql: sql/clean.sql
303+
read_mode: strict
304+
read:
305+
source: config_only
306+
header: true
307+
delim: ";"
308+
decimal: ","
309+
mode: explicit
310+
include: tiny_it_2024.csv
311+
required_columns: comune
312+
validate:
313+
not_null: valore
314+
mart:
315+
tables:
316+
- name: mart_totali
317+
sql: sql/mart_totali.sql
318+
required_tables: mart_totali
319+
validate:
320+
table_rules:
321+
mart_totali:
322+
required_columns: [anno, totale]
323+
""",
324+
)
325+
326+
cfg = load_config(project_dir / "dataset.yml")
327+
year = cfg.years[0]
328+
context = run_year(cfg, year, step="all", logger=_project_logger())
329+
330+
_assert_run_success(context.path)
331+
_assert_common_outputs(Path(cfg.root), cfg.dataset, year, ["mart_totali"])
332+
333+
raw_dir = Path(cfg.root) / "data" / "raw" / cfg.dataset / str(year)
334+
raw_manifest = json.loads((raw_dir / "manifest.json").read_text(encoding="utf-8"))
335+
assert raw_manifest["primary_output_file"] == "tiny_it_2024.csv"

toolkit/core/config_models.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -463,6 +463,13 @@ def _resolve_path_value(value: Any, *, base_dir: Path) -> Any:
463 463
text = value.strip()
464 464
if not text:
465 465
return value
466+
if "{year}" in text:
467+
sentinel = "__DCL_YEAR_PLACEHOLDER__"
468+
templated = text.replace("{year}", sentinel)
469+
path = Path(templated).expanduser()
470+
if path.is_absolute():
471+
return str(path.resolve()).replace(sentinel, "{year}")
472+
return str((base_dir / path).resolve()).replace(sentinel, "{year}")
466 473
path = Path(text).expanduser()
467 474
if path.is_absolute():
468 475
return path.resolve()

0 commit comments

Comments
 (0)