diff --git a/tests/test_artifacts_policy.py b/tests/test_artifacts_policy.py index c0642db..7173bcc 100644 --- a/tests/test_artifacts_policy.py +++ b/tests/test_artifacts_policy.py @@ -216,3 +216,90 @@ def test_run_mart_supports_root_posix_placeholder(tmp_path: Path) -> None: assert mart_output.exists() assert duckdb.execute(f"SELECT marker FROM read_parquet('{mart_output.as_posix()}')").fetchone() == ("ok",) assert result["output_rows"] == 1 + + +def test_run_mart_supports_support_placeholder(tmp_path: Path) -> None: + config_path = tmp_path / "dataset.yml" + sql_dir = tmp_path / "sql" + sql_dir.mkdir(parents=True, exist_ok=True) + root_dir = tmp_path / "out" + dataset = "demo_ds" + year = 2022 + + clean_dir = root_dir / "data" / "clean" / dataset / str(year) + clean_dir.mkdir(parents=True, exist_ok=True) + clean_path = clean_dir / f"{dataset}_{year}_clean.parquet" + duckdb.execute(f"COPY (SELECT 1 AS value) TO '{clean_path.as_posix()}' (FORMAT PARQUET)") + + support_root = tmp_path / "support_out" + support_output = support_root / "data" / "mart" / "lookup_ds" / "2024" / "lookup_table.parquet" + support_output.parent.mkdir(parents=True, exist_ok=True) + duckdb.execute( + f"COPY (SELECT 'ok' AS marker) TO '{support_output.as_posix()}' (FORMAT PARQUET)" + ) + + support_config = tmp_path / "support_dataset.yml" + support_config.write_text( + "\n".join( + [ + f'root: "{support_root.as_posix()}"', + "dataset:", + ' name: "lookup_ds"', + " years: [2024]", + "raw: {}", + "clean: {}", + "mart:", + " tables:", + ' - name: "lookup_table"', + ' sql: "sql/lookup.sql"', + ] + ), + encoding="utf-8", + ) + + (sql_dir / "mart_example.sql").write_text( + "select * from read_parquet('{support.lookup.mart}')", + encoding="utf-8", + ) + config_path.write_text( + "\n".join( + [ + f'root: "{root_dir.as_posix()}"', + "dataset:", + f' name: "{dataset}"', + f" years: [{year}]", + "raw: {}", + "clean:", + ' sql: "sql/clean.sql"', + "mart:", + " tables:", + ' - name: "mart_example"', + ' sql: "sql/mart_example.sql"', + "support:", + ' - name: "lookup"', + f' config: "{support_config.as_posix()}"', + " years: [2024]", + ] + ), + encoding="utf-8", + ) + (tmp_path / "sql" / "clean.sql").write_text("select 1 as value", encoding="utf-8") + + cfg = load_config(config_path) + logger = _NoopLogger() + result = run_mart( + cfg.dataset, + year, + cfg.root, + cfg.mart, + logger, + base_dir=cfg.base_dir, + clean_cfg=cfg.clean, + output_cfg=cfg.output, + support_cfg=cfg.support, + ) + + mart_output = root_dir / "data" / "mart" / dataset / str(year) / "mart_example.parquet" + assert mart_output.exists() + assert duckdb.execute(f"SELECT marker FROM read_parquet('{mart_output.as_posix()}')").fetchone() == ("ok",) + assert result["output_rows"] == 1 diff --git a/tests/test_cli_inspect_paths.py b/tests/test_cli_inspect_paths.py index 170aaeb..20da842 100644 --- a/tests/test_cli_inspect_paths.py +++ b/tests/test_cli_inspect_paths.py @@ -70,3 +70,65 @@ def test_inspect_paths_json_is_notebook_friendly(tmp_path: Path, monkeypatch) -> assert payload["raw_hints"]["suggested_read_exists"] is False assert payload["raw_hints"]["suggested_read_path"].endswith("suggested_read.yml") assert payload["latest_run"] is None + + +def test_inspect_paths_json_reports_resolved_support_outputs(tmp_path: Path) -> None: + runner = CliRunner() + + support_root = tmp_path / "support_out" + support_config = tmp_path / "support_dataset.yml" + support_config.write_text( + "\n".join( + [ + f'root: "{support_root.as_posix()}"', + "dataset:", + ' name: "support_ds"', + " years: [2024]", + "raw: {}", + "clean: {}", + "mart:", + " tables:", + ' - name: "support_table"', + ' sql: "sql/support.sql"', + ] + ), + encoding="utf-8", + ) + + config_path = tmp_path / "dataset.yml" + root_dir = tmp_path / "out" + config_path.write_text( + "\n".join( + [ + f'root: "{root_dir.as_posix()}"', + "dataset:", + ' name: "demo_ds"', + " years: [2022]", + "raw: {}", + "clean: {}", + "mart: {}", + "support:", + ' - name: "scuole"', + f' config: "{support_config.as_posix()}"', + " years: [2024]", + ] + ), + encoding="utf-8", + ) + + result = runner.invoke( + app, + ["inspect", "paths", "--config", str(config_path), "--year", "2022", "--json", "--strict-config"], + ) + + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + assert payload["paths"]["support"] + support_payload = payload["paths"]["support"][0] + assert support_payload["name"] == "scuole" + assert support_payload["dataset"] == "support_ds" + assert support_payload["years"] == [2024] + assert support_payload["outputs"] == [ + str(support_root / "data" / "mart" / "support_ds" / "2024" / "support_table.parquet") + ] + assert support_payload["mart"].endswith("support_table.parquet") diff --git a/tests/test_config.py b/tests/test_config.py index fdd1227..01535a6 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -101,6 +101,69 @@ def test_load_config_resolves_relative_paths_from_dataset_dir(tmp_path: Path): assert cfg.cross_year["tables"][0]["sql"] == (project_dir / "sql" / "cross" / "demo_cross.sql").resolve() +def test_load_config_resolves_support_config_paths_from_dataset_dir(tmp_path: Path): + project_dir = tmp_path / "project" + support_dir = tmp_path / "support" + project_dir.mkdir() + support_dir.mkdir() + + yml = project_dir / "dataset.yml" + yml.write_text( + """ +root: "./out" +dataset: + name: demo + years: [2022] +raw: {} +clean: {} +mart: {} +support: + - name: scuole + config: "../support/dataset.yml" + years: [2024] +""".strip(), + encoding="utf-8", + ) + + cfg = load_config(yml) + + assert cfg.support == [ + { + "name": "scuole", + "config": (support_dir / "dataset.yml").resolve(), + "years": [2024], + } + ] + + +def test_load_config_rejects_duplicate_support_names(tmp_path: Path): + yml = tmp_path / "dataset.yml" + yml.write_text( + """ +root: "./out" +dataset: + name: demo + years: [2022] +raw: {} +clean: {} +mart: {} +support: + - name: scuole + config: "./support_a.yml" + years: [2024] + - name: scuole + config: "./support_b.yml" + years: [2025] +""".strip(), + encoding="utf-8", + ) + + with pytest.raises(ValueError) as e: + load_config(yml) + + assert "support[].name values must be unique" in str(e.value) + + def test_load_config_does_not_transform_non_whitelisted_path_like_fields(tmp_path: Path): project_dir = tmp_path / "project" project_dir.mkdir() diff --git a/tests/test_run_dry_run.py b/tests/test_run_dry_run.py index d6ed8da..6358d73 100644 --- a/tests/test_run_dry_run.py +++ b/tests/test_run_dry_run.py @@ -209,6 +209,206 @@ def test_run_dry_run_accepts_mart_sql_with_root_posix_placeholder(tmp_path: Path assert "sql_validation: OK" in result.output +def test_run_dry_run_accepts_mart_sql_with_support_placeholder(tmp_path: Path) -> None: + sql_dir = tmp_path / "sql" / "mart" + sql_dir.mkdir(parents=True, exist_ok=True) + root_dir = tmp_path / "out" + + support_root = tmp_path / "support_out" + support_output = support_root / "data" / "mart" / "lookup_ds" / "2024" / "lookup_table.parquet" + support_output.parent.mkdir(parents=True, exist_ok=True) + duckdb.execute( + f"COPY (SELECT 7 AS lookup_value) TO '{support_output.as_posix()}' (FORMAT PARQUET)" + ) + + support_config = tmp_path / "support_dataset.yml" + support_config.write_text( + "\n".join( + [ + f'root: "{support_root.as_posix()}"', + "dataset:", + ' name: "lookup_ds"', + " years: [2024]", + "raw: {}", + "clean: {}", + "mart:", + " tables:", + ' - name: "lookup_table"', + ' sql: "sql/lookup.sql"', + ] + ), + encoding="utf-8", + ) + + (tmp_path / "sql" / "clean.sql").write_text("select 1 as value", encoding="utf-8") + (sql_dir / "mart_example.sql").write_text( + "select * from read_parquet('{support.lookup.mart}')", + encoding="utf-8", + ) + + config_path = tmp_path / "dataset.yml" + config_path.write_text( + "\n".join( + [ + f'root: "{root_dir.as_posix()}"', + "dataset:", + ' name: "demo_ds"', + " years: [2022]", + "raw: {}", + "clean:", + ' sql: "sql/clean.sql"', + "mart:", + " tables:", + ' - name: "mart_example"', + ' sql: "sql/mart/mart_example.sql"', + "support:", + ' - name: "lookup"', + f' config: "{support_config.as_posix()}"', + " years: [2024]", + ] + ), + encoding="utf-8", + ) + + runner = CliRunner() + result = runner.invoke(app, ["run", "all", "--config", str(config_path), "--dry-run"]) + + assert result.exit_code == 0 + assert "sql_validation: OK" in result.output + + +def test_run_dry_run_fails_when_support_output_is_missing(tmp_path: Path) -> None: + sql_dir = tmp_path / "sql" / "mart" + sql_dir.mkdir(parents=True, exist_ok=True) + root_dir = tmp_path / "out" + + support_root = tmp_path / "support_out" + support_config = tmp_path / "support_dataset.yml" + support_config.write_text( + "\n".join( + [ + f'root: "{support_root.as_posix()}"', + "dataset:", + ' name: "lookup_ds"', + " years: [2024]", + "raw: {}", + "clean: {}", + "mart:", + " tables:", + ' - name: "lookup_table"', + ' sql: "sql/lookup.sql"', + ] + ), + encoding="utf-8", + ) + + (tmp_path / "sql" / "clean.sql").write_text("select 1 as value", encoding="utf-8") + (sql_dir / "mart_example.sql").write_text( + "select * from read_parquet('{support.lookup.mart}')", + encoding="utf-8", + ) + + config_path = tmp_path / "dataset.yml" + config_path.write_text( + "\n".join( + [ + f'root: "{root_dir.as_posix()}"', + "dataset:", + ' name: "demo_ds"', + " years: [2022]", + "raw: {}", + "clean:", + ' sql: "sql/clean.sql"', + "mart:", + " tables:", + ' - name: "mart_example"', + ' sql: "sql/mart/mart_example.sql"', + "support:", + ' - name: "lookup"', + f' config: "{support_config.as_posix()}"', + " years: [2024]", + ] + ), + encoding="utf-8", + ) + + runner = CliRunner() + result = runner.invoke(app, ["run", "all", "--config", str(config_path), "--dry-run"]) + + assert result.exit_code != 0 + assert "Support dataset output mancante" in str(result.exception) + + +def test_run_dry_run_fails_when_support_outputs_are_only_partially_present(tmp_path: Path) -> None: + sql_dir = tmp_path / "sql" / "mart" + sql_dir.mkdir(parents=True, exist_ok=True) + root_dir = tmp_path / "out" + + support_root = tmp_path / "support_out" + support_output = support_root / "data" / "mart" / "lookup_ds" / "2024" / "lookup_a.parquet" + support_output.parent.mkdir(parents=True, exist_ok=True) + duckdb.execute( + f"COPY (SELECT 7 AS lookup_value) TO '{support_output.as_posix()}' (FORMAT PARQUET)" + ) + + support_config = tmp_path / "support_dataset.yml" + support_config.write_text( + "\n".join( + [ + f'root: "{support_root.as_posix()}"', + "dataset:", + ' name: "lookup_ds"', + " years: [2024]", + "raw: {}", + "clean: {}", + "mart:", + " tables:", + ' - name: "lookup_a"', + ' sql: "sql/lookup_a.sql"', + ' - name: "lookup_b"', + ' sql: "sql/lookup_b.sql"', + ] + ), + encoding="utf-8", + ) + + (tmp_path / "sql" / "clean.sql").write_text("select 1 as value", encoding="utf-8") + (sql_dir / "mart_example.sql").write_text( + "select * from read_parquet('{support.lookup.mart}')", + encoding="utf-8", + ) + + config_path = tmp_path / "dataset.yml" + config_path.write_text( + "\n".join( + [ + f'root: "{root_dir.as_posix()}"', + "dataset:", + ' name: "demo_ds"', + " years: [2022]", + "raw: {}", + "clean:", + ' sql: "sql/clean.sql"', + "mart:", + " tables:", + ' - name: "mart_example"', + ' sql: "sql/mart/mart_example.sql"', + "support:", + ' - name: "lookup"', + f' config: "{support_config.as_posix()}"', + " years: [2024]", + ] + ), + encoding="utf-8", + ) + + runner = CliRunner() + result = runner.invoke(app, ["run", "all", "--config", str(config_path), "--dry-run"]) + + assert result.exit_code != 0 + assert "Support dataset output mancante" in str(result.exception) + + def test_run_year_logs_effective_root_context(tmp_path: Path, caplog) -> None: sql_dir = tmp_path / "sql" / "mart" sql_dir.mkdir(parents=True, exist_ok=True) diff --git a/tests/test_template.py b/tests/test_template.py index 1d74d63..623ac0f 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -6,3 +6,11 @@ def test_render_template_raises_clear_error_for_unresolved_placeholder(): with pytest.raises(ValueError, match=r"unresolved placeholders.*\{root_posix\}"): render_template("select * from read_parquet('{root_posix}/file.parquet')", {"year": 2024}) + + +def test_render_template_raises_clear_error_for_unresolved_dotted_placeholder(): + with pytest.raises(ValueError, match=r"unresolved placeholders.*\{support.lookup.mart\}"): + render_template( + "select * from read_parquet('{support.lookup.mart}')", + {"year": 2024}, + ) diff --git a/toolkit/cli/cmd_inspect.py b/toolkit/cli/cmd_inspect.py index 9dbd78a..e173b67 100644 --- a/toolkit/cli/cmd_inspect.py +++ b/toolkit/cli/cmd_inspect.py @@ -9,6 +9,7 @@ from toolkit.cli.common import iter_years from toolkit.core.config import load_config from toolkit.core.paths import layer_year_dir +from toolkit.core.support import resolve_support_payloads from toolkit.profile.raw import build_profile_hints from toolkit.core.run_context import get_run_dir, latest_run @@ -160,6 +161,7 @@ def _payload_for_year(cfg, year: int) -> dict[str, Any]: "raw": _raw_output_paths(root, cfg.dataset, year), "clean": _clean_paths(root, cfg.dataset, year), "mart": _mart_paths(root, cfg.dataset, year, mart_tables), + "support": resolve_support_payloads(cfg.support, require_exists=False), "run_dir": str(run_dir), }, "raw_hints": { @@ -227,6 +229,17 @@ def paths( typer.echo(f"mart_manifest: {item['paths']['mart']['manifest']}") typer.echo(f"mart_metadata: {item['paths']['mart']['metadata']}") typer.echo(f"mart_validation: {item['paths']['mart']['validation']}") + if item["paths"]["support"]: + typer.echo("support:") + for support in item["paths"]["support"]: + typer.echo(f" - name: {support['name']}") + typer.echo(f" dataset: {support['dataset']}") + typer.echo(f" config_path: {support['config_path']}") + typer.echo(f" years: {', '.join(str(year_value) for year_value in support['years'])}") + typer.echo(f" mart: {support['mart']}") + typer.echo(" outputs:") + for output in support["outputs"]: + typer.echo(f" - {output}") typer.echo(f"run_dir: {item['paths']['run_dir']}") latest_info = item.get("latest_run") if latest_info is None: diff --git a/toolkit/cli/cmd_run.py b/toolkit/cli/cmd_run.py index 4141679..a327437 100644 --- a/toolkit/cli/cmd_run.py +++ b/toolkit/cli/cmd_run.py @@ -287,6 +287,7 @@ def _execute_layer(layer_name: str, target, *args, **kwargs) -> None: base_dir=cfg.base_dir, clean_cfg=cfg.clean, output_cfg=cfg.output, + support_cfg=cfg.support, ) context.complete_run(success_with_warnings=run_has_validation_warnings) diff --git a/toolkit/cli/sql_dry_run.py b/toolkit/cli/sql_dry_run.py index a527b00..0912e08 100644 --- a/toolkit/cli/sql_dry_run.py +++ b/toolkit/cli/sql_dry_run.py @@ -6,6 +6,7 @@ import duckdb from toolkit.clean.run import _load_clean_sql +from toolkit.core.support import flatten_support_template_ctx, resolve_support_payloads from toolkit.core.template import build_runtime_template_ctx from toolkit.core.template import render_template from toolkit.mart.run import _resolve_sql_path as _resolve_mart_sql_path @@ -114,11 +115,13 @@ def _validate_mart_sql(cfg, *, year: int, con: duckdb.DuckDBPyConnection) -> Non con.execute("CREATE OR REPLACE VIEW clean AS SELECT * FROM clean_input") tables = cfg.mart.get("tables") or [] + support_payloads = resolve_support_payloads(cfg.support, require_exists=True) template_ctx = build_runtime_template_ctx( dataset=cfg.dataset, year=year, root=cfg.root, base_dir=cfg.base_dir, + support=flatten_support_template_ctx(support_payloads), ) for table in tables: diff --git a/toolkit/core/config.py b/toolkit/core/config.py index 7a9f44d..1eb39c9 100644 --- a/toolkit/core/config.py +++ b/toolkit/core/config.py @@ -23,6 +23,7 @@ class ToolkitConfig: raw: dict[str, Any] clean: dict[str, Any] mart: dict[str, Any] + support: list[dict[str, Any]] cross_year: dict[str, Any] config: dict[str, Any] validation: dict[str, Any] @@ -70,6 +71,17 @@ def _compat_cross_year(model: ToolkitConfigModel) -> dict[str, Any]: ) +def _compat_support(model: ToolkitConfigModel) -> list[dict[str, Any]]: + return [ + item.model_dump( + mode="python", + exclude_none=True, + exclude_unset=True, + ) + for item in model.support + ] + + def load_config( path: str | Path, *, @@ -87,6 +99,7 @@ def load_config( raw=model.raw.model_dump(mode="python", exclude_none=True, exclude_unset=True), clean=_compat_clean(model), mart=_compat_mart(model), + support=_compat_support(model), cross_year=_compat_cross_year(model), config=model.config.model_dump(mode="python"), validation=model.validation.model_dump(mode="python"), diff --git a/toolkit/core/config_models.py b/toolkit/core/config_models.py index 58279ed..a67e619 100644 --- a/toolkit/core/config_models.py +++ b/toolkit/core/config_models.py @@ -7,7 +7,7 @@ from typing import Any, Literal import yaml -from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator +from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator, model_validator from toolkit.core.csv_read import normalize_columns_spec @@ -100,6 +100,36 @@ class DatasetBlock(BaseModel): years: list[int] +class SupportDatasetConfig(BaseModel): + model_config = ConfigDict(extra="forbid") + + name: str + config: Path + years: list[int] + + @field_validator("name") + @classmethod + def _validate_name(cls, value: str) -> str: + text = value.strip() + if not text: + raise ValueError("support[].name must not be empty") + import re + + if not re.fullmatch(_SAFE_SQL_IDENTIFIER_RE, text): + raise ValueError( + "support[].name must be a safe identifier " + "(letters, numbers, underscore; cannot start with a number)" + ) + return text + + @field_validator("years") + @classmethod + def _validate_years(cls, value: list[int]) -> list[int]: + if not value: + raise ValueError("support[].years must not be empty") + return value + + class OutputConfig(BaseModel): model_config = ConfigDict(extra="forbid") @@ -422,11 +452,22 @@ class ToolkitConfigModel(BaseModel): raw: RawConfig = Field(default_factory=RawConfig) clean: CleanConfig = Field(default_factory=CleanConfig) mart: MartConfig = Field(default_factory=MartConfig) + support: list[SupportDatasetConfig] = Field(default_factory=list) cross_year: CrossYearConfig = Field(default_factory=CrossYearConfig) config: ConfigPolicy = Field(default_factory=ConfigPolicy) validation: GlobalValidationConfig = Field(default_factory=GlobalValidationConfig) output: OutputConfig = Field(default_factory=OutputConfig) + @model_validator(mode="after") + def _validate_unique_support_names(self) -> "ToolkitConfigModel": + names = [entry.name for entry in self.support] + duplicates = sorted({name for name in names if names.count(name) > 1}) + if duplicates: + raise ValueError( + "support[].name values must be unique: " + ", ".join(duplicates) + ) + return self + def _err(msg: str, *, path: Path) -> ValueError: return ValueError(f"{msg} (file: {path})") @@ -469,6 +510,9 @@ def _resolve_path_value(value: Any, *, base_dir: Path) -> Any: "mart": ( ("tables", "*", "sql"), ), + "support": ( + ("*", "config"), + ), "cross_year": ( ("tables", "*", "sql"), ), @@ -551,11 +595,16 @@ def _get_nested_value(container: Any, tokens: tuple[str, ...]) -> Any: def _normalize_section_paths( section_name: str, - section: dict[str, Any], + section: Any, *, base_dir: Path, -) -> tuple[dict[str, Any], list[tuple[str, Path]]]: - normalized = dict(section) +) -> tuple[Any, list[tuple[str, Path]]]: + if isinstance(section, dict): + normalized: Any = dict(section) + elif isinstance(section, list): + normalized = list(section) + else: + normalized = section changes: list[tuple[str, Path]] = [] for pattern in _SECTION_PATH_WHITELIST.get(section_name, ()): @@ -665,6 +714,7 @@ def _declared_model_keys(model_cls: type[BaseModel]) -> set[str]: "raw", "clean", "mart", + "support", "cross_year", "config", "validation", @@ -815,6 +865,7 @@ def load_config_model( raw = normalized.get("raw", {}) or {} clean = normalized.get("clean", {}) or {} mart = normalized.get("mart", {}) or {} + support = normalized.get("support", []) or [] cross_year = normalized.get("cross_year", {}) or {} normalized_fields: list[tuple[str, Path]] = [] @@ -827,6 +878,9 @@ def load_config_model( if isinstance(mart, dict): mart, mart_changes = _normalize_section_paths("mart", mart, base_dir=base_dir) normalized_fields.extend(mart_changes) + if isinstance(support, list): + support, support_changes = _normalize_section_paths("support", support, base_dir=base_dir) + normalized_fields.extend(support_changes) if isinstance(cross_year, dict): cross_year, cross_year_changes = _normalize_section_paths("cross_year", cross_year, base_dir=base_dir) normalized_fields.extend(cross_year_changes) @@ -844,6 +898,7 @@ def load_config_model( "raw": raw, "clean": clean, "mart": mart, + "support": support, "cross_year": cross_year, } diff --git a/toolkit/core/support.py b/toolkit/core/support.py new file mode 100644 index 0000000..a336c85 --- /dev/null +++ b/toolkit/core/support.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from toolkit.core.config import load_config +from toolkit.core.paths import layer_year_dir + + +def _support_expected_mart_outputs(cfg, year: int) -> list[Path]: + tables = cfg.mart.get("tables") or [] + mart_dir = layer_year_dir(cfg.root, "mart", cfg.dataset, year) + outputs: list[Path] = [] + for table in tables: + if not isinstance(table, dict): + continue + name = table.get("name") + if not name: + continue + outputs.append(mart_dir / f"{name}.parquet") + return outputs + + +def resolve_support_payloads( + support_entries: list[dict[str, Any]] | None, + *, + require_exists: bool, +) -> list[dict[str, Any]]: + resolved: list[dict[str, Any]] = [] + for entry in support_entries or []: + name = str(entry["name"]) + config_path = Path(entry["config"]) + years = [int(year) for year in entry.get("years") or []] + support_cfg = load_config(config_path) + + year_payloads: list[dict[str, Any]] = [] + all_outputs: list[str] = [] + for year in years: + expected_paths = _support_expected_mart_outputs(support_cfg, year) + output_paths = [str(path) for path in expected_paths] + existing_paths = [str(path) for path in expected_paths if path.exists()] + all_outputs_exist = len(output_paths) > 0 and len(existing_paths) == len(output_paths) + if require_exists and not output_paths: + raise ValueError( + "Support dataset MART non configurato: " + f"{name} ({config_path}) anno {year}. " + "Il dataset di supporto deve dichiarare almeno una tabella in mart.tables." + ) + if require_exists and not all_outputs_exist: + raise FileNotFoundError( + "Support dataset output mancante: " + f"{name} ({config_path}) anno {year}. " + "Esegui prima il MART del support dataset o correggi support[].years." + ) + year_payloads.append( + { + "year": year, + "dataset": support_cfg.dataset, + "config_path": str(config_path), + "mart_dir": str(layer_year_dir(support_cfg.root, "mart", support_cfg.dataset, year)), + "outputs": output_paths, + "existing_outputs": existing_paths, + "all_outputs_exist": all_outputs_exist, + } + ) + all_outputs.extend(existing_paths if require_exists else output_paths) + + resolved.append( + { + "name": name, + "config_path": str(config_path), + "dataset": support_cfg.dataset, + "years": years, + "years_resolved": year_payloads, + "outputs": all_outputs, + "mart": all_outputs[0] if all_outputs else None, + } + ) + return resolved + + +def flatten_support_template_ctx(payloads: list[dict[str, Any]]) -> dict[str, Any]: + ctx: dict[str, Any] = {} + for payload in payloads: + name = payload["name"] + ctx[f"support.{name}.outputs"] = payload["outputs"] + ctx[f"support.{name}.mart"] = payload["mart"] + return ctx diff --git a/toolkit/core/template.py b/toolkit/core/template.py index 45f9413..97b3526 100644 --- a/toolkit/core/template.py +++ b/toolkit/core/template.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Any -_UNRESOLVED_PLACEHOLDER_RE = re.compile(r"\{[A-Za-z_][A-Za-z0-9_]*\}") +_UNRESOLVED_PLACEHOLDER_RE = re.compile(r"\{[A-Za-z_][A-Za-z0-9_.]*\}") def render_template(text: str, ctx: dict[str, Any]) -> str: @@ -15,7 +15,7 @@ def render_template(text: str, ctx: dict[str, Any]) -> str: This is intentionally not a general templating engine. """ out = text - for k, v in ctx.items(): + for k, v in sorted(ctx.items(), key=lambda item: len(item[0]), reverse=True): out = out.replace("{" + k + "}", str(v)) unresolved = sorted(set(_UNRESOLVED_PLACEHOLDER_RE.findall(out))) if unresolved: @@ -32,6 +32,7 @@ def build_runtime_template_ctx( year: int, root: str | Path | None = None, base_dir: Path | None = None, + support: dict[str, Any] | None = None, ) -> dict[str, Any]: """ Build the minimal deterministic template context exposed to SQL runtime. @@ -51,6 +52,8 @@ def build_runtime_template_ctx( if base_dir is not None: ctx["base_dir"] = str(base_dir) ctx["base_dir_posix"] = base_dir.as_posix() + if support: + ctx.update(support) return ctx diff --git a/toolkit/mart/run.py b/toolkit/mart/run.py index bee21e2..65c4104 100644 --- a/toolkit/mart/run.py +++ b/toolkit/mart/run.py @@ -9,6 +9,7 @@ from toolkit.core.artifacts import ARTIFACT_POLICY_DEBUG, resolve_artifact_policy, should_write from toolkit.core.metadata import config_hash_for_year, file_record, write_layer_manifest, write_metadata from toolkit.core.paths import layer_year_dir, resolve_root, to_root_relative +from toolkit.core.support import flatten_support_template_ctx, resolve_support_payloads from toolkit.core.template import build_runtime_template_ctx, public_template_ctx, render_template @@ -42,6 +43,7 @@ def run_mart( base_dir: Path | None = None, clean_cfg: dict[str, Any] | None = None, output_cfg: dict[str, Any] | None = None, + support_cfg: list[dict[str, Any]] | None = None, ): policy = resolve_artifact_policy(output_cfg) root_dir = resolve_root(root) @@ -77,11 +79,13 @@ def run_mart( if not isinstance(tables, list) or not tables: raise ValueError("mart.tables missing or empty in dataset.yml") + support_payloads = resolve_support_payloads(support_cfg, require_exists=True) template_ctx = build_runtime_template_ctx( dataset=dataset, year=year, root=root_dir, base_dir=base_dir, + support=flatten_support_template_ctx(support_payloads), ) run_dir: Path | None = None