From 181d09b055e2cc0999594d0ecaba4cd557a9c5ca Mon Sep 17 00:00:00 2001 From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com> Date: Wed, 11 Mar 2026 22:27:02 +0000 Subject: [PATCH 1/2] mart: segnala table_rules orfane in validation --- tests/test_validate_layers.py | 18 ++++++++++++++++++ toolkit/mart/validate.py | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/tests/test_validate_layers.py b/tests/test_validate_layers.py index b83aa55..4792cff 100644 --- a/tests/test_validate_layers.py +++ b/tests/test_validate_layers.py @@ -98,6 +98,24 @@ def test_validate_mart_min_rows_rule(tmp_path: Path): assert ok.ok is True +def test_validate_mart_warns_on_orphan_table_rules_against_declared_tables(tmp_path: Path): + d = tmp_path / "mart" + d.mkdir(parents=True, exist_ok=True) + + _write_parquet(d / "foo.parquet", "CREATE TABLE t AS SELECT 1 AS k") + + result = validate_mart( + d, + declared_tables=["foo"], + table_rules={"bar": {"min_rows": 1}}, + ) + + assert result.ok is True + assert any("not declared in mart.tables" in warning for warning in result.warnings) + assert result.summary["declared_tables"] == ["foo"] + assert result.summary["orphan_table_rules"] == ["bar"] + + def test_validate_mart_report_uses_root_relative_dir(tmp_path: Path): root = tmp_path / "root" mart_dir = root / "data" / "mart" / "demo" / "2024" diff --git a/toolkit/mart/validate.py b/toolkit/mart/validate.py index 0666ee7..11d7bad 100644 --- a/toolkit/mart/validate.py +++ b/toolkit/mart/validate.py @@ -28,6 +28,7 @@ def validate_mart( *, root: str | Path | None = None, table_rules: dict[str, MartTableRuleConfig | dict[str, Any]] | None = None, + declared_tables: list[str] | None = None, ) -> ValidationResult: """ Validate MART folder with optional per-table rules. @@ -65,12 +66,21 @@ def validate_mart( existing_files = sorted(d.glob("*.parquet")) existing_tables = sorted([p.stem for p in existing_files]) + declared_tables = sorted(set(declared_tables or [])) # Required tables presence missing = [t for t in required_tables if t not in existing_tables] if missing: errors.append(f"Missing required MART tables: {missing}") + if declared_tables: + orphan_rules = sorted(table for table in table_rules.keys() if table not in declared_tables) + if orphan_rules: + warnings.append( + "MART table_rules reference tables not declared in mart.tables: " + f"{orphan_rules}" + ) + con = duckdb.connect(":memory:") row_counts: dict[str, int] = {} per_table: dict[str, Any] = {} @@ -182,7 +192,9 @@ def validate_mart( "dir": dir_value, "tables": existing_tables, "required_tables": required_tables, + "declared_tables": declared_tables, "row_counts": row_counts, + "orphan_table_rules": [table for table in table_rules.keys() if table not in declared_tables], "table_rules": { table: { "required_columns": rule.required_columns, @@ -205,6 +217,11 @@ def run_mart_validation(cfg, year: int, logger) -> dict[str, Any]: mart_dir = layer_year_dir(cfg.root, "mart", cfg.dataset, year) mart_cfg: dict[str, Any] = cfg.mart or {} + declared_tables = [ + table.get("name") + for table in mart_cfg.get("tables", []) + if isinstance(table, dict) and table.get("name") + ] spec = MartValidationSpec.model_validate( { "required_tables": mart_cfg.get("required_tables"), @@ -217,6 +234,7 @@ def run_mart_validation(cfg, year: int, logger) -> dict[str, Any]: required_tables=spec.required_tables, root=cfg.root, table_rules=spec.validate.table_rules, + declared_tables=declared_tables, ) report = write_validation_json(Path(mart_dir) / "_validate" / "mart_validation.json", result) From edfc732999f9dace70767288a138b42eb2a8afb2 Mon Sep 17 00:00:00 2001 From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com> Date: Wed, 11 Mar 2026 22:32:46 +0000 Subject: [PATCH 2/2] mart: riusa orphan_rules nel summary di validation --- toolkit/mart/validate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/toolkit/mart/validate.py b/toolkit/mart/validate.py index 11d7bad..d2d3752 100644 --- a/toolkit/mart/validate.py +++ b/toolkit/mart/validate.py @@ -67,6 +67,7 @@ def validate_mart( existing_files = sorted(d.glob("*.parquet")) existing_tables = sorted([p.stem for p in existing_files]) declared_tables = sorted(set(declared_tables or [])) + orphan_rules: list[str] = [] # Required tables presence missing = [t for t in required_tables if t not in existing_tables] @@ -194,7 +195,7 @@ def validate_mart( "required_tables": required_tables, "declared_tables": declared_tables, "row_counts": row_counts, - "orphan_table_rules": [table for table in table_rules.keys() if table not in declared_tables], + "orphan_table_rules": orphan_rules, "table_rules": { table: { "required_columns": rule.required_columns,