diff --git a/tests/test_validate_layers.py b/tests/test_validate_layers.py
index b83aa55..4792cff 100644
--- a/tests/test_validate_layers.py
+++ b/tests/test_validate_layers.py
@@ -98,6 +98,24 @@ def test_validate_mart_min_rows_rule(tmp_path: Path):
     assert ok.ok is True
 
 
+def test_validate_mart_warns_on_orphan_table_rules_against_declared_tables(tmp_path: Path):
+    """Rules keyed on a table absent from ``declared_tables`` warn without failing."""
+    mart_dir = tmp_path / "mart"
+    mart_dir.mkdir(parents=True, exist_ok=True)
+    _write_parquet(mart_dir / "foo.parquet", "CREATE TABLE t AS SELECT 1 AS k")
+
+    # Only "foo" is declared, so the rule keyed on "bar" is an orphan.
+    orphan_rule = {"bar": {"min_rows": 1}}
+    result = validate_mart(mart_dir, declared_tables=["foo"], table_rules=orphan_rule)
+
+    assert result.ok is True
+    orphan_warnings = [msg for msg in result.warnings if "not declared in mart.tables" in msg]
+    assert orphan_warnings
+    summary = result.summary
+    assert summary["declared_tables"] == ["foo"]
+    assert summary["orphan_table_rules"] == ["bar"]
+
+
 def test_validate_mart_report_uses_root_relative_dir(tmp_path: Path):
     root = tmp_path / "root"
     mart_dir = root / "data" / "mart" / "demo" / "2024"
diff --git a/toolkit/mart/validate.py b/toolkit/mart/validate.py
index 0666ee7..d2d3752 100644
--- a/toolkit/mart/validate.py
+++ b/toolkit/mart/validate.py
@@ -28,6 +28,7 @@ def validate_mart(
     *,
     root: str | Path | None = None,
     table_rules: dict[str, MartTableRuleConfig | dict[str, Any]] | None = None,
+    declared_tables: list[str] | None = None,
 ) -> ValidationResult:
     """
     Validate MART folder with optional per-table rules.
@@ -65,12 +66,22 @@ def validate_mart( existing_files = sorted(d.glob("*.parquet")) existing_tables = sorted([p.stem for p in existing_files]) + declared_tables = sorted(set(declared_tables or [])) + orphan_rules: list[str] = [] # Required tables presence missing = [t for t in required_tables if t not in existing_tables] if missing: errors.append(f"Missing required MART tables: {missing}") + if declared_tables: + orphan_rules = sorted(table for table in table_rules.keys() if table not in declared_tables) + if orphan_rules: + warnings.append( + "MART table_rules reference tables not declared in mart.tables: " + f"{orphan_rules}" + ) + con = duckdb.connect(":memory:") row_counts: dict[str, int] = {} per_table: dict[str, Any] = {} @@ -182,7 +193,9 @@ def validate_mart( "dir": dir_value, "tables": existing_tables, "required_tables": required_tables, + "declared_tables": declared_tables, "row_counts": row_counts, + "orphan_table_rules": orphan_rules, "table_rules": { table: { "required_columns": rule.required_columns, @@ -205,6 +218,11 @@ def run_mart_validation(cfg, year: int, logger) -> dict[str, Any]: mart_dir = layer_year_dir(cfg.root, "mart", cfg.dataset, year) mart_cfg: dict[str, Any] = cfg.mart or {} + declared_tables = [ + table.get("name") + for table in mart_cfg.get("tables", []) + if isinstance(table, dict) and table.get("name") + ] spec = MartValidationSpec.model_validate( { "required_tables": mart_cfg.get("required_tables"), @@ -217,6 +235,7 @@ def run_mart_validation(cfg, year: int, logger) -> dict[str, Any]: required_tables=spec.required_tables, root=cfg.root, table_rules=spec.validate.table_rules, + declared_tables=declared_tables, ) report = write_validation_json(Path(mart_dir) / "_validate" / "mart_validation.json", result)