Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions tests/test_validate_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ def test_validate_mart_min_rows_rule(tmp_path: Path):
assert ok.ok is True


def test_validate_mart_warns_on_orphan_table_rules_against_declared_tables(tmp_path: Path):
    """A table rule naming an undeclared table produces a warning while the result stays ok."""
    mart_dir = tmp_path / "mart"
    mart_dir.mkdir(parents=True, exist_ok=True)
    _write_parquet(mart_dir / "foo.parquet", "CREATE TABLE t AS SELECT 1 AS k")

    # "bar" carries a rule but is not listed among the declared tables.
    outcome = validate_mart(
        mart_dir,
        declared_tables=["foo"],
        table_rules={"bar": {"min_rows": 1}},
    )

    assert outcome.ok is True

    orphan_warnings = [
        message
        for message in outcome.warnings
        if "not declared in mart.tables" in message
    ]
    assert orphan_warnings

    summary = outcome.summary
    assert summary["declared_tables"] == ["foo"]
    assert summary["orphan_table_rules"] == ["bar"]


def test_validate_mart_report_uses_root_relative_dir(tmp_path: Path):
root = tmp_path / "root"
mart_dir = root / "data" / "mart" / "demo" / "2024"
Expand Down
19 changes: 19 additions & 0 deletions toolkit/mart/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def validate_mart(
*,
root: str | Path | None = None,
table_rules: dict[str, MartTableRuleConfig | dict[str, Any]] | None = None,
declared_tables: list[str] | None = None,
) -> ValidationResult:
"""
Validate MART folder with optional per-table rules.
Expand Down Expand Up @@ -65,12 +66,22 @@ def validate_mart(

existing_files = sorted(d.glob("*.parquet"))
existing_tables = sorted([p.stem for p in existing_files])
declared_tables = sorted(set(declared_tables or []))
orphan_rules: list[str] = []

# Required tables presence
missing = [t for t in required_tables if t not in existing_tables]
if missing:
errors.append(f"Missing required MART tables: {missing}")

if declared_tables:
orphan_rules = sorted(table for table in table_rules.keys() if table not in declared_tables)
if orphan_rules:
warnings.append(
"MART table_rules reference tables not declared in mart.tables: "
f"{orphan_rules}"
)

con = duckdb.connect(":memory:")
row_counts: dict[str, int] = {}
per_table: dict[str, Any] = {}
Expand Down Expand Up @@ -182,7 +193,9 @@ def validate_mart(
"dir": dir_value,
"tables": existing_tables,
"required_tables": required_tables,
"declared_tables": declared_tables,
"row_counts": row_counts,
"orphan_table_rules": orphan_rules,
"table_rules": {
table: {
"required_columns": rule.required_columns,
Expand All @@ -205,6 +218,11 @@ def run_mart_validation(cfg, year: int, logger) -> dict[str, Any]:
mart_dir = layer_year_dir(cfg.root, "mart", cfg.dataset, year)

mart_cfg: dict[str, Any] = cfg.mart or {}
declared_tables = [
table.get("name")
for table in mart_cfg.get("tables", [])
if isinstance(table, dict) and table.get("name")
]
spec = MartValidationSpec.model_validate(
{
"required_tables": mart_cfg.get("required_tables"),
Expand All @@ -217,6 +235,7 @@ def run_mart_validation(cfg, year: int, logger) -> dict[str, Any]:
required_tables=spec.required_tables,
root=cfg.root,
table_rules=spec.validate.table_rules,
declared_tables=declared_tables,
)

report = write_validation_json(Path(mart_dir) / "_validate" / "mart_validation.json", result)
Expand Down
Loading