Skip to content

Commit dcdae74

Browse files
authored
feat: aggiungi il summary validation a status (#46)
1 parent e2802e1 commit dcdae74

2 files changed

Lines changed: 322 additions & 1 deletion

File tree

tests/test_cli_status.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,153 @@ def test_status_reports_raw_hints_when_raw_artifacts_exist(tmp_path: Path, monke
155155
assert "delim: ;" in result.output
156156
assert "skip: 1" in result.output
157157
assert "header_preamble_detected" in result.output
158+
159+
160+
def test_status_reports_validation_summary_from_layer_artifacts(tmp_path: Path, monkeypatch) -> None:
    """``status`` prints a per-layer validation summary read from layer artifacts.

    The fixture lays out manifests and validation reports for the clean, mart and
    cross-year layers, then checks the summary lines echoed by the command.
    """

    def dump_json(target: Path, payload: dict) -> None:
        # Every artifact in this fixture is pretty-printed UTF-8 JSON.
        target.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    project_dir = tmp_path / "project"
    project_dir.mkdir()
    config_path = project_dir / "dataset.yml"
    config_path.write_text(
        """
root: "./out"
dataset:
  name: demo_ds
  years: [2022]
raw: {}
clean:
  sql: "sql/clean.sql"
  required_columns: ["id", "value"]
mart:
  tables:
    - name: mart_ok
      sql: "sql/mart/mart_ok.sql"
  required_tables: ["mart_ok", "mart_missing"]
cross_year:
  tables:
    - name: cross_ok
      sql: "sql/cross/cross_ok.sql"
""".strip(),
        encoding="utf-8",
    )

    # SQL sources referenced by the configuration.
    sql_root = project_dir / "sql"
    sql_mart_dir = sql_root / "mart"
    sql_cross_dir = sql_root / "cross"
    for sql_dir in (sql_mart_dir, sql_cross_dir):
        sql_dir.mkdir(parents=True, exist_ok=True)
    (sql_root / "clean.sql").write_text("select 1 as value", encoding="utf-8")
    (sql_mart_dir / "mart_ok.sql").write_text("select * from clean_input", encoding="utf-8")
    (sql_cross_dir / "cross_ok.sql").write_text("select * from clean_input", encoding="utf-8")

    # Layer output directories, each with its `_validate` sub-directory.
    data_root = project_dir / "out" / "data"
    clean_dir = data_root / "clean" / "demo_ds" / "2022"
    mart_dir = data_root / "mart" / "demo_ds" / "2022"
    cross_dir = data_root / "cross" / "demo_ds"
    for layer_dir in (clean_dir, mart_dir, cross_dir):
        (layer_dir / "_validate").mkdir(parents=True, exist_ok=True)

    # Only the clean and cross outputs exist on disk; the mart output is left out
    # so the command has a missing output to report.
    (clean_dir / "demo_ds_2022_clean.parquet").write_text("placeholder", encoding="utf-8")
    (cross_dir / "cross_ok.parquet").write_text("placeholder", encoding="utf-8")

    # Clean layer: passes with one warning, but the `value` column is absent.
    dump_json(
        clean_dir / "manifest.json",
        {
            "validation": "_validate/clean_validation.json",
            "summary": {"ok": True, "errors_count": 0, "warnings_count": 1},
            "outputs": [{"file": "demo_ds_2022_clean.parquet"}],
        },
    )
    dump_json(
        clean_dir / "_validate" / "clean_validation.json",
        {
            "ok": True,
            "errors": [],
            "warnings": ["header_preamble_detected"],
            "summary": {
                "required": ["id", "value"],
                "columns": ["id"],
            },
        },
    )

    # Mart layer: fails with one error and one warning; `mart_missing` is not produced.
    dump_json(
        mart_dir / "manifest.json",
        {
            "validation": "_validate/mart_validation.json",
            "summary": {"ok": False, "errors_count": 1, "warnings_count": 1},
            "outputs": [{"file": "mart_ok.parquet"}],
        },
    )
    dump_json(
        mart_dir / "_validate" / "mart_validation.json",
        {
            "ok": False,
            "errors": ["Missing required MART tables: ['mart_missing']"],
            "warnings": ["MART table_rules reference tables not declared in mart.tables: ['mart_extra']"],
            "summary": {
                "required_tables": ["mart_ok", "mart_missing"],
                "tables": ["mart_ok"],
                "per_table": {},
            },
        },
    )

    # Cross-year layer: clean pass.
    dump_json(
        cross_dir / "manifest.json",
        {
            "validation": "_validate/cross_validation.json",
            "summary": {"ok": True, "errors_count": 0, "warnings_count": 0},
            "outputs": [{"file": "cross_ok.parquet"}],
        },
    )
    dump_json(
        cross_dir / "_validate" / "cross_validation.json",
        {
            "ok": True,
            "errors": [],
            "warnings": [],
            "summary": {
                "required_tables": [],
                "tables": ["cross_ok"],
            },
        },
    )

    run_dir = get_run_dir(project_dir / "out", "demo_ds", 2022)
    _write_run_record(run_dir / "run-123.json", "run-123", "2026-03-04T10:00:00+00:00", "FAILED")

    monkeypatch.chdir(tmp_path)
    cli_args = ["status", "--dataset", "demo_ds", "--year", "2022", "--latest", "--config", str(config_path)]
    result = CliRunner().invoke(app, cli_args)

    assert result.exit_code == 0
    expected_fragments = (
        "validation_summary:",
        "clean: state=passed warnings=1 errors=0",
        "warnings_present: yes",
        "missing_columns=value",
        "mart: state=failed warnings=1 errors=1",
        "missing_tables=mart_missing",
        "missing_outputs=mart_ok.parquet",
        "cross_year: state=passed warnings=0 errors=0",
    )
    for fragment in expected_fragments:
        assert fragment in result.output

toolkit/cli/cmd_status.py

Lines changed: 172 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22

33
import json
44
from pathlib import Path
5+
from typing import Any
56

67
import typer
78

89
from toolkit.core.config import load_config
9-
from toolkit.core.paths import layer_year_dir
10+
from toolkit.core.paths import layer_dataset_dir, layer_year_dir
1011
from toolkit.core.run_context import get_run_dir, latest_run, read_run_record
1112

1213

@@ -48,6 +49,174 @@ def _raw_hints(root: Path, dataset: str, year: int) -> dict[str, object]:
4849
}
4950

5051

52+
def _layer_artifacts_dir(root: Path, dataset: str, year: int, layer: str) -> Path:
    """Return the directory holding the artifacts of *layer* for *dataset*.

    The ``cross_year`` layer is resolved with ``layer_dataset_dir`` under the
    ``"cross"`` layer name and does not use *year*; every other layer is resolved
    with ``layer_year_dir``.
    """
    if layer != "cross_year":
        return layer_year_dir(root, layer, dataset, year)
    return layer_dataset_dir(root, "cross", dataset)
56+
57+
58+
def _validation_counts(
59+
validation_payload: dict[str, Any] | None,
60+
manifest_payload: dict[str, Any] | None,
61+
record_summary: dict[str, Any] | None,
62+
) -> tuple[bool | None, int | None, int | None]:
63+
if validation_payload is not None:
64+
return (
65+
validation_payload.get("ok"),
66+
len(validation_payload.get("errors") or []),
67+
len(validation_payload.get("warnings") or []),
68+
)
69+
70+
manifest_summary = (manifest_payload or {}).get("summary") or {}
71+
if manifest_summary:
72+
return (
73+
manifest_summary.get("ok"),
74+
manifest_summary.get("errors_count"),
75+
manifest_summary.get("warnings_count"),
76+
)
77+
78+
record_summary = record_summary or {}
79+
if record_summary:
80+
return (
81+
record_summary.get("passed"),
82+
record_summary.get("errors_count"),
83+
record_summary.get("warnings_count"),
84+
)
85+
86+
return None, None, None
87+
88+
89+
def _missing_items(required: Any, available: Any) -> list[Any]:
    """Return the entries of *required* that are absent from *available*, in order.

    Anything other than two lists yields an empty result, so a malformed
    validation summary simply produces no detail line.
    """
    if not isinstance(required, list) or not isinstance(available, list) or not required:
        return []
    present = set(available)  # built once, not once per required entry
    return [item for item in required if item not in present]


def _layer_validation_summary(
    root: Path,
    dataset: str,
    year: int,
    layer: str,
    record: dict[str, Any],
) -> dict[str, Any] | None:
    """Summarise the validation outcome of one layer for the ``status`` output.

    The layer's ``manifest.json`` is read first; when its ``validation`` entry is a
    non-blank string, the validation report at that path (relative to the layer
    directory) is read as well. The pass flag and the counts come from
    ``_validation_counts``, which also falls back to the run record.

    Args:
        root: Output root passed on to the layer directory helpers.
        dataset: Dataset name.
        year: Dataset year.
        layer: Layer name, e.g. ``"clean"``, ``"mart"`` or ``"cross_year"``.
        record: Run record; its ``validations`` entry is looked up by *layer*.

    Returns:
        ``None`` when there is no manifest, no validation report, no run-record
        entry and the layer directory does not exist. Otherwise a dict with the
        keys ``layer``, ``state`` (``passed``, ``failed``, ``not_validated`` or
        ``unknown``), ``warnings_count``, ``errors_count``, ``has_warnings``,
        ``warning_items``, ``error_items`` and ``details`` (human-readable
        ``key=value`` strings about missing reports, outputs, columns or tables).
    """
    layer_dir = _layer_artifacts_dir(root, dataset, year, layer)
    # NOTE(review): `_read_json` is assumed to return None when the file cannot be
    # loaded; the code below treats None as "artifact not available" -- confirm.
    manifest_payload = _read_json(layer_dir / "manifest.json")
    validation_rel = (manifest_payload or {}).get("validation")
    validation_payload = None
    validation_path = None
    if isinstance(validation_rel, str) and validation_rel.strip():
        validation_path = layer_dir / validation_rel
        validation_payload = _read_json(validation_path)

    record_summary = (record.get("validations") or {}).get(layer, {})
    ok, errors_count, warnings_count = _validation_counts(
        validation_payload,
        manifest_payload,
        record_summary if isinstance(record_summary, dict) else {},
    )

    has_any_data = any(
        [
            manifest_payload is not None,
            validation_payload is not None,
            bool(record_summary),
            layer_dir.exists(),
        ]
    )
    if not has_any_data:
        return None

    warnings: list[str] = []
    errors: list[str] = []
    details: list[str] = []
    if validation_payload is not None:
        warnings = [str(item) for item in (validation_payload.get("warnings") or [])]
        errors = [str(item) for item in (validation_payload.get("errors") or [])]

    # The manifest points at a validation report that could not be read.
    if validation_path is not None and validation_payload is None:
        details.append(f"validation_missing={validation_path.name}")

    # Outputs declared in the manifest that are not present in the layer directory.
    outputs = (manifest_payload or {}).get("outputs") or []
    if isinstance(outputs, list):
        missing_outputs = []
        for entry in outputs:
            if not isinstance(entry, dict):
                continue
            file_name = entry.get("file")
            if isinstance(file_name, str) and file_name and not (layer_dir / file_name).exists():
                missing_outputs.append(file_name)
        if missing_outputs:
            details.append(f"missing_outputs={', '.join(missing_outputs)}")

    summary = (validation_payload or {}).get("summary") or {}
    if layer == "clean":
        missing_columns = _missing_items(summary.get("required") or [], summary.get("columns") or [])
        if missing_columns:
            details.append(f"missing_columns={', '.join(str(column) for column in missing_columns)}")
    if layer in {"mart", "cross_year"}:
        missing_tables = _missing_items(summary.get("required_tables") or [], summary.get("tables") or [])
        if missing_tables:
            details.append(f"missing_tables={', '.join(str(table) for table in missing_tables)}")

    if ok is True:
        state = "passed"
    elif ok is False:
        state = "failed"
    elif manifest_payload is not None:
        # A manifest exists but nothing recorded a pass/fail outcome.
        state = "not_validated"
    else:
        state = "unknown"

    return {
        "layer": layer,
        "state": state,
        "warnings_count": warnings_count,
        "errors_count": errors_count,
        "has_warnings": bool(warnings_count),
        "warning_items": warnings,
        "error_items": errors,
        "details": details,
    }
180+
181+
182+
def _print_validation_summary(
    root: Path,
    dataset: str,
    year: int,
    record: dict[str, Any],
    has_cross_year: bool,
) -> None:
    """Echo the ``validation_summary:`` section of the status output.

    The ``clean`` and ``mart`` layers are always inspected; ``cross_year`` is
    inspected only when *has_cross_year* is true. Layers for which
    ``_layer_validation_summary`` returns ``None`` are left out, and nothing at
    all is printed when no layer has a summary. Counts that are unknown are
    shown as ``?``.
    """
    layers = ["clean", "mart"]
    if has_cross_year:
        layers.append("cross_year")

    collected: list[dict[str, Any]] = []
    for layer_name in layers:
        layer_summary = _layer_validation_summary(root, dataset, year, layer_name, record)
        if layer_summary is None:
            continue
        collected.append(layer_summary)

    if not collected:
        return

    typer.echo("")
    typer.echo("validation_summary:")
    for entry in collected:
        shown_warnings = entry.get("warnings_count")
        if shown_warnings is None:
            shown_warnings = "?"
        shown_errors = entry.get("errors_count")
        if shown_errors is None:
            shown_errors = "?"

        typer.echo(f" {entry['layer']}: state={entry['state']} warnings={shown_warnings} errors={shown_errors}")
        if entry.get("has_warnings"):
            typer.echo(" warnings_present: yes")
        for detail in entry.get("details") or []:
            typer.echo(f" {detail}")
219+
51220
def status(
52221
dataset: str = typer.Option(..., "--dataset", help="Dataset name"),
53222
year: int = typer.Option(..., "--year", help="Dataset year"),
@@ -66,6 +235,7 @@ def status(
66235
cfg = load_config(config, strict_config=strict_config_flag)
67236
run_dir = get_run_dir(cfg.root, dataset, year)
68237
record = read_run_record(run_dir, run_id) if run_id else latest_run(run_dir)
238+
has_cross_year = bool((cfg.cross_year or {}).get("tables"))
69239

70240
typer.echo(f"dataset: {record.get('dataset')}")
71241
typer.echo(f"year: {record.get('year')}")
@@ -93,6 +263,7 @@ def status(
93263
typer.echo("layer layer_status validation_passed errors_count warnings_count")
94264
for layer in ("raw", "clean", "mart"):
95265
typer.echo(_layer_row(record, layer))
266+
_print_validation_summary(Path(cfg.root), dataset, year, record, has_cross_year)
96267

97268
if record.get("status") == "FAILED" and record.get("error"):
98269
typer.echo("")

0 commit comments

Comments
 (0)