22
33import json
44from pathlib import Path
5+ from typing import Any
56
67import typer
78
89from toolkit .core .config import load_config
9- from toolkit .core .paths import layer_year_dir
10+ from toolkit .core .paths import layer_dataset_dir , layer_year_dir
1011from toolkit .core .run_context import get_run_dir , latest_run , read_run_record
1112
1213
@@ -48,6 +49,174 @@ def _raw_hints(root: Path, dataset: str, year: int) -> dict[str, object]:
4849 }
4950
5051
def _layer_artifacts_dir(root: Path, dataset: str, year: int, layer: str) -> Path:
    """Return the directory that holds the artifacts of *layer*.

    Args:
        root: Project root passed through to the path helpers.
        dataset: Dataset name.
        year: Dataset year; ignored for the ``"cross_year"`` layer.
        layer: Layer name, e.g. ``"clean"``, ``"mart"`` or ``"cross_year"``.

    Returns:
        ``layer_dataset_dir(root, "cross", dataset)`` for the ``"cross_year"``
        layer, otherwise ``layer_year_dir(root, layer, dataset, year)``.
    """
    # Every layer except "cross_year" is resolved per year.
    if layer != "cross_year":
        return layer_year_dir(root, layer, dataset, year)
    # "cross_year" is resolved per dataset under the "cross" layer name.
    return layer_dataset_dir(root, "cross", dataset)
56+
57+
58+ def _validation_counts (
59+ validation_payload : dict [str , Any ] | None ,
60+ manifest_payload : dict [str , Any ] | None ,
61+ record_summary : dict [str , Any ] | None ,
62+ ) -> tuple [bool | None , int | None , int | None ]:
63+ if validation_payload is not None :
64+ return (
65+ validation_payload .get ("ok" ),
66+ len (validation_payload .get ("errors" ) or []),
67+ len (validation_payload .get ("warnings" ) or []),
68+ )
69+
70+ manifest_summary = (manifest_payload or {}).get ("summary" ) or {}
71+ if manifest_summary :
72+ return (
73+ manifest_summary .get ("ok" ),
74+ manifest_summary .get ("errors_count" ),
75+ manifest_summary .get ("warnings_count" ),
76+ )
77+
78+ record_summary = record_summary or {}
79+ if record_summary :
80+ return (
81+ record_summary .get ("passed" ),
82+ record_summary .get ("errors_count" ),
83+ record_summary .get ("warnings_count" ),
84+ )
85+
86+ return None , None , None
87+
88+
def _missing_items(required: list[Any], available: list[Any]) -> list[Any]:
    """Return the entries of *required* that are absent from *available*, in order."""
    try:
        # Build the lookup set once instead of once per required entry.
        present = set(available)
        return [item for item in required if item not in present]
    except TypeError:
        # Unhashable entries (e.g. nested JSON objects) cannot go through a
        # set; fall back to a linear membership scan.
        return [item for item in required if item not in available]


def _layer_validation_summary(
    root: Path,
    dataset: str,
    year: int,
    layer: str,
    record: dict[str, Any],
) -> dict[str, Any] | None:
    """Collect the validation state of one layer from its on-disk artifacts.

    The layer directory's ``manifest.json`` is read first. If it names a
    validation file under its ``"validation"`` key, that file is read too.
    The run record's ``validations[layer]`` entry is the last fallback for
    the pass/fail flag and the counts (see ``_validation_counts``).

    Args:
        root: Project root used to locate the layer directory.
        dataset: Dataset name.
        year: Dataset year.
        layer: Layer name (``"clean"``, ``"mart"``, ``"cross_year"``, ...).
        record: Run record; only its ``"validations"`` entry is consulted.

    Returns:
        ``None`` when there is no manifest, no validation payload, no record
        summary and the layer directory does not exist. Otherwise a dict with
        the keys ``layer``, ``state`` (``"passed"``, ``"failed"``,
        ``"not_validated"`` or ``"unknown"``), ``warnings_count``,
        ``errors_count``, ``has_warnings``, ``warning_items``,
        ``error_items`` and ``details`` (human-readable notes about a missing
        validation file, missing outputs, columns or tables).
    """
    layer_dir = _layer_artifacts_dir(root, dataset, year, layer)
    # NOTE(review): _read_json is assumed to return a parsed JSON object or
    # None when the file is unavailable - confirm against its definition.
    manifest_payload = _read_json(layer_dir / "manifest.json")

    validation_payload = None
    validation_path = None
    validation_rel = (manifest_payload or {}).get("validation")
    if isinstance(validation_rel, str) and validation_rel.strip():
        validation_path = layer_dir / validation_rel
        validation_payload = _read_json(validation_path)

    # A malformed (non-mapping) "validations" entry is treated as absent.
    validations = record.get("validations")
    record_summary = validations.get(layer, {}) if isinstance(validations, dict) else {}

    # Short-circuit so the filesystem is only probed when nothing else
    # already proves the layer has data.
    has_any_data = (
        manifest_payload is not None
        or validation_payload is not None
        or bool(record_summary)
        or layer_dir.exists()
    )
    if not has_any_data:
        return None

    ok, errors_count, warnings_count = _validation_counts(
        validation_payload,
        manifest_payload,
        record_summary if isinstance(record_summary, dict) else {},
    )

    warnings: list[str] = []
    errors: list[str] = []
    details: list[str] = []
    if validation_payload is not None:
        warnings = [str(item) for item in (validation_payload.get("warnings") or [])]
        errors = [str(item) for item in (validation_payload.get("errors") or [])]

    # The manifest points at a validation file that could not be read.
    if validation_path is not None and validation_payload is None:
        details.append(f"validation_missing={validation_path.name} ")

    outputs = (manifest_payload or {}).get("outputs") or []
    if isinstance(outputs, list):
        missing_outputs = []
        for entry in outputs:
            if not isinstance(entry, dict):
                continue
            file_name = entry.get("file")
            if isinstance(file_name, str) and file_name and not (layer_dir / file_name).exists():
                missing_outputs.append(file_name)
        if missing_outputs:
            details.append(f"missing_outputs={', '.join(missing_outputs)} ")

    # Guard the summary like every other JSON-derived value in this function:
    # anything that is not a mapping is ignored instead of crashing on .get().
    summary = (validation_payload or {}).get("summary")
    if not isinstance(summary, dict):
        summary = {}

    if layer == "clean":
        required = summary.get("required") or []
        columns = summary.get("columns") or []
        if isinstance(required, list) and isinstance(columns, list):
            missing_columns = _missing_items(required, columns)
            if missing_columns:
                details.append(f"missing_columns={', '.join(str(column) for column in missing_columns)} ")
    if layer in {"mart", "cross_year"}:
        required_tables = summary.get("required_tables") or []
        tables = summary.get("tables") or []
        if isinstance(required_tables, list) and isinstance(tables, list):
            missing_tables = _missing_items(required_tables, tables)
            if missing_tables:
                details.append(f"missing_tables={', '.join(str(table) for table in missing_tables)} ")

    # Only the exact booleans map to passed/failed; any other value means no
    # source reported a verdict.
    if ok is True:
        state = "passed"
    elif ok is False:
        state = "failed"
    elif manifest_payload is not None:
        state = "not_validated"
    else:
        state = "unknown"

    return {
        "layer": layer,
        "state": state,
        "warnings_count": warnings_count,
        "errors_count": errors_count,
        "has_warnings": bool(warnings_count),
        "warning_items": warnings,
        "error_items": errors,
        "details": details,
    }
180+
181+
def _print_validation_summary(
    root: Path,
    dataset: str,
    year: int,
    record: dict[str, Any],
    has_cross_year: bool,
) -> None:
    """Echo a ``validation_summary:`` section for the layers that have data.

    The ``"clean"`` and ``"mart"`` layers are always inspected;
    ``"cross_year"`` is inspected only when *has_cross_year* is true. Layers
    for which ``_layer_validation_summary`` returns ``None`` are skipped, and
    nothing at all is printed when every layer is skipped.

    Args:
        root: Project root used to locate the layer directories.
        dataset: Dataset name.
        year: Dataset year.
        record: Run record forwarded to ``_layer_validation_summary``.
        has_cross_year: Whether to include the ``"cross_year"`` layer.
    """
    layers = ["clean", "mart"]
    if has_cross_year:
        layers.append("cross_year")

    candidates = (
        _layer_validation_summary(root, dataset, year, name, record) for name in layers
    )
    summaries: list[dict[str, Any]] = [item for item in candidates if item is not None]
    if not summaries:
        return

    typer.echo("")
    typer.echo("validation_summary:")
    for entry in summaries:
        # Unknown counts are rendered as "?" rather than "None".
        warnings_count = entry.get("warnings_count")
        errors_count = entry.get("errors_count")
        warnings_text = "?" if warnings_count is None else warnings_count
        errors_text = "?" if errors_count is None else errors_count
        typer.echo(
            f" {entry['layer']} : "
            f"state={entry['state']} "
            f"warnings={warnings_text} "
            f"errors={errors_text} "
        )
        if entry.get("has_warnings"):
            typer.echo(" warnings_present: yes")
        for detail in entry.get("details") or []:
            typer.echo(f" {detail} ")
218+
219+
51220def status (
52221 dataset : str = typer .Option (..., "--dataset" , help = "Dataset name" ),
53222 year : int = typer .Option (..., "--year" , help = "Dataset year" ),
@@ -66,6 +235,7 @@ def status(
66235 cfg = load_config (config , strict_config = strict_config_flag )
67236 run_dir = get_run_dir (cfg .root , dataset , year )
68237 record = read_run_record (run_dir , run_id ) if run_id else latest_run (run_dir )
238+ has_cross_year = bool ((cfg .cross_year or {}).get ("tables" ))
69239
70240 typer .echo (f"dataset: { record .get ('dataset' )} " )
71241 typer .echo (f"year: { record .get ('year' )} " )
@@ -93,6 +263,7 @@ def status(
93263 typer .echo ("layer layer_status validation_passed errors_count warnings_count" )
94264 for layer in ("raw" , "clean" , "mart" ):
95265 typer .echo (_layer_row (record , layer ))
266+ _print_validation_summary (Path (cfg .root ), dataset , year , record , has_cross_year )
96267
97268 if record .get ("status" ) == "FAILED" and record .get ("error" ):
98269 typer .echo ("" )
0 commit comments