@@ -267,28 +267,89 @@ def yaml_read_nested(text, parent, field):
267267 return None
268268
269269
def parse_numeric_scalar(value):
    """Parse a YAML scalar that should contain a numeric value.

    Args:
        value: Raw scalar as read from the spec text. May be ``None``, a
            number, or a string optionally wrapped in single or double quotes.

    Returns:
        The value as a finite ``float``, or ``None`` when the scalar is
        empty, a null/empty-mapping placeholder, or not a finite number.
    """
    import math

    if value in (None, "", "null", "{}"):
        return None
    try:
        number = float(str(value).strip().strip('"').strip("'"))
    except (TypeError, ValueError):
        return None
    # float() also accepts "nan", "inf" and "infinity"; none of those is a
    # usable score, and NaN would silently poison comparisons and averages.
    if not math.isfinite(number):
        return None
    return number
278+
279+
def extract_self_eval_score(text):
    """Read the recorded self-eval score from supported spec shapes.

    Candidates are tried in this order and the first one that parses as a
    number wins:

    1. ``self_eval`` -> nested ``total``, nested ``score``, then the
       ``self_eval`` field itself;
    2. the same three lookups for ``perf_eval``;
    3. a ``score`` field read via ``yaml_read_field``.

    Returns the parsed score, or ``None`` when no candidate is numeric.
    """

    def raw_candidates():
        # Lazy on purpose: later lookups only run if earlier ones fail.
        for section in ("self_eval", "perf_eval"):
            yield yaml_read_nested(text, section, "total")
            yield yaml_read_nested(text, section, "score")
            yield yaml_read_field(text, section)
        yield yaml_read_field(text, "score")

    for raw in raw_candidates():
        parsed = parse_numeric_scalar(raw)
        if parsed is not None:
            return parsed
    return None
292+
293+
def parse_phase_statuses(text):
    """Parse phase statuses from the phases block without counting unrelated statuses.

    Scans the top-level ``phases:`` block and returns one status per list
    item whose first key is ``id: phaseN``.

    Args:
        text: Full spec text.

    Returns:
        A list of status strings, one per phase, in document order. A phase
        with no recognised ``status`` key is reported as ``"pending"``.

    Only a ``status`` key aligned with the item's own ``id`` key is honoured,
    so a ``status`` nested deeper inside the item (for example in a sub-task
    list) cannot override the phase's status. The value must be exactly one
    of the known statuses, optionally double-quoted and optionally followed
    by a trailing comment. Comment-only lines are ignored, so a column-0
    comment does not end the block early.
    """
    header_re = re.compile(r'^phases:\s*(?:#.*)?$')
    item_re = re.compile(r'^(\s+)-(\s+)id:\s*"?(phase\d+)"?')
    status_re = re.compile(
        r'^\s+status:\s*"?(pending|in_progress|completed|failed|skipped)"?\s*(?:#.*)?$'
    )

    statuses = []
    in_phases = False
    item_indent = None  # column of the "-" of the phase item being read
    key_indent = None   # column where that item's own keys start

    for line in text.splitlines():
        if not in_phases:
            in_phases = bool(header_re.match(line))
            continue

        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        indent = len(line) - len(line.lstrip())
        if indent == 0:
            # Next top-level key: the phases block is over.
            break

        if item_indent is not None and indent > item_indent:
            # Still inside the current phase item; deeper lines are ignored.
            if indent == key_indent:
                status_match = status_re.match(line)
                if status_match:
                    statuses[-1] = status_match.group(1)
            continue

        # Back at (or above) the item's own indent: the previous item ended.
        item_indent = key_indent = None
        item_match = item_re.match(line)
        if item_match:
            item_indent = indent
            key_indent = len(item_match.group(1)) + 1 + len(item_match.group(2))
            statuses.append("pending")

    return statuses
344+
345+
def count_phases(text):
    """Count phases and their statuses.

    Returns a 4-tuple ``(total, completed, failed, in_progress)`` computed
    from the list that ``parse_phase_statuses`` returns for ``text``.
    """
    statuses = parse_phase_statuses(text)
    tallies = [
        statuses.count(name)
        for name in ("completed", "failed", "in_progress")
    ]
    return (len(statuses), *tallies)
293354
294355
@@ -1073,18 +1134,16 @@ def cmd_start(args):
10731134
10741135def check_self_eval (text , task_id ):
10751136 """Warn if self-eval looks like a rubber stamp."""
1076- # Look for self_eval or perf_eval score
1077- score_match = re .search (r'(?:self_eval|perf_eval|score):\s*(\d+)' , text )
1078- if not score_match :
1137+ score = extract_self_eval_score (text )
1138+ if score is None :
10791139 print (f" { c (C_YELLOW , 'warn' )} : no self-eval score found in spec" )
10801140 return
1081-
1082- score = int (score_match .group (1 ))
10831141 has_deviations = bool (re .search (r'deviations:' , text , re .MULTILINE ))
10841142 has_improvements = bool (re .search (r'improvements:' , text , re .MULTILINE ))
1143+ score_display = int (score ) if float (score ).is_integer () else score
10851144
10861145 if score >= 9 and not has_deviations and not has_improvements :
1087- print (f" { c (C_YELLOW , 'warn' )} : self-eval { score } /10 with no deviations or improvements noted" )
1146+ print (f" { c (C_YELLOW , 'warn' )} : self-eval { score_display } /10 with no deviations or improvements noted" )
10881147 print (f" scores above 8 should document at least one deviation or improvement" )
10891148 elif score == 10 :
10901149 print (f" { c (C_YELLOW , 'note' )} : perfect 10/10 - are you sure? 10 means flawless with improvements beyond spec" )
@@ -1112,7 +1171,7 @@ def cmd_complete(args):
11121171 text = spec .read_text ()
11131172
11141173 # Check for exec results - warn if no criteria were executed
1115- has_results = bool ( re . search ( r' result:\s*"?( pass|fail)"?' , text , re . MULTILINE ))
1174+ has_results = any ( ac . get ( " result" ) in ( " pass" , "fail" ) for ac in parse_acceptance_criteria ( text ))
11161175 if not has_results :
11171176 print (f" { c (C_YELLOW , 'warn' )} : no exec results recorded. Run '{ c (C_BOLD , f'trellis exec { args .task_id } ' )} ' first" )
11181177
@@ -2333,13 +2392,14 @@ def cmd_report(args):
23332392 risks [risk ] = risks .get (risk , 0 ) + 1
23342393
23352394 # Self-eval scores
2336- score_match = re . search ( r'total:\s*(\d+)' , text )
2337- if score_match :
2338- self_eval_scores .append (int ( score_match . group ( 1 )) )
2395+ score = extract_self_eval_score ( text )
2396+ if score is not None :
2397+ self_eval_scores .append (score )
23392398
23402399 # Exec results
2341- passes = len (re .findall (r'result:\s*"?pass"?' , text ))
2342- fails = len (re .findall (r'result:\s*"?fail"?' , text ))
2400+ criteria = parse_acceptance_criteria (text )
2401+ passes = sum (1 for ac in criteria if ac .get ("result" ) == "pass" )
2402+ fails = sum (1 for ac in criteria if ac .get ("result" ) == "fail" )
23432403 if passes or fails :
23442404 exec_pass += passes
23452405 exec_fail += fails
0 commit comments