Skip to content

Commit 008c575

Browse files
authored
fix(cli): parse nested spec results (#1)
Handle nested acceptance result blocks and self_eval totals in complete/report. Also count phase statuses from the phases block only and cover the regressions with smoke tests.
1 parent c7e43ed commit 008c575

2 files changed

Lines changed: 310 additions & 32 deletions

File tree

cli/trellis

Lines changed: 92 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -267,28 +267,89 @@ def yaml_read_nested(text, parent, field):
267267
return None
268268

269269

270+
def parse_numeric_scalar(value):
    """Parse a YAML scalar that should contain a finite numeric value.

    Args:
        value: Raw scalar as read from the spec text (typically a string,
            optionally wrapped in single or double quotes), or None.

    Returns:
        The value as a float, or None when the scalar is missing, is one of
        the empty placeholders ("", "null", "{}"), cannot be parsed as a
        number, or parses to NaN / infinity.
    """
    import math  # local import keeps this helper self-contained

    if value in (None, "", "null", "{}"):
        return None
    try:
        number = float(str(value).strip().strip('"').strip("'"))
    except (TypeError, ValueError):
        return None
    # float() happily accepts "nan" / "inf"; such a "score" never satisfies a
    # threshold comparison and would poison any average computed from it.
    return number if math.isfinite(number) else None
278+
279+
280+
def extract_self_eval_score(text):
    """Return the first usable self-eval score found in the spec text.

    Candidates are tried lazily, in this order, and the first one that
    parse_numeric_scalar() accepts is returned:

      1. for each of "self_eval" then "perf_eval":
         the nested "total" field, the nested "score" field, then the
         parent field itself read as a plain scalar;
      2. the "score" field as returned by yaml_read_field().

    Returns None when no candidate parses as a number.
    """

    def _raw_candidates():
        # Generator so that lookups stop as soon as a score is found.
        for section in ("self_eval", "perf_eval"):
            for key in ("total", "score"):
                yield yaml_read_nested(text, section, key)
            yield yaml_read_field(text, section)
        yield yaml_read_field(text, "score")

    for raw in _raw_candidates():
        parsed = parse_numeric_scalar(raw)
        if parsed is not None:
            return parsed
    return None
292+
293+
294+
def parse_phase_statuses(text):
    """Parse phase statuses from the phases block only.

    The scan starts after a line consisting of ``phases:`` at column 0 and
    stops at the next non-blank, non-comment line at column 0. Inside that
    block every list item of the form ``- id: phaseN`` (indented, id
    optionally double-quoted) yields one entry.

    A phase's status is taken only from a ``status:`` line that sits at the
    same column as the item's ``id`` key, so statuses nested deeper inside
    the phase (for example on sub-items) do not overwrite it. Blank lines and
    comment-only lines never terminate the block or a phase.

    Args:
        text: Full spec file contents.

    Returns:
        A list with one status string per phase, in document order. A phase
        without a recognised status is reported as "pending".
    """
    phase_re = re.compile(r'^(\s+)-(\s+)id:\s*"?(phase\d+)"?')
    status_re = re.compile(
        r'^(\s+)status:\s*"?(pending|in_progress|completed|failed|skipped)"?'
    )

    lines = text.splitlines()
    statuses = []
    in_phases = False
    i = 0

    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        if not in_phases:
            if re.match(r'^phases:\s*$', line):
                in_phases = True
            i += 1
            continue

        # Blank and comment-only lines carry no structure; skip them so a
        # column-0 comment does not end the phases block prematurely.
        if not stripped or stripped.startswith("#"):
            i += 1
            continue

        # Any other content at column 0 is the next top-level key.
        if len(line) - len(line.lstrip()) == 0:
            break

        match = phase_re.match(line)
        if not match:
            i += 1
            continue

        item_indent = len(match.group(1))
        # Column of the "id" key: sibling keys of this phase share it.
        field_column = item_indent + 1 + len(match.group(2))
        status = "pending"
        i += 1

        while i < len(lines):
            field_line = lines[i]
            field_stripped = field_line.strip()

            if not field_stripped or field_stripped.startswith("#"):
                i += 1
                continue

            field_indent = len(field_line) - len(field_line.lstrip())
            # Back at (or left of) the dash column: this phase has ended.
            if field_indent <= item_indent:
                break

            status_match = status_re.match(field_line)
            if status_match and len(status_match.group(1)) == field_column:
                status = status_match.group(2)

            i += 1

        statuses.append(status)

    return statuses
344+
345+
270346
def count_phases(text):
    """Count phases and their statuses.

    Statuses come from parse_phase_statuses(), so only entries inside the
    phases block contribute.

    Returns:
        A tuple ``(total, completed, failed, in_progress)``.
    """
    statuses = parse_phase_statuses(text)
    return (
        len(statuses),
        statuses.count("completed"),
        statuses.count("failed"),
        statuses.count("in_progress"),
    )
293354

294355

@@ -1073,18 +1134,16 @@ def cmd_start(args):
10731134

10741135
def check_self_eval(text, task_id):
10751136
"""Warn if self-eval looks like a rubber stamp."""
1076-
# Look for self_eval or perf_eval score
1077-
score_match = re.search(r'(?:self_eval|perf_eval|score):\s*(\d+)', text)
1078-
if not score_match:
1137+
score = extract_self_eval_score(text)
1138+
if score is None:
10791139
print(f" {c(C_YELLOW, 'warn')}: no self-eval score found in spec")
10801140
return
1081-
1082-
score = int(score_match.group(1))
10831141
has_deviations = bool(re.search(r'deviations:', text, re.MULTILINE))
10841142
has_improvements = bool(re.search(r'improvements:', text, re.MULTILINE))
1143+
score_display = int(score) if float(score).is_integer() else score
10851144

10861145
if score >= 9 and not has_deviations and not has_improvements:
1087-
print(f" {c(C_YELLOW, 'warn')}: self-eval {score}/10 with no deviations or improvements noted")
1146+
print(f" {c(C_YELLOW, 'warn')}: self-eval {score_display}/10 with no deviations or improvements noted")
10881147
print(f" scores above 8 should document at least one deviation or improvement")
10891148
elif score == 10:
10901149
print(f" {c(C_YELLOW, 'note')}: perfect 10/10 - are you sure? 10 means flawless with improvements beyond spec")
@@ -1112,7 +1171,7 @@ def cmd_complete(args):
11121171
text = spec.read_text()
11131172

11141173
# Check for exec results - warn if no criteria were executed
1115-
has_results = bool(re.search(r'result:\s*"?(pass|fail)"?', text, re.MULTILINE))
1174+
has_results = any(ac.get("result") in ("pass", "fail") for ac in parse_acceptance_criteria(text))
11161175
if not has_results:
11171176
print(f" {c(C_YELLOW, 'warn')}: no exec results recorded. Run '{c(C_BOLD, f'trellis exec {args.task_id}')}' first")
11181177

@@ -2333,13 +2392,14 @@ def cmd_report(args):
23332392
risks[risk] = risks.get(risk, 0) + 1
23342393

23352394
# Self-eval scores
2336-
score_match = re.search(r'total:\s*(\d+)', text)
2337-
if score_match:
2338-
self_eval_scores.append(int(score_match.group(1)))
2395+
score = extract_self_eval_score(text)
2396+
if score is not None:
2397+
self_eval_scores.append(score)
23392398

23402399
# Exec results
2341-
passes = len(re.findall(r'result:\s*"?pass"?', text))
2342-
fails = len(re.findall(r'result:\s*"?fail"?', text))
2400+
criteria = parse_acceptance_criteria(text)
2401+
passes = sum(1 for ac in criteria if ac.get("result") == "pass")
2402+
fails = sum(1 for ac in criteria if ac.get("result") == "fail")
23432403
if passes or fails:
23442404
exec_pass += passes
23452405
exec_fail += fails

0 commit comments

Comments
 (0)