From 61ab9df8a15d47048e2c3e58bbe2b0a5b8fa4979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=ADn?= Date: Fri, 6 Mar 2026 16:56:43 +0100 Subject: [PATCH] Handle structured output in review parser Claude can return review data in `structured_output` even when the legacy `result` field is empty. The current parser only looks at `result`, so a valid review payload can be lost and the action can degrade into an empty review. This change checks `structured_output` first, falls back to parsing `result` for compatibility, and fails the review if neither contains a valid payload. The tests now cover the `structured_output` path and the failure case so the action does not silently approve malformed responses. --- claudecode/github_action_audit.py | 46 ++++++---- claudecode/test_claude_runner.py | 60 +++++++++---- claudecode/test_github_action_audit.py | 44 +++++++--- claudecode/test_workflow_integration.py | 107 ++++++++++++++++++++++-- 4 files changed, 208 insertions(+), 49 deletions(-) diff --git a/claudecode/github_action_audit.py b/claudecode/github_action_audit.py index 001ace7..93e82fc 100644 --- a/claudecode/github_action_audit.py +++ b/claudecode/github_action_audit.py @@ -577,7 +577,13 @@ def run_code_review(self, repo_dir: Path, prompt: str) -> Tuple[bool, str, Dict[ # If returncode is 0, extract review findings if result.returncode == 0: - parsed_results = self._extract_review_findings(parsed_result) + try: + parsed_results = self._extract_review_findings(parsed_result) + except ValueError as error: + if attempt == NUM_RETRIES - 1: + return False, str(error), {} + time.sleep(5 * attempt) + continue return True, "", parsed_results # Handle non-zero return codes after parsing @@ -607,22 +613,28 @@ def run_code_review(self, repo_dir: Path, prompt: str) -> Tuple[bool, str, Dict[ def _extract_review_findings(self, claude_output: Any) -> Dict[str, Any]: """Extract review findings and PR summary from Claude's JSON response.""" - if isinstance(claude_output, dict): - # Only accept Claude Code wrapper with result field - # Direct format without wrapper is not supported - if 'result' in claude_output: - result_text = claude_output['result'] - if isinstance(result_text, str): - # Try to extract JSON from the result text - success, result_json = parse_json_with_fallbacks(result_text, "Claude result text") - if success and result_json and 'findings' in result_json and 'pr_summary' in result_json: - return result_json - - # Return empty structure if no findings found - return { - 'findings': [], - 'pr_summary': {} - } + if not isinstance(claude_output, dict): + raise ValueError("Claude did not return a JSON object") + + structured_output = claude_output.get('structured_output') + if ( + isinstance(structured_output, dict) + and 'findings' in structured_output + and 'pr_summary' in structured_output + ): + return structured_output + + # Fall back to the older wrapper shape where the JSON payload + # is serialized into the result field. + result_text = claude_output.get('result') + if isinstance(result_text, str): + success, result_json = parse_json_with_fallbacks(result_text, "Claude result text") + if success and result_json and 'findings' in result_json and 'pr_summary' in result_json: + return result_json + + raise ValueError( + "Claude did not return a valid review payload in structured_output or result" + ) def validate_claude_available(self) -> Tuple[bool, str]: """Validate that Claude Code is available.""" diff --git a/claudecode/test_claude_runner.py b/claudecode/test_claude_runner.py index 296fb78..82aafe4 100644 --- a/claudecode/test_claude_runner.py +++ b/claudecode/test_claude_runner.py @@ -9,6 +9,8 @@ from unittest.mock import Mock, patch from pathlib import Path +import pytest + from claudecode.github_action_audit import SimpleClaudeRunner from claudecode.constants import DEFAULT_CLAUDE_MODEL @@ -179,7 +181,14 @@ def test_run_code_review_large_prompt_warning(self, mock_run, capsys): """Test warning for large prompts.""" mock_run.return_value = Mock( returncode=0, - stdout='{"findings": []}', + stdout=json.dumps({ + "type": "result", + "subtype": "success", + "structured_output": { + "pr_summary": {"overview": "Large prompt test", "file_changes": []}, + "findings": [], + }, + }), stderr='' ) @@ -203,7 +212,18 @@ def test_run_code_review_retry_on_failure(self, mock_run): # First call fails, second succeeds mock_run.side_effect = [ Mock(returncode=1, stdout='', stderr='Temporary error'), - Mock(returncode=0, stdout='{"findings": []}', stderr='') + Mock( + returncode=0, + stdout=json.dumps({ + "type": "result", + "subtype": "success", + "structured_output": { + "pr_summary": {"overview": "Retry test", "file_changes": []}, + "findings": [], + }, + }), + stderr='', + ) ] runner = SimpleClaudeRunner() @@ -304,6 +324,24 @@ def test_extract_review_findings_claude_wrapper(self): result = runner._extract_review_findings(claude_output) assert len(result['findings']) == 1 assert result['findings'][0]['file'] == 'test.py' + + def test_extract_review_findings_structured_output_wrapper(self): + """Test extraction from structured_output when result is empty.""" + runner = SimpleClaudeRunner() + + claude_output = { + "result": "", + "structured_output": { + "pr_summary": {"overview": "Test", "file_changes": []}, + "findings": [ + {"file": "test.py", "line": 10, "severity": "HIGH"} + ], + }, + } + + result = runner._extract_review_findings(claude_output) + assert len(result["findings"]) == 1 + assert result["pr_summary"]["overview"] == "Test" def test_extract_review_findings_direct_format(self): """Test that direct findings format was removed - only wrapped format is supported.""" @@ -320,10 +358,8 @@ def test_extract_review_findings_direct_format(self): } } - result = runner._extract_review_findings(claude_output) - # Should return empty structure since direct format is not supported - assert len(result['findings']) == 0 - assert result['pr_summary'] == {} + with pytest.raises(ValueError, match="valid review payload"): + runner._extract_review_findings(claude_output) def test_extract_review_findings_text_fallback(self): """Test that text fallback was removed - only JSON is supported.""" @@ -334,20 +370,16 @@ def test_extract_review_findings_text_fallback(self): "result": "Found SQL injection vulnerability in database.py line 45" } - # Should return empty findings since we don't parse text anymore - result = runner._extract_review_findings(claude_output) - assert len(result['findings']) == 0 - assert result['pr_summary'] == {} + with pytest.raises(ValueError, match="valid review payload"): + runner._extract_review_findings(claude_output) def test_extract_review_findings_empty(self): """Test extraction with no findings.""" runner = SimpleClaudeRunner() - # Various empty formats for output in [None, {}, {"result": ""}, {"other": "data"}]: - result = runner._extract_review_findings(output) - assert result['findings'] == [] - assert result['pr_summary'] == {} + with pytest.raises(ValueError): + runner._extract_review_findings(output) def test_create_findings_from_text(self): """Test that _create_findings_from_text was removed.""" diff --git a/claudecode/test_github_action_audit.py b/claudecode/test_github_action_audit.py index dc4a7c7..06157f4 100644 --- a/claudecode/test_github_action_audit.py +++ b/claudecode/test_github_action_audit.py @@ -3,6 +3,8 @@ Pytest tests for GitHub Action audit script components. """ +import pytest + class TestImports: """Test that all required modules can be imported.""" @@ -455,8 +457,35 @@ def test_extract_findings_with_pr_summary(self): assert result['pr_summary']['file_changes'][0]['files'] == ['src/auth.py'] assert len(result['findings']) == 1 + def test_extract_findings_from_structured_output(self): + """Test that structured_output is accepted when result is empty.""" + from claudecode.github_action_audit import SimpleClaudeRunner + + runner = SimpleClaudeRunner() + + claude_output = { + 'result': '', + 'structured_output': { + 'findings': [ + {'file': 'test.py', 'line': 1, 'severity': 'HIGH', 'description': 'Test finding'} + ], + 'pr_summary': { + 'overview': 'This PR adds authentication middleware.', + 'file_changes': [ + {'label': 'src/auth.py', 'files': ['src/auth.py'], 'changes': 'Added JWT validation'} + ] + } + } + } + + result = runner._extract_review_findings(claude_output) + + assert 'pr_summary' in result + assert result['pr_summary']['overview'] == 'This PR adds authentication middleware.' + assert len(result['findings']) == 1 + def test_extract_findings_without_pr_summary(self): - """Test that missing pr_summary defaults to empty dict.""" + """Test that missing pr_summary fails instead of degrading silently.""" import json from claudecode.github_action_audit import SimpleClaudeRunner @@ -468,10 +497,8 @@ def test_extract_findings_without_pr_summary(self): }) } - result = runner._extract_review_findings(claude_output) - - assert 'pr_summary' in result - assert result['pr_summary'] == {} + with pytest.raises(ValueError, match="valid review payload"): + runner._extract_review_findings(claude_output) def test_extract_findings_empty_result(self): """Test extraction with invalid/empty Claude output.""" @@ -479,11 +506,8 @@ def test_extract_findings_empty_result(self): runner = SimpleClaudeRunner() - result = runner._extract_review_findings({}) - - assert 'pr_summary' in result - assert result['pr_summary'] == {} - assert result['findings'] == [] + with pytest.raises(ValueError, match="valid review payload"): + runner._extract_review_findings({}) def test_extract_findings_with_grouped_files(self): """Test that grouped file_changes are handled correctly.""" diff --git a/claudecode/test_workflow_integration.py b/claudecode/test_workflow_integration.py index 35c19aa..c217262 100644 --- a/claudecode/test_workflow_integration.py +++ b/claudecode/test_workflow_integration.py @@ -13,6 +13,15 @@ from claudecode.github_action_audit import main +def claude_success_output(payload): + """Build a Claude CLI success wrapper with structured output.""" + return json.dumps({ + "type": "result", + "subtype": "success", + "structured_output": payload, + }) + + class TestFullWorkflowIntegration: """Test complete workflow scenarios.""" @@ -345,8 +354,28 @@ def test_workflow_with_llm_filtering(self, mock_get, mock_run): mock_run.side_effect = [ Mock(returncode=0, stdout='claude version 1.0.0', stderr=''), - Mock(returncode=0, stdout=json.dumps({"findings": claude_findings}), stderr=''), - Mock(returncode=0, stdout=json.dumps({"findings": []}), stderr='') + Mock( + returncode=0, + stdout=claude_success_output({ + "pr_summary": { + "overview": "Dependency update review.", + "file_changes": [], + }, + "findings": claude_findings, + }), + stderr='', + ), + Mock( + returncode=0, + stdout=claude_success_output({ + "pr_summary": { + "overview": "Dependency update review.", + "file_changes": [], + }, + "findings": [], + }), + stderr='', + ) ] with tempfile.TemporaryDirectory() as tmpdir: @@ -423,8 +452,30 @@ def test_workflow_with_no_review_issues(self, mock_get, mock_run): # Claude finds no issues mock_run.side_effect = [ Mock(returncode=0, stdout='claude version 1.0.0', stderr=''), - Mock(returncode=0, stdout='{"findings": [], "analysis_summary": {"review_completed": true}}', stderr=''), - Mock(returncode=0, stdout='{"findings": [], "analysis_summary": {"review_completed": true}}', stderr='') + Mock( + returncode=0, + stdout=claude_success_output({ + "pr_summary": { + "overview": "README-only update.", + "file_changes": [], + }, + "findings": [], + "analysis_summary": {"review_completed": True}, + }), + stderr='', + ), + Mock( + returncode=0, + stdout=claude_success_output({ + "pr_summary": { + "overview": "README-only update.", + "file_changes": [], + }, + "findings": [], + "analysis_summary": {"review_completed": True}, + }), + stderr='', + ) ] with tempfile.TemporaryDirectory() as tmpdir: @@ -517,8 +568,28 @@ def test_workflow_with_massive_pr(self, mock_get, mock_run): # Claude handles it gracefully mock_run.side_effect = [ Mock(returncode=0, stdout='claude version 1.0.0', stderr=''), - Mock(returncode=0, stdout='{"findings": []}', stderr=''), - Mock(returncode=0, stdout='{"findings": []}', stderr='') + Mock( + returncode=0, + stdout=claude_success_output({ + "pr_summary": { + "overview": "Large refactor review.", + "file_changes": [], + }, + "findings": [], + }), + stderr='', + ), + Mock( + returncode=0, + stdout=claude_success_output({ + "pr_summary": { + "overview": "Large refactor review.", + "file_changes": [], + }, + "findings": [], + }), + stderr='', + ) ] with tempfile.TemporaryDirectory() as tmpdir: @@ -597,8 +668,28 @@ def test_workflow_with_binary_files(self, mock_get, mock_run): mock_run.side_effect = [ Mock(returncode=0, stdout='claude version 1.0.0', stderr=''), - Mock(returncode=0, stdout='{"findings": []}', stderr=''), - Mock(returncode=0, stdout='{"findings": []}', stderr='') + Mock( + returncode=0, + stdout=claude_success_output({ + "pr_summary": { + "overview": "Binary file review.", + "file_changes": [], + }, + "findings": [], + }), + stderr='', + ), + Mock( + returncode=0, + stdout=claude_success_output({ + "pr_summary": { + "overview": "Binary file review.", + "file_changes": [], + }, + "findings": [], + }), + stderr='', + ) ] with tempfile.TemporaryDirectory() as tmpdir: