JamesonRGrieve · JamesonRGrieve · Dec 21, 2025 · Dec 18, 2025 · Dec 18, 2025 · Dec 19, 2025
diff --git a/.github/workflows/regression-test.yml b/.github/workflows/regression-test.yml
@@ -300,8 +300,8 @@ jobs:
 
           # States ordered from best to worst for matrix display
           # This ordering puts improvements below diagonal, regressions above
-          STATES = ["Pass", "Skip", "XFail", "Fail", "Error", "Nonexistent"]
-          STATE_KEYS = ["passing", "skipped", "xfailed", "failing", "error", "nonexistent"]
+          STATES = ["Pass", "XPass", "Skip", "XFail", "Fail", "Error", "Nonexistent"]
+          STATE_KEYS = ["passing", "xpassed", "skipped", "xfailed", "failing", "error", "nonexistent"]
 
           def load_json(path: str) -> dict:
               file_path = Path(path)
@@ -350,6 +350,8 @@ jobs:
                       data["skipped"].add(str(test_id))
                   elif status in {"xfailed", "xfail"}:
                       data["xfailed"].add(str(test_id))
+                  elif status in {"xpassed", "xpass"}:
+                      data["xpassed"].add(str(test_id))
                   else:
                       data["other"].add(str(test_id))
 
@@ -360,6 +362,7 @@ jobs:
                   "error": set(coerce_list(raw.get("error_tests"))),
                   "skipped": set(coerce_list(raw.get("skipped_tests"))),
                   "xfailed": set(coerce_list(raw.get("xfailed_tests"))),
+                  "xpassed": set(coerce_list(raw.get("xpassed_tests"))),
                   "warnings": set(coerce_list(raw.get("warnings"))),
                   "all": set(coerce_list(raw.get("all_tests"))),
                   "other": set(),
@@ -374,6 +377,7 @@ jobs:
                       | data["error"]
                       | data["skipped"]
                       | data["xfailed"]
+                      | data["xpassed"]
                       | data["other"]
                   )
 
@@ -393,19 +397,25 @@ jobs:
               # Anything to Nonexistent is regression
               if to_state == "Nonexistent":
                   return "regression"
-              # Pass to anything else is regression
-              if from_state == "Pass":
+              # Pass or XPass moving to any state other than Pass/XPass is a regression
+              if from_state == "Pass" and to_state != "XPass":
+                  return "regression"
+              if from_state == "XPass" and to_state != "Pass":
                   return "regression"
-              # Skip to Fail/Error already covered above
 
               # Neutrals
               if from_state == "Fail" and to_state == "XFail":
                   return "neutral"
               if from_state == "Skip" and to_state == "XFail":
                   return "neutral"
+              # Pass <-> XPass is neutral: the test still passes, only its xfail marker was added or removed
+              if from_state == "Pass" and to_state == "XPass":
+                  return "neutral"
+              if from_state == "XPass" and to_state == "Pass":
+                  return "neutral"
 
               # Improvements (italic): moving to a better state
-              if to_state == "Pass":
+              if to_state in ["Pass", "XPass"]:
                   return "improvement"
               if from_state in ["Fail", "Error"] and to_state == "Skip":
                   return "improvement"
@@ -432,6 +442,7 @@ jobs:
               """Get the set of tests in a given state."""
               state_map = {
                   "Pass": "passing",
+                  "XPass": "xpassed",
                   "Skip": "skipped",
                   "XFail": "xfailed",
                   "Fail": "failing",

diff --git a/.github/workflows/run-branch-test.yml b/.github/workflows/run-branch-test.yml
@@ -84,15 +84,19 @@ jobs:
       has_errors: ${{ steps.results.outputs.has_errors }}
       error_type: ${{ steps.results.outputs.error_type }}
       failing_count: ${{ steps.results.outputs.failing_count }}
+      error_count: ${{ steps.results.outputs.error_count }}
       skipped_count: ${{ steps.results.outputs.skipped_count }}
       xfailed_count: ${{ steps.results.outputs.xfailed_count }}
+      xpassed_count: ${{ steps.results.outputs.xpassed_count }}
 
     steps:
       # Define cache keys
       - name: Set cache keys
         id: cache-keys
         run: |
-          BASE_KEY="pytest-${{ inputs.target_branch }}-${{ github.event.pull_request.base.sha || github.sha }}"
+          # Bump CACHE_VERSION whenever the extraction logic changes so stale cached results are not reused
+          CACHE_VERSION="v3"
+          BASE_KEY="pytest-${CACHE_VERSION}-${{ inputs.target_branch }}-${{ github.event.pull_request.base.sha || github.sha }}"
           echo "base_key=$BASE_KEY" >> $GITHUB_OUTPUT
           echo "pending_key=${BASE_KEY}-pending-${{ github.run_id }}" >> $GITHUB_OUTPUT
           echo "🔍 Cache base key: $BASE_KEY"
@@ -343,8 +347,10 @@ jobs:
           percentage = 0.0
           passing_tests = []
           failing_tests = []
+          error_tests = []
           skipped_tests = []
           xfailed_tests = []
+          xpassed_tests = []
           all_tests = []
           skipped_with_reasons = {}
           xfailed_with_reasons = {}
@@ -367,8 +373,10 @@ jobs:
                       all_tests.append(nodeid)
                       if outcome == 'passed':
                           passing_tests.append(nodeid)
-                      elif outcome in ['failed', 'error']:
+                      elif outcome == 'failed':
                           failing_tests.append(nodeid)
+                      elif outcome == 'error':
+                          error_tests.append(nodeid)
                       elif outcome == 'skipped':
                           skipped_tests.append(nodeid)
                           reason = test.get('longrepr', 'No reason')
@@ -381,6 +389,8 @@ jobs:
                           if isinstance(reason, list):
                               reason = reason[0] if reason else 'No reason'
                           xfailed_with_reasons[nodeid] = str(reason).strip()
+                      elif outcome == 'xpassed':
+                          xpassed_tests.append(nodeid)
 
               percentage = (passed / total * 100) if total > 0 else 0
           except FileNotFoundError:
@@ -415,8 +425,10 @@ jobs:
               json.dump({
                   'passing_tests': passing_tests,
                   'failing_tests': failing_tests,
+                  'error_tests': error_tests,
                   'skipped_tests': skipped_tests,
                   'xfailed_tests': xfailed_tests,
+                  'xpassed_tests': xpassed_tests,
                   'all_tests': all_tests,
                   'skipped_tests_with_reasons': skipped_with_reasons,
                   'xfailed_tests_with_reasons': xfailed_with_reasons,
@@ -430,8 +442,10 @@ jobs:
               f.write(f'passed={passed}\n')
               f.write(f'percentage={percentage:.2f}\n')
               f.write(f'failing_count={len(failing_tests)}\n')
+              f.write(f'error_count={len(error_tests)}\n')
               f.write(f'skipped_count={len(skipped_tests)}\n')
               f.write(f'xfailed_count={len(xfailed_tests)}\n')
+              f.write(f'xpassed_count={len(xpassed_tests)}\n')
           "
 
       - name: Save results to cache
@@ -460,8 +474,10 @@ jobs:
           has_errors=${{ steps.check-collection.outputs.has_errors || 'false' }}
           error_type=${{ steps.check-collection.outputs.error_type || 'none' }}
           failing_count=${{ steps.extract-results.outputs.failing_count || '0' }}
+          error_count=${{ steps.extract-results.outputs.error_count || '0' }}
           skipped_count=${{ steps.extract-results.outputs.skipped_count || '0' }}
           xfailed_count=${{ steps.extract-results.outputs.xfailed_count || '0' }}
+          xpassed_count=${{ steps.extract-results.outputs.xpassed_count || '0' }}
           EOF
           # Remove leading whitespace from the env file
           sed -i 's/^[[:space:]]*//' cached_target/outputs.env
@@ -501,8 +517,10 @@ jobs:
             echo "has_errors=${{ steps.load-cache.outputs.has_errors || 'false' }}" >> $GITHUB_OUTPUT
             echo "error_type=${{ steps.load-cache.outputs.error_type || 'none' }}" >> $GITHUB_OUTPUT
             echo "failing_count=${{ steps.load-cache.outputs.failing_count || '0' }}" >> $GITHUB_OUTPUT
+            echo "error_count=${{ steps.load-cache.outputs.error_count || '0' }}" >> $GITHUB_OUTPUT
             echo "skipped_count=${{ steps.load-cache.outputs.skipped_count || '0' }}" >> $GITHUB_OUTPUT
             echo "xfailed_count=${{ steps.load-cache.outputs.xfailed_count || '0' }}" >> $GITHUB_OUTPUT
+            echo "xpassed_count=${{ steps.load-cache.outputs.xpassed_count || '0' }}" >> $GITHUB_OUTPUT
           else
             echo "📋 Using fresh results"
             echo "total=${{ steps.extract-results.outputs.total || '0' }}" >> $GITHUB_OUTPUT
@@ -513,8 +531,10 @@ jobs:
             echo "has_errors=${{ steps.check-collection.outputs.has_errors || 'false' }}" >> $GITHUB_OUTPUT
             echo "error_type=${{ steps.check-collection.outputs.error_type || 'none' }}" >> $GITHUB_OUTPUT
             echo "failing_count=${{ steps.extract-results.outputs.failing_count || '0' }}" >> $GITHUB_OUTPUT
+            echo "error_count=${{ steps.extract-results.outputs.error_count || '0' }}" >> $GITHUB_OUTPUT
             echo "skipped_count=${{ steps.extract-results.outputs.skipped_count || '0' }}" >> $GITHUB_OUTPUT
             echo "xfailed_count=${{ steps.extract-results.outputs.xfailed_count || '0' }}" >> $GITHUB_OUTPUT
+            echo "xpassed_count=${{ steps.extract-results.outputs.xpassed_count || '0' }}" >> $GITHUB_OUTPUT
           fi
 
   # Compare results

diff --git a/.github/workflows/test-py-pytest.yml b/.github/workflows/test-py-pytest.yml
@@ -192,6 +192,7 @@ jobs:
           error_tests = []
           skipped_tests = []
           xfailed_tests = []
+          xpassed_tests = []
           all_tests = []
           skipped_with_reasons = {}
           xfailed_with_reasons = {}
@@ -230,6 +231,8 @@ jobs:
                           if isinstance(reason, list):
                               reason = reason[0] if reason else 'No reason'
                           xfailed_with_reasons[nodeid] = str(reason).strip()
+                      elif outcome == 'xpassed':
+                          xpassed_tests.append(nodeid)
 
               percentage = (passed / total * 100) if total > 0 else 0
           except FileNotFoundError:
@@ -267,6 +270,7 @@ jobs:
                   'error_tests': error_tests,
                   'skipped_tests': skipped_tests,
                   'xfailed_tests': xfailed_tests,
+                  'xpassed_tests': xpassed_tests,
                   'all_tests': all_tests,
                   'skipped_tests_with_reasons': skipped_with_reasons,
                   'xfailed_tests_with_reasons': xfailed_with_reasons,
@@ -283,6 +287,7 @@ jobs:
               f.write(f'error_count={len(error_tests)}\n')
               f.write(f'skipped_count={len(skipped_tests)}\n')
               f.write(f'xfailed_count={len(xfailed_tests)}\n')
+              f.write(f'xpassed_count={len(xpassed_tests)}\n')
           "
 
       - name: Upload test artifacts