diff --git a/.github/workflows/regression-test.yml b/.github/workflows/regression-test.yml
new file mode 100644
index 0000000..c9213e0
--- /dev/null
+++ b/.github/workflows/regression-test.yml
@@ -0,0 +1,473 @@
+name: Reusable Regression Test
+
+on:
+  workflow_call:
+    inputs:
+      runs_on:
+        description: "Runner label for the regression analysis job."
+        required: false
+        type: string
+        default: "ubuntu-latest"
+      baseline_label:
+        description: "Display name for the baseline (target) test results."
+        required: true
+        type: string
+      baseline_results_json:
+        description: "Standardized JSON string describing the baseline test results."
+        required: true
+        type: string
+      current_label:
+        description: "Display name for the current (PR) test results."
+        required: true
+        type: string
+      current_results_json:
+        description: "Standardized JSON string describing the current test results."
+        required: true
+        type: string
+      baseline_passed:
+        description: "Number of passing tests in the baseline run."
+        required: true
+        type: string
+      baseline_total:
+        description: "Total number of tests in the baseline run."
+        required: true
+        type: string
+      baseline_percentage:
+        description: "Pass percentage for the baseline run."
+        required: true
+        type: string
+      current_passed:
+        description: "Number of passing tests in the current run."
+        required: true
+        type: string
+      current_total:
+        description: "Total number of tests in the current run."
+        required: true
+        type: string
+      current_percentage:
+        description: "Pass percentage for the current run."
+        required: true
+        type: string
+      baseline_collection_errors:
+        description: "Whether the baseline run encountered discovery errors."
+        required: false
+        type: string
+        default: "false"
+      baseline_no_tests_found:
+        description: "Whether the baseline run found zero tests."
+        required: false
+        type: string
+        default: "false"
+      current_collection_errors:
+        description: "Whether the current run encountered discovery errors."
+        required: false
+        type: string
+        default: "false"
+      current_no_tests_found:
+        description: "Whether the current run found zero tests."
+        required: false
+        type: string
+        default: "false"
+      artifact_name:
+        description: "Name for the artifact containing regression details."
+        required: false
+        type: string
+        default: "regression_details"
+    outputs:
+      has_regressions:
+        description: "Boolean indicating if regressions were found."
+        value: ${{ jobs.regression-analysis.outputs.has_regressions }}
+      regression_count:
+        description: "Number of regressions detected."
+        value: ${{ jobs.regression-analysis.outputs.regression_count }}
+      pass_to_fail_count:
+        description: "Number of tests that regressed from pass to fail."
+        value: ${{ jobs.regression-analysis.outputs.pass_to_fail_count }}
+      pass_to_skip_count:
+        description: "Number of tests that regressed from pass to skip/xfail."
+        value: ${{ jobs.regression-analysis.outputs.pass_to_skip_count }}
+      pass_to_gone_count:
+        description: "Number of previously passing tests that disappeared."
+        value: ${{ jobs.regression-analysis.outputs.pass_to_gone_count }}
+      fail_to_gone_count:
+        description: "Number of previously failing tests that disappeared."
+        value: ${{ jobs.regression-analysis.outputs.fail_to_gone_count }}
+      discovery_regression_count:
+        description: "Number of new discovery warnings."
+        value: ${{ jobs.regression-analysis.outputs.discovery_regression_count }}
+      fail_to_skip_count:
+        description: "Number of tests that improved from fail to skip."
+        value: ${{ jobs.regression-analysis.outputs.fail_to_skip_count }}
+      fail_to_pass_count:
+        description: "Number of tests that improved from fail to pass."
+        value: ${{ jobs.regression-analysis.outputs.fail_to_pass_count }}
+      new_tests_count:
+        description: "Number of new tests introduced in the current run."
+        value: ${{ jobs.regression-analysis.outputs.new_tests_count }}
+
+jobs:
+  regression-analysis:
+    runs-on: ${{ inputs.runs_on }}
+    outputs:
+      has_regressions: ${{ steps.analyze.outputs.has_regressions }}
+      regression_count: ${{ steps.analyze.outputs.regression_count }}
+      pass_to_fail_count: ${{ steps.analyze.outputs.pass_to_fail_count }}
+      pass_to_skip_count: ${{ steps.analyze.outputs.pass_to_skip_count }}
+      pass_to_gone_count: ${{ steps.analyze.outputs.pass_to_gone_count }}
+      fail_to_gone_count: ${{ steps.analyze.outputs.fail_to_gone_count }}
+      discovery_regression_count: ${{ steps.analyze.outputs.discovery_regression_count }}
+      fail_to_skip_count: ${{ steps.analyze.outputs.fail_to_skip_count }}
+      fail_to_pass_count: ${{ steps.analyze.outputs.fail_to_pass_count }}
+      new_tests_count: ${{ steps.analyze.outputs.new_tests_count }}
+    steps:
+      - name: Validate discovery status
+        run: |
+          echo "Baseline discovery errors: ${{ inputs.baseline_collection_errors }}"
+          echo "Baseline no tests found: ${{ inputs.baseline_no_tests_found }}"
+          echo "Current discovery errors: ${{ inputs.current_collection_errors }}"
+          echo "Current no tests found: ${{ inputs.current_no_tests_found }}"
+
+          if [[ "${{ inputs.current_collection_errors }}" == "true" ]]; then
+            echo "::error::Discovery errors detected in current test results."
+            exit 1
+          fi
+
+          if [[ "${{ inputs.current_no_tests_found }}" == "true" ]]; then
+            echo "::error::No tests were discovered in the current run."
+            exit 1
+          fi
+
+          if [[ "${{ inputs.baseline_collection_errors }}" == "true" ]]; then
+            echo "::warning::Baseline results include discovery errors. Comparisons may be incomplete."
+          fi
+
+          if [[ "${{ inputs.baseline_no_tests_found }}" == "true" ]]; then
+            echo "::warning::No tests were found in the baseline run."
+          fi
+
+      - name: Prepare regression input files
+        run: |
+          cat <<'JSON' > baseline_results.json
+          ${{ inputs.baseline_results_json }}
+          JSON
+          cat <<'JSON' > current_results.json
+          ${{ inputs.current_results_json }}
+          JSON
+
+      - name: Analyze regression data
+        id: analyze
+        run: |
+          python3 - <<'PY'
+          import json
+          import os
+          from pathlib import Path
+
+          def load_json(path: str) -> dict:
+              file_path = Path(path)
+              if not file_path.exists():
+                  print(f"::warning::Input file {path} not found. Using empty defaults.")
+                  return {}
+
+              content = file_path.read_text(encoding='utf-8').strip()
+              if not content:
+                  print(f"::warning::Input file {path} is empty. Using empty defaults.")
+                  return {}
+
+              try:
+                  return json.loads(content)
+              except json.JSONDecodeError as exc:
+                  print(f"::warning::Failed to parse JSON from {path}: {exc}")
+                  return {}
+
+          def coerce_list(value):
+              if isinstance(value, list):
+                  return [str(item) for item in value]
+              return []
+
+          def extract_from_tests_array(results: dict, data: dict) -> None:
+              tests = results.get("tests")
+              if not isinstance(tests, list):
+                  return
+
+              for entry in tests:
+                  if not isinstance(entry, dict):
+                      continue
+                  test_id = entry.get("id") or entry.get("name") or entry.get("nodeid")
+                  if not test_id:
+                      continue
+                  status = entry.get("status") or entry.get("outcome")
+                  if not status:
+                      continue
+                  status = status.lower()
+                  if status in {"passed", "pass"}:
+                      data["passing"].add(str(test_id))
+                  elif status in {"failed", "fail", "error"}:
+                      data["failing"].add(str(test_id))
+                  elif status in {"skipped", "skip"}:
+                      data["skipped"].add(str(test_id))
+                  elif status in {"xfailed", "xfail"}:
+                      data["xfailed"].add(str(test_id))
+                  else:
+                      data["other"].add(str(test_id))
+
+          def build_status_sets(raw: dict) -> dict:
+              data = {
+                  "passing": set(coerce_list(raw.get("passing_tests"))),
+                  "failing": set(coerce_list(raw.get("failing_tests"))),
+                  "skipped": set(coerce_list(raw.get("skipped_tests"))),
+                  "xfailed": set(coerce_list(raw.get("xfailed_tests"))),
+                  "warnings": set(coerce_list(raw.get("warnings"))),
+                  "all": set(coerce_list(raw.get("all_tests"))),
+                  "other": set(),
+              }
+
+              extract_from_tests_array(raw, data)
+
+              if not data["all"]:
+                  data["all"].update(
+                      data["passing"]
+                      | data["failing"]
+                      | data["skipped"]
+                      | data["xfailed"]
+                      | data["other"]
+                  )
+
+              return data
+
+          baseline_data = build_status_sets(load_json("baseline_results.json"))
+          current_data = build_status_sets(load_json("current_results.json"))
+
+          pass_to_fail = sorted(baseline_data["passing"] & current_data["failing"])
+          pass_to_skip = sorted(
+              baseline_data["passing"] & (current_data["skipped"] | current_data["xfailed"])
+          )
+          pass_to_gone = sorted(baseline_data["passing"] - current_data["all"])
+          fail_to_gone = sorted(baseline_data["failing"] - current_data["all"])
+          discovery_regressions = sorted(current_data["warnings"] - baseline_data["warnings"])
+
+          fail_to_skip = sorted(baseline_data["failing"] & current_data["skipped"])
+          fail_to_pass = sorted(baseline_data["failing"] & current_data["passing"])
+          new_tests = sorted(current_data["all"] - baseline_data["all"])
+
+          regression_count = (
+              len(pass_to_fail)
+              + len(pass_to_skip)
+              + len(pass_to_gone)
+              + len(fail_to_gone)
+              + len(discovery_regressions)
+          )
+          has_regressions = regression_count > 0
+
+          analysis_payload = {
+              "pass_to_fail": pass_to_fail,
+              "pass_to_skip": pass_to_skip,
+              "pass_to_gone": pass_to_gone,
+              "fail_to_gone": fail_to_gone,
+              "discovery_regressions": discovery_regressions,
+              "fail_to_skip": fail_to_skip,
+              "fail_to_pass": fail_to_pass,
+              "new_tests": new_tests,
+              "counts": {
+                  "pass_to_fail": len(pass_to_fail),
+                  "pass_to_skip": len(pass_to_skip),
+                  "pass_to_gone": len(pass_to_gone),
+                  "fail_to_gone": len(fail_to_gone),
+                  "discovery": len(discovery_regressions),
+                  "fail_to_skip": len(fail_to_skip),
+                  "fail_to_pass": len(fail_to_pass),
+                  "new_tests": len(new_tests),
+              },
+          }
+
+          Path("regression_analysis.json").write_text(
+              json.dumps(analysis_payload, indent=2),
+              encoding="utf-8",
+          )
+
+          def write_section(handle, title: str, entries: list[str], intro: str) -> None:
+              if not entries:
+                  return
+              handle.write(f"{title} ({len(entries)} tests)\n")
handle.write(f"{intro}\n") + for idx, test_name in enumerate(entries, 1): + handle.write(f" {idx}. {test_name}\n") + handle.write("\n") + + with Path("comprehensive_regression_report.txt").open("w", encoding="utf-8") as report: + report.write("COMPREHENSIVE REGRESSION ANALYSIS\n") + report.write("=" * 50 + "\n\n") + + write_section( + report, + "PASS-TO-FAIL REGRESSIONS", + pass_to_fail, + "Previously passing, now failing:", + ) + write_section( + report, + "PASS-TO-SKIP REGRESSIONS", + pass_to_skip, + "Previously passing, now skipped or xfailed:", + ) + write_section( + report, + "FAIL-TO-SKIP IMPROVEMENTS", + fail_to_skip, + "Previously failing, now skipped (treated as improvements):", + ) + write_section( + report, + "FAIL-TO-PASS IMPROVEMENTS", + fail_to_pass, + "Previously failing, now passing (treated as improvements):", + ) + write_section( + report, + "PASS-TO-GONE REGRESSIONS", + pass_to_gone, + "Previously passing, now completely missing:", + ) + write_section( + report, + "FAIL-TO-GONE REGRESSIONS", + fail_to_gone, + "Previously failing, now completely missing:", + ) + + if discovery_regressions: + report.write( + f"DISCOVERY REGRESSIONS ({len(discovery_regressions)} warnings)\n" + ) + report.write("New warnings not present in baseline:\n") + for idx, warning in enumerate(discovery_regressions, 1): + truncated = (warning[:200] + "...") if len(warning) > 200 else warning + report.write(f" {idx}. {truncated}\n") + report.write("\n") + + write_section( + report, + "NEW TESTS", + new_tests, + "Tests present only in the current run:", + ) + + if not has_regressions and not (fail_to_skip or fail_to_pass or new_tests): + report.write("No regressions or test suite changes detected.\n") + + if pass_to_fail: + with Path("regression_details.txt").open("w", encoding="utf-8") as handle: + handle.write( + f"Found {len(pass_to_fail)} tests that regressed from pass to fail:\n\n" + ) + for idx, test_name in enumerate(pass_to_fail, 1): + handle.write(f"{idx}. 
{test_name}\n") + else: + Path("regression_details.txt").write_text( + "No pass-to-fail regressions detected.\n", + encoding="utf-8", + ) + + table_rows = [ + ("Pass → Fail", len(pass_to_fail)), + ("Pass → Skip/XFail", len(pass_to_skip)), + ("Pass → Gone", len(pass_to_gone)), + ("Fail → Gone", len(fail_to_gone)), + ("Discovery Warnings", len(discovery_regressions)), + ("Fail → Skip (Improvement)", len(fail_to_skip)), + ("Fail → Pass (Improvement)", len(fail_to_pass)), + ("New Tests", len(new_tests)), + ] + + summary_lines = ["| Category | Count |", "| --- | --- |"] + summary_lines.extend([f"| {label} | {count} |" for label, count in table_rows]) + + summary_path = os.environ.get("GITHUB_STEP_SUMMARY") + if summary_path: + with Path(summary_path).open("a", encoding="utf-8") as summary_file: + summary_file.write("### Regression Breakdown\n") + summary_file.write("\n".join(summary_lines) + "\n\n") + if fail_to_skip or fail_to_pass: + summary_file.write("_Improvements are highlighted for visibility and do not fail the job._\n\n") + if new_tests: + summary_file.write( + "New tests are listed to highlight additions alongside regressions.\n\n" + ) + + print("📊 Regression Analysis Results:") + for label, count in table_rows: + print(f" {label}: {count}") + if has_regressions: + print(f"❌ Total regressions detected: {regression_count}") + else: + print("✅ No regressions detected in monitored categories.") + + def sanitize(value: str) -> str: + return value.replace("%", "%25").replace("\n", "%0A").replace("\r", "%0D") + + outputs = { + "has_regressions": "true" if has_regressions else "false", + "regression_count": str(regression_count), + "pass_to_fail_count": str(len(pass_to_fail)), + "pass_to_skip_count": str(len(pass_to_skip)), + "pass_to_gone_count": str(len(pass_to_gone)), + "fail_to_gone_count": str(len(fail_to_gone)), + "discovery_regression_count": str(len(discovery_regressions)), + "fail_to_skip_count": str(len(fail_to_skip)), + "fail_to_pass_count": str(len(fail_to_pass)), + "new_tests_count": str(len(new_tests)), + } + + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + with Path(github_output).open("a", encoding="utf-8") as handle: + for key, value in outputs.items(): + handle.write(f"{key}={sanitize(value)}\n") + else: + print("::warning::GITHUB_OUTPUT environment variable is not set.") + PY + + - name: Upload regression artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + path: | + regression_details.txt + comprehensive_regression_report.txt + regression_analysis.json + retention-days: 3 + if-no-files-found: ignore + + - name: Compare aggregate results when no regressions + if: ${{ steps.analyze.outputs.has_regressions != 'true' }} + run: | + echo "${{ inputs.baseline_label }}: ${{ inputs.baseline_passed }}/${{ inputs.baseline_total }} passed (${{ inputs.baseline_percentage }}%)" + echo "${{ inputs.current_label }}: ${{ inputs.current_passed }}/${{ inputs.current_total }} passed (${{ inputs.current_percentage }}%)" + + if [ "${{ inputs.current_total }}" = "0" ]; then + echo "::error::The current run reported zero collected tests." + exit 1 + fi + + if ! command -v bc >/dev/null 2>&1; then + echo "::warning::'bc' is not available; skipping numeric comparison." + exit 0 + fi + + if (( $(echo "${{ inputs.current_passed }} < ${{ inputs.baseline_passed }}" | bc -l) )); then + echo "::error::Fewer passing tests than baseline." 
+ exit 1 + fi + + if (( $(echo "${{ inputs.current_percentage }} < ${{ inputs.baseline_percentage }}" | bc -l) )); then + echo "::error::Pass percentage decreased compared to baseline." + exit 1 + fi + + echo "✅ Aggregate test results are at least as good as baseline." + + - name: Fail when regressions detected + if: ${{ steps.analyze.outputs.has_regressions == 'true' }} + run: | + echo "::error::Test regressions detected. See regression artifacts for details." + exit 1 diff --git a/.github/workflows/test-py-pytest.yml b/.github/workflows/test-py-pytest.yml index 787832f..1f9d50c 100644 --- a/.github/workflows/test-py-pytest.yml +++ b/.github/workflows/test-py-pytest.yml @@ -75,6 +75,7 @@ jobs: failing_count: ${{ steps.extract-results.outputs.failing_count }} skipped_count: ${{ steps.extract-results.outputs.skipped_count }} xfailed_count: ${{ steps.extract-results.outputs.xfailed_count }} + test_data_json: ${{ steps.extract-results.outputs.test_data_json }} steps: - name: Checkout PR Branch @@ -313,6 +314,9 @@ jobs: f.write(f'failing_count={len(failing_tests)}\\n') f.write(f'skipped_count={len(skipped_tests)}\\n') f.write(f'xfailed_count={len(xfailed_tests)}\\n') + f.write('test_data_json<> $GITHUB_OUTPUT - echo "REGRESSION_COUNT=$_regression_count" >> $GITHUB_OUTPUT - - if [[ "$_has_regressions" == "false" ]]; then - if [ -f regression_details.txt ] && [ "$_has_regressions" == "false" ]; then - echo "::notice::Regression details file (regression_details.txt) was found but no valid regression entries were counted by this step, or the file was empty." - else - echo "No test regressions detected by this step." - fi - fi - - - name: Upload regression details artifact - if: always() - uses: actions/upload-artifact@v4 - with: - name: regression_details_pr_${{ github.event.pull_request.number || github.run_id }}_tests - path: | - regression_details.txt - comprehensive_regression_report.txt - retention-days: 1 - if-no-files-found: ignore - - - name: Check for test additions and removals from artifacts - run: | - # Create analysis debug file - exec 3>&1 4>&2 - exec 1> >(tee -a debug_test_changes_analysis.log) 2>&1 - - echo "Checking for test additions and removals between target and PR branches" - - python3 - << 'EOF' - import json - import os - import glob - - try: - # Load test data from artifacts - target_data = {} - pr_data = {} - - # Load target and PR data - target_files = glob.glob('./artifacts/target_branch_data_*/target_test_data.json') - if target_files: - with open(target_files[0], 'r') as f: - target_data = json.load(f) - - pr_files = glob.glob('./artifacts/pr_branch_data_*/pr_test_data.json') - if pr_files: - with open(pr_files[0], 'r') as f: - pr_data = json.load(f) - - # Extract test arrays - target_all = target_data.get('all_tests', []) - pr_all = pr_data.get('all_tests', []) - - print(f"Parsed {len(target_all)} total tests from target branch") - print(f"Parsed {len(pr_all)} total tests from PR branch") - - # Find test additions and removals using set operations - target_all_set = set(target_all) - pr_all_set = set(pr_all) - - removed_tests = list(target_all_set - pr_all_set) # In target but not in PR - added_tests = list(pr_all_set - target_all_set) # In PR but not in target - - # Report test changes - if removed_tests and added_tests: - print(f"📊 Test Changes: +{len(added_tests)} added, -{len(removed_tests)} removed") - elif added_tests: - print(f"✅ Test Additions: {len(added_tests)} new test(s) added") - elif removed_tests: - print(f"⚠️ Test Removals: {len(removed_tests)} 
test(s) removed") - else: - print("✅ Test suite unchanged") - - except Exception as e: - print(f"Error in test addition/removal analysis: {e}") - import traceback - print(traceback.format_exc()) - EOF - - # Restore stdout/stderr for GitHub Actions - exec 1>&3 2>&4 - - echo "Test addition/removal analysis completed" - - - name: Compare test results - run: | - echo "Target: ${{ needs.test-target-branch.outputs.passed }}/${{ needs.test-target-branch.outputs.total }} passed (${{ needs.test-target-branch.outputs.percentage }}%)" - echo "PR: ${{ needs.test-source-branch.outputs.passed }}/${{ needs.test-source-branch.outputs.total }} passed (${{ needs.test-source-branch.outputs.percentage }}%)" - - if [[ "${{ needs.test-source-branch.outputs.total }}" == "0" ]]; then - echo "::error::No tests were found in the PR branch" - echo "❌ PR branch has no tests detected. Please add test files that match pytest's discovery pattern." - exit 1 - fi - - PR_PASSED=${{ needs.test-source-branch.outputs.passed }} - TARGET_PASSED=${{ needs.test-target-branch.outputs.passed }} - PR_PERCENTAGE=${{ needs.test-source-branch.outputs.percentage }} - TARGET_PERCENTAGE=${{ needs.test-target-branch.outputs.percentage }} - PR_TOTAL=${{ needs.test-source-branch.outputs.total }} - TARGET_TOTAL=${{ needs.test-target-branch.outputs.total }} - - # Handle case where target has no tests - if [[ "$TARGET_TOTAL" == "0" ]]; then - if [[ "$PR_PASSED" -gt 0 ]]; then - echo "✅ PR branch has tests and some are passing (target branch has no tests)" - exit 0 - else - echo "❌ PR branch has no passing tests" - echo " - Pass percentage: $PR_PERCENTAGE%" - exit 1 - fi - fi - - # Check for regressions from meta-regression-analysis OR our comprehensive analysis - COMPREHENSIVE_REGRESSIONS="false" - if [ -f comprehensive_regression_report.txt ]; then - # Check if there are any actual regressions in our comprehensive report - if grep -q "REGRESSIONS.*([1-9]" comprehensive_regression_report.txt; then - COMPREHENSIVE_REGRESSIONS="true" - fi - fi - - if [[ "${{ needs.perform-regression-analysis.outputs.has_regressions }}" == "true" ]] || [[ "$COMPREHENSIVE_REGRESSIONS" == "true" ]]; then - echo "❌ Test regressions detected from target branch" - REGRESSION_COUNT_VAL=${{ needs.perform-regression-analysis.outputs.regression_count }} - - echo "### :x: Test Regressions Detected!" 
>> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - # Extract counts from comprehensive report if available - if [ -f comprehensive_regression_report.txt ]; then - PASS_FAIL_COUNT=$(grep -o "PASS-TO-FAIL REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") - PASS_SKIP_COUNT=$(grep -o "PASS-TO-SKIP REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") - FAIL_SKIP_IMPROVEMENTS_COUNT=$(grep -o "FAIL-TO-SKIP IMPROVEMENTS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") - PASS_GONE_COUNT=$(grep -o "PASS-TO-GONE REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") - FAIL_GONE_COUNT=$(grep -o "FAIL-TO-GONE REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") - DISCOVERY_COUNT=$(grep -o "DISCOVERY REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") - - TOTAL_REGRESSIONS=$((PASS_FAIL_COUNT + PASS_SKIP_COUNT + PASS_GONE_COUNT + FAIL_GONE_COUNT + DISCOVERY_COUNT)) - - echo "**$TOTAL_REGRESSIONS total regression(s) detected across multiple categories:**" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "| Category | Count |" >> $GITHUB_STEP_SUMMARY - echo "|----------|-------|" >> $GITHUB_STEP_SUMMARY - echo "| Pass → Fail | $PASS_FAIL_COUNT |" >> $GITHUB_STEP_SUMMARY - echo "| Pass → Skip/XFail | $PASS_SKIP_COUNT |" >> $GITHUB_STEP_SUMMARY - echo "| Pass → Gone | $PASS_GONE_COUNT |" >> $GITHUB_STEP_SUMMARY - echo "| Fail → Gone | $FAIL_GONE_COUNT |" >> $GITHUB_STEP_SUMMARY - echo "| Discovery Warnings | $DISCOVERY_COUNT |" >> $GITHUB_STEP_SUMMARY - if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -gt 0 ]]; then - echo "| Fail → Skip (Improvement) | $FAIL_SKIP_IMPROVEMENTS_COUNT |" >> $GITHUB_STEP_SUMMARY - fi - echo "" >> $GITHUB_STEP_SUMMARY - if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -gt 0 ]]; then - echo "_Note: Fail → Skip transitions are treated as improvements and do not cause this job to fail._" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - fi - else - echo "**$REGRESSION_COUNT_VAL test regression(s) detected.** See detailed breakdown below:" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - fi - - # Display comprehensive regression report if available - if [ -f comprehensive_regression_report.txt ]; then - echo "📋 **Comprehensive Regression Analysis:**" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - # Parse and format the comprehensive report for better GitHub display - while IFS= read -r line; do - if [[ "$line" =~ ^[A-Z-].*(REGRESSIONS|IMPROVEMENTS).*(\([0-9]+) ]]; then - echo "### $line" >> $GITHUB_STEP_SUMMARY - elif [[ "$line" =~ ^Previously ]]; then - echo "*$line*" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - elif [[ "$line" =~ ^[[:space:]]*[0-9]+\. ]]; then - echo "- ${line#*. }" >> $GITHUB_STEP_SUMMARY - elif [[ ! "$line" =~ ^=.*=$ ]] && [[ -n "$line" ]]; then - echo "$line" >> $GITHUB_STEP_SUMMARY - fi - done < comprehensive_regression_report.txt - - echo "" >> $GITHUB_STEP_SUMMARY - elif [ -f regression_details.txt ]; then - echo "### Pass-to-Fail Regressions" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - grep "^[0-9]\+\." regression_details.txt | while read -r line; do - echo "- ${line#*. }" >> $GITHUB_STEP_SUMMARY - done - echo "" >> $GITHUB_STEP_SUMMARY - else - echo "::warning::Regression details files not found." 
- fi - - echo "" >> $GITHUB_STEP_SUMMARY - echo "This job (\`compare-results\`) has been marked as failed due to these regressions." >> $GITHUB_STEP_SUMMARY - exit 1 - fi - - # Highlight improvements in the summary even when no regressions were found - if [ -f comprehensive_regression_report.txt ]; then - FAIL_SKIP_IMPROVEMENTS_COUNT=$(grep -o "FAIL-TO-SKIP IMPROVEMENTS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") - - if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -gt 0 ]]; then - echo "### ✅ Test Improvements Detected" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "| Category | Count |" >> $GITHUB_STEP_SUMMARY - echo "|----------|-------|" >> $GITHUB_STEP_SUMMARY - echo "| Fail → Skip (Improvement) | $FAIL_SKIP_IMPROVEMENTS_COUNT |" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "_Note: Fail → Skip transitions are treated as improvements and do not cause this job to fail._" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - fi - fi - - # Continue with the original comparison if no regressions - if (( $(echo "$PR_PASSED >= $TARGET_PASSED" | bc -l) )) && (( $(echo "$PR_PERCENTAGE >= $TARGET_PERCENTAGE" | bc -l) )); then - echo "✅ PR branch has equal or better test results than target branch" - - # Additional verbose information about improvement - if (( $(echo "$PR_PASSED > $TARGET_PASSED" | bc -l) )); then - IMPROVEMENT=$(( $PR_PASSED - $TARGET_PASSED )) - echo " - Improvement: $IMPROVEMENT more passing tests than target branch" - fi - - if (( $(echo "$PR_PERCENTAGE > $TARGET_PERCENTAGE" | bc -l) )); then - PERCENTAGE_IMPROVEMENT=$(echo "$PR_PERCENTAGE - $TARGET_PERCENTAGE" | bc -l) - echo " - Percentage improvement: +${PERCENTAGE_IMPROVEMENT}% compared to target branch" - fi - - exit 0 - else - echo "❌ PR branch has worse test results than target branch" - echo " - Passed tests: $PR_PASSED vs $TARGET_PASSED on target branch" - echo " - Pass percentage: $PR_PERCENTAGE% vs $TARGET_PERCENTAGE% on target branch" - - # Add to job summary for general comparison failure - echo "### :x: Test Comparison Failed" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "The PR branch has worse test results than the target branch:" >> $GITHUB_STEP_SUMMARY - echo "- Passed tests: $PR_PASSED (PR) vs $TARGET_PASSED (Target)" >> $GITHUB_STEP_SUMMARY - echo "- Pass percentage: $PR_PERCENTAGE% (PR) vs $TARGET_PERCENTAGE% (Target)" >> $GITHUB_STEP_SUMMARY - - # Calculate regression metrics - if (( $(echo "$PR_PASSED < $TARGET_PASSED" | bc -l) )); then - REGRESSION=$(( $TARGET_PASSED - $PR_PASSED )) - echo " - Regression: $REGRESSION fewer passing tests than target branch" - fi - - if (( $(echo "$PR_PERCENTAGE < $TARGET_PERCENTAGE" | bc -l) )); then - PERCENTAGE_REGRESSION=$(echo "$TARGET_PERCENTAGE - $PR_PERCENTAGE" | bc -l) - echo " - Percentage regression: -${PERCENTAGE_REGRESSION}% compared to target branch" - fi - - exit 1 - fi + uses: ./.github/workflows/regression-test.yml + with: + runs_on: ${{ inputs.runs_on }} + baseline_label: ${{ inputs.target_branch_to_compare }} + baseline_results_json: ${{ needs.test-target-branch.outputs.test_data_json }} + current_label: ${{ github.head_ref || github.ref_name || 'source branch' }} + current_results_json: ${{ needs.test-source-branch.outputs.test_data_json }} + baseline_passed: ${{ needs.test-target-branch.outputs.passed }} + baseline_total: ${{ needs.test-target-branch.outputs.total }} + baseline_percentage: ${{ needs.test-target-branch.outputs.percentage }} + current_passed: 
${{ needs.test-source-branch.outputs.passed }} + current_total: ${{ needs.test-source-branch.outputs.total }} + current_percentage: ${{ needs.test-source-branch.outputs.percentage }} + baseline_collection_errors: ${{ needs.test-target-branch.outputs.collection_errors }} + baseline_no_tests_found: ${{ needs.test-target-branch.outputs.no_tests_found }} + current_collection_errors: ${{ needs.test-source-branch.outputs.collection_errors }} + current_no_tests_found: ${{ needs.test-source-branch.outputs.no_tests_found }} + artifact_name: regression_details_pr_${{ github.event.pull_request.number || github.run_id }}_tests perform-regression-analysis: needs: [test-source-branch, test-target-branch] @@ -1454,9 +1004,11 @@ jobs: PASS_FAIL_COUNT=$(grep -o "PASS-TO-FAIL REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") PASS_SKIP_COUNT=$(grep -o "PASS-TO-SKIP REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") FAIL_SKIP_IMPROVEMENTS_COUNT=$(grep -o "FAIL-TO-SKIP IMPROVEMENTS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") + FAIL_PASS_IMPROVEMENTS_COUNT=$(grep -o "FAIL-TO-PASS IMPROVEMENTS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") PASS_GONE_COUNT=$(grep -o "PASS-TO-GONE REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") FAIL_GONE_COUNT=$(grep -o "FAIL-TO-GONE REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") DISCOVERY_COUNT=$(grep -o "DISCOVERY REGRESSIONS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") + NEW_TESTS_COUNT=$(grep -o "NEW TESTS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") # Add category summaries (≤5 show paths, >5 show count + refer to file) if [[ "$PASS_FAIL_COUNT" -gt 0 ]]; then @@ -1483,17 +1035,29 @@ jobs: fi fi - if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -gt 0 ]]; then - if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -le 5 ]]; then - MESSAGE_LINES+=(":white_check_mark: **Fail→Skip Improvements ($FAIL_SKIP_IMPROVEMENTS_COUNT):**") - readarray -t test_paths < <(grep -A 100 "FAIL-TO-SKIP IMPROVEMENTS" comprehensive_regression_report.txt | grep "^ [0-9]\+\." | head -$FAIL_SKIP_IMPROVEMENTS_COUNT | sed 's/^ [0-9]\+\. //') - for test_path in "${test_paths[@]}"; do - MESSAGE_LINES+=("• \`$test_path\`") - done - else - MESSAGE_LINES+=(":white_check_mark: **Fail→Skip Improvements:** $FAIL_SKIP_IMPROVEMENTS_COUNT tests (see attached file)") - fi - fi + if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -gt 0 ]]; then + if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -le 5 ]]; then + MESSAGE_LINES+=(":white_check_mark: **Fail→Skip Improvements ($FAIL_SKIP_IMPROVEMENTS_COUNT):**") + readarray -t test_paths < <(grep -A 100 "FAIL-TO-SKIP IMPROVEMENTS" comprehensive_regression_report.txt | grep "^ [0-9]\+\." | head -$FAIL_SKIP_IMPROVEMENTS_COUNT | sed 's/^ [0-9]\+\. //') + for test_path in "${test_paths[@]}"; do + MESSAGE_LINES+=("• \`$test_path\`") + done + else + MESSAGE_LINES+=(":white_check_mark: **Fail→Skip Improvements:** $FAIL_SKIP_IMPROVEMENTS_COUNT tests (see attached file)") + fi + fi + + if [[ "$FAIL_PASS_IMPROVEMENTS_COUNT" -gt 0 ]]; then + if [[ "$FAIL_PASS_IMPROVEMENTS_COUNT" -le 5 ]]; then + MESSAGE_LINES+=(":white_check_mark: **Fail→Pass Improvements ($FAIL_PASS_IMPROVEMENTS_COUNT):**") + readarray -t test_paths < <(grep -A 100 "FAIL-TO-PASS IMPROVEMENTS" comprehensive_regression_report.txt | grep "^ [0-9]\+\." 
| head -$FAIL_PASS_IMPROVEMENTS_COUNT | sed 's/^ [0-9]\+\. //') + for test_path in "${test_paths[@]}"; do + MESSAGE_LINES+=("• \`$test_path\`") + done + else + MESSAGE_LINES+=(":white_check_mark: **Fail→Pass Improvements:** $FAIL_PASS_IMPROVEMENTS_COUNT tests (see attached file)") + fi + fi if [[ "$PASS_GONE_COUNT" -gt 0 ]]; then if [[ "$PASS_GONE_COUNT" -le 5 ]]; then @@ -1527,6 +1091,18 @@ jobs: MESSAGE_LINES+=("**Discovery Warnings:** $DISCOVERY_COUNT warnings (see attached file)") fi fi + + if [[ "$NEW_TESTS_COUNT" -gt 0 ]]; then + if [[ "$NEW_TESTS_COUNT" -le 5 ]]; then + MESSAGE_LINES+=(":sparkles: **New Tests ($NEW_TESTS_COUNT):**") + readarray -t test_paths < <(grep -A 100 "NEW TESTS" comprehensive_regression_report.txt | grep "^ [0-9]\+\." | head -$NEW_TESTS_COUNT | sed 's/^ [0-9]\+\. //') + for test_path in "${test_paths[@]}"; do + MESSAGE_LINES+=("• \`$test_path\`") + done + else + MESSAGE_LINES+=(":sparkles: **New Tests:** $NEW_TESTS_COUNT tests (see attached file)") + fi + fi else # Fallback to simple regression count @@ -1544,6 +1120,52 @@ jobs: MESSAGE_LINES+=(" - Target Branch (\`${TARGET_BRANCH}\`): **${TARGET_PASSED_TESTS}/${TARGET_TOTAL_TESTS} passed (${TARGET_PERCENTAGE}%)**") fi + if [[ "$HAS_REGRESSIONS" != "true" ]] && [ -f "comprehensive_regression_report.txt" ]; then + FAIL_SKIP_IMPROVEMENTS_COUNT=$(grep -o "FAIL-TO-SKIP IMPROVEMENTS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") + FAIL_PASS_IMPROVEMENTS_COUNT=$(grep -o "FAIL-TO-PASS IMPROVEMENTS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") + NEW_TESTS_COUNT=$(grep -o "NEW TESTS (\([0-9]*\)" comprehensive_regression_report.txt | grep -o "[0-9]*" || echo "0") + + if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -gt 0 || "$FAIL_PASS_IMPROVEMENTS_COUNT" -gt 0 || "$NEW_TESTS_COUNT" -gt 0 ]]; then + MESSAGE_LINES+=("**:sparkles: Improvements & Additions**") + + if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -gt 0 ]]; then + if [[ "$FAIL_SKIP_IMPROVEMENTS_COUNT" -le 5 ]]; then + MESSAGE_LINES+=(":white_check_mark: **Fail→Skip Improvements ($FAIL_SKIP_IMPROVEMENTS_COUNT):**") + readarray -t test_paths < <(grep -A 100 "FAIL-TO-SKIP IMPROVEMENTS" comprehensive_regression_report.txt | grep "^ [0-9]\+\." | head -$FAIL_SKIP_IMPROVEMENTS_COUNT | sed 's/^ [0-9]\+\. //') + for test_path in "${test_paths[@]}"; do + MESSAGE_LINES+=("• \`$test_path\`") + done + else + MESSAGE_LINES+=(":white_check_mark: **Fail→Skip Improvements:** $FAIL_SKIP_IMPROVEMENTS_COUNT tests (see attached file)") + fi + fi + + if [[ "$FAIL_PASS_IMPROVEMENTS_COUNT" -gt 0 ]]; then + if [[ "$FAIL_PASS_IMPROVEMENTS_COUNT" -le 5 ]]; then + MESSAGE_LINES+=(":white_check_mark: **Fail→Pass Improvements ($FAIL_PASS_IMPROVEMENTS_COUNT):**") + readarray -t test_paths < <(grep -A 100 "FAIL-TO-PASS IMPROVEMENTS" comprehensive_regression_report.txt | grep "^ [0-9]\+\." | head -$FAIL_PASS_IMPROVEMENTS_COUNT | sed 's/^ [0-9]\+\. //') + for test_path in "${test_paths[@]}"; do + MESSAGE_LINES+=("• \`$test_path\`") + done + else + MESSAGE_LINES+=(":white_check_mark: **Fail→Pass Improvements:** $FAIL_PASS_IMPROVEMENTS_COUNT tests (see attached file)") + fi + fi + + if [[ "$NEW_TESTS_COUNT" -gt 0 ]]; then + if [[ "$NEW_TESTS_COUNT" -le 5 ]]; then + MESSAGE_LINES+=(":sparkles: **New Tests ($NEW_TESTS_COUNT):**") + readarray -t test_paths < <(grep -A 100 "NEW TESTS" comprehensive_regression_report.txt | grep "^ [0-9]\+\." | head -$NEW_TESTS_COUNT | sed 's/^ [0-9]\+\. 
//') + for test_path in "${test_paths[@]}"; do + MESSAGE_LINES+=("• \`$test_path\`") + done + else + MESSAGE_LINES+=(":sparkles: **New Tests:** $NEW_TESTS_COUNT tests (see attached file)") + fi + fi + fi + fi + MESSAGE_LINES+=("---") MESSAGE_LINES+=("[View Workflow Run](${ACTION_RUN_URL})")