# Selective backend test run for pull requests.
#
# Pipeline: detect changed files -> look up a coverage database for a recent
# base commit -> compute the test files affected by the PR -> size the shard
# matrix -> run either the selected tests or, when no selection is available,
# the regular suite (`make test-python-ci`).
#
# Every job sets `continue-on-error: true`, so a failure here does not fail the
# workflow run.
name: '[NOT REQUIRED] backend (selective)'

on:
  pull_request:
    paths:
      - 'src/sentry/preprod/**'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# hack for https://github.com/actions/cache/issues/810#issuecomment-1222550359
env:
  SEGMENT_DOWNLOAD_TIMEOUT_MINS: 3
  SNUBA_NO_WORKERS: 1

jobs:
  files-changed:
    name: detect what files changed
    runs-on: ubuntu-24.04
    timeout-minutes: 3
    continue-on-error: true
    outputs:
      api_docs: ${{ steps.changes.outputs.api_docs }}
      backend: ${{ steps.changes.outputs.backend_all }}
      backend_dependencies: ${{ steps.changes.outputs.backend_dependencies }}
      backend_api_urls: ${{ steps.changes.outputs.backend_api_urls }}
      backend_any_type: ${{ steps.changes.outputs.backend_any_type }}
      migration_lockfile: ${{ steps.changes.outputs.migration_lockfile }}
    steps:
      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

      - name: Check for backend file changes
        uses: dorny/paths-filter@0bc4621a3135347011ad047f9ecf449bf72ce2bd # v3.0.0
        id: changes
        with:
          token: ${{ github.token }}
          filters: .github/file-filters.yml

  prepare-selective-tests:
    if: needs.files-changed.outputs.backend == 'true'
    needs: files-changed
    name: prepare selective tests
    runs-on: ubuntu-24.04
    timeout-minutes: 10
    continue-on-error: true
    permissions:
      contents: read
      id-token: write
    outputs:
      has-coverage: ${{ steps.find-coverage.outputs.found }}
      coverage-sha: ${{ steps.find-coverage.outputs.coverage-sha }}
      changed-files: ${{ steps.changed-files.outputs.files }}
      test-count: ${{ steps.compute-tests.outputs.test-count }}
      has-selected-tests: ${{ steps.compute-tests.outputs.has-selected-tests }}
    steps:
      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
        with:
          fetch-depth: 0 # Need full history for git diff

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.13.1'

      - name: Authenticate to Google Cloud
        id: gcloud-auth
        uses: google-github-actions/auth@v2
        with:
          project_id: sentry-dev-tooling
          workload_identity_provider: ${{ secrets.SENTRY_GCP_DEV_WORKLOAD_IDENTITY_POOL }}
          service_account: ${{ secrets.COLLECT_TEST_DATA_SERVICE_ACCOUNT_EMAIL }}

      - name: Find coverage data for selective testing
        id: find-coverage
        env:
          GCS_BUCKET: sentry-coverage-data
          # The base commit (what the PR branches from). Passed through the
          # environment instead of being interpolated into the script text.
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          set -euo pipefail

          echo "Looking for coverage data starting from base commit: $BASE_SHA"

          COVERAGE_SHA=""
          for sha in $(git rev-list "$BASE_SHA" --max-count=30); do
            # Check if coverage exists in GCS for this commit
            if gcloud storage ls "gs://${GCS_BUCKET}/${sha}/" &>/dev/null; then
              COVERAGE_SHA="$sha"
              echo "Found coverage data at commit: $sha"
              break
            fi
            echo "No coverage at $sha, checking parent..."
          done

          if [[ -z "$COVERAGE_SHA" ]]; then
            echo "No coverage found in last 30 commits, will run full test suite"
            echo "found=false" >> "$GITHUB_OUTPUT"
          else
            echo "found=true" >> "$GITHUB_OUTPUT"
            echo "coverage-sha=$COVERAGE_SHA" >> "$GITHUB_OUTPUT"
          fi

      - name: Download coverage database
        id: download-coverage
        if: steps.find-coverage.outputs.found == 'true'
        env:
          # Same bucket the lookup step searched.
          GCS_BUCKET: sentry-coverage-data
          COVERAGE_SHA: ${{ steps.find-coverage.outputs.coverage-sha }}
        run: |
          set -euxo pipefail
          mkdir -p .coverage

          # A failed download is not fatal: an empty `coverage-file` output
          # makes the later steps skip test selection.
          if ! gcloud storage cp "gs://${GCS_BUCKET}/${COVERAGE_SHA}/.coverage.combined" .coverage/; then
            echo "Warning: Failed to download coverage file"
            echo "coverage-file=" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          if [[ ! -f .coverage/.coverage.combined ]]; then
            echo "Warning: Coverage file not found after download"
            ls -la .coverage/ || true
            echo "coverage-file=" >> "$GITHUB_OUTPUT"
          else
            echo "Downloaded coverage file: .coverage/.coverage.combined"
            echo "coverage-file=.coverage/.coverage.combined" >> "$GITHUB_OUTPUT"
          fi

      - name: Get changed files
        id: changed-files
        env:
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          # Three-dot diff: compare the PR head against its merge base with the
          # base branch. A plain two-dot diff against the base tip would also
          # list files changed on the base branch after the PR branched off.
          CHANGED_FILES=$(git diff --name-only "$BASE_SHA...$HEAD_SHA" | tr '\n' ' ')
          echo "Changed files: $CHANGED_FILES"
          echo "files=$CHANGED_FILES" >> "$GITHUB_OUTPUT"

      - name: Compute selected tests
        id: compute-tests
        if: steps.download-coverage.outputs.coverage-file != ''
        env:
          COVERAGE_DB: ${{ steps.download-coverage.outputs.coverage-file }}
          CHANGED_FILES: ${{ steps.changed-files.outputs.files }}
        run: make compute-selected-tests

      - name: Upload coverage database artifact
        if: steps.download-coverage.outputs.coverage-file != ''
        uses: actions/upload-artifact@v4
        with:
          name: coverage-db-${{ github.run_id }}
          path: .coverage/
          retention-days: 1
          include-hidden-files: true

      - name: Upload selected tests artifact
        if: steps.compute-tests.outputs.has-selected-tests == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: selected-tests-${{ github.run_id }}
          path: .artifacts/selected-tests.txt
          retention-days: 1

  calculate-shards:
    if: needs.files-changed.outputs.backend == 'true'
    needs: [files-changed, prepare-selective-tests]
    name: calculate test shards
    runs-on: ubuntu-24.04
    timeout-minutes: 5
    continue-on-error: true
    outputs:
      shard-count: ${{ steps.calculate-shards.outputs.shard-count }}
      shard-indices: ${{ steps.calculate-shards.outputs.shard-indices }}
    steps:
      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

      - name: Setup sentry env
        uses: ./.github/actions/setup-sentry
        id: setup
        with:
          mode: backend-ci
          skip-devservices: true

      - name: Download selected tests artifact
        if: needs.prepare-selective-tests.outputs.has-selected-tests == 'true'
        uses: actions/download-artifact@v4
        with:
          name: selected-tests-${{ github.run_id }}
          path: .artifacts/

      - name: Calculate test shards
        id: calculate-shards
        env:
          # Empty when there is no selection; the script then counts the tests
          # under `tests` instead of the selected files.
          SELECTED_TESTS_FILE: ${{ needs.prepare-selective-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || '' }}
          SELECTED_TEST_COUNT: ${{ needs.prepare-selective-tests.outputs.test-count }}
        run: |
          python3 .github/workflows/scripts/calculate-backend-test-shards.py

  backend-test-selective:
    if: needs.files-changed.outputs.backend == 'true'
    needs: [files-changed, prepare-selective-tests, calculate-shards]
    name: backend tests
    runs-on: ubuntu-24.04
    timeout-minutes: 60
    continue-on-error: true
    permissions:
      contents: read
      id-token: write
      actions: read # used for DIM metadata
    strategy:
      fail-fast: false
      matrix:
        instance: ${{ fromJSON(needs.calculate-shards.outputs.shard-indices) }}

    env:
      MATRIX_INSTANCE_TOTAL: ${{ needs.calculate-shards.outputs.shard-count }}
      TEST_GROUP_STRATEGY: roundrobin

    steps:
      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

      - name: Setup sentry env
        uses: ./.github/actions/setup-sentry
        id: setup
        with:
          mode: backend-ci

      - name: Download selected tests artifact
        if: needs.prepare-selective-tests.outputs.has-selected-tests == 'true'
        uses: actions/download-artifact@v4
        with:
          name: selected-tests-${{ github.run_id }}
          path: .artifacts/

      - name: Run backend tests (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }})
        id: run_backend_tests
        run: |
          # Run the selection when one was produced; otherwise the regular suite.
          if [[ "${{ needs.prepare-selective-tests.outputs.has-selected-tests }}" == "true" ]]; then
            make test-backend-ci-selective SELECTED_TESTS_FILE=.artifacts/selected-tests.txt
          else
            make test-python-ci
          fi

      - name: Inspect failure
        if: failure()
        run: |
          if command -v devservices; then
            devservices logs
          fi

      - name: Collect test data
        uses: ./.github/actions/collect-test-data
        if: ${{ !cancelled() }}
        with:
          artifact_path: .artifacts/pytest.json
          gcs_bucket: ${{ secrets.COLLECT_TEST_DATA_GCS_BUCKET }}
          gcp_project_id: ${{ secrets.COLLECT_TEST_DATA_GCP_PROJECT_ID }}
          workload_identity_provider: ${{ secrets.SENTRY_GCP_DEV_WORKLOAD_IDENTITY_POOL }}
          service_account_email: ${{ secrets.COLLECT_TEST_DATA_SERVICE_ACCOUNT_EMAIL }}
          matrix_instance_number: ${{ steps.setup.outputs.matrix-instance-number }}
#!/usr/bin/env python3
# Select the backend test files affected by a set of changed files.
#
# Reads a coverage SQLite database recorded with per-test contexts, finds the
# test files whose contexts executed lines in the changed files, adds test
# files that were changed directly, and writes the selection to an output file
# and/or to $GITHUB_OUTPUT. An empty selection (`has-selected-tests=false`)
# tells the caller that no selective run is possible.
from __future__ import annotations

import argparse
import os
import sqlite3
import sys
from pathlib import Path

# Files that, if changed, should trigger the full test suite (can't determine affected tests)
FULL_SUITE_TRIGGER_FILES = [
    "sentry/testutils/pytest/sentry.py",
    "pyproject.toml",
    # "Makefile",
    "sentry/conf/server.py",
    "sentry/web/urls.py",
]

# Contexts that recorded data for one file. The file is matched either by its
# exact stored path or as a suffix that starts at a path separator; the LIKE
# argument is escaped so `_` and `%` in file names are taken literally.
_AFFECTED_CONTEXTS_SQL = """
    SELECT c.context, lb.numbits
    FROM line_bits lb
    JOIN file f ON lb.file_id = f.id
    JOIN context c ON lb.context_id = c.id
    WHERE (f.path = ? OR f.path LIKE ? ESCAPE '\\')
      AND c.context != ''
"""


def _full_suite_triggers(changed_files: list[str]) -> list[str]:
    """Return the changed files whose path ends with a full-suite trigger."""
    suffixes = tuple(FULL_SUITE_TRIGGER_FILES)
    return [path for path in changed_files if path.endswith(suffixes)]


def should_run_full_suite(changed_files: list[str]) -> bool:
    """Return True when any changed file ends with a ``FULL_SUITE_TRIGGER_FILES`` entry."""
    return bool(_full_suite_triggers(changed_files))


def get_changed_test_files(changed_files: list[str]) -> set[str]:
    """Return the changed paths that are Python files under ``tests/``."""
    return {
        path for path in changed_files if path.startswith("tests/") and path.endswith(".py")
    }


def _escape_like(value: str) -> str:
    """Escape LIKE wildcards in ``value`` for use with ``ESCAPE '\\'``."""
    # The escape character itself must be handled first.
    return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def get_affected_test_files(coverage_db_path: str, changed_files: list[str]) -> set[str]:
    """Return the test files whose coverage contexts touched any changed file.

    Args:
        coverage_db_path: Path to the coverage SQLite database.
        changed_files: Repository-relative paths of the changed files.

    Returns:
        The part of each matching context before the first ``::``
        (context format: ``tests/foo/bar.py::TestClass::test_function|run``).

    Raises:
        ValueError: If the database has no ``line_bits``/``context`` tables.
        sqlite3.Error: If the database cannot be queried.
    """
    test_contexts: set[str] = set()

    conn = sqlite3.connect(coverage_db_path)
    try:
        cur = conn.cursor()

        # Verify required tables exist (need context tracking enabled)
        tables = {
            row[0] for row in cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
        }
        if "line_bits" not in tables or "context" not in tables:
            raise ValueError(
                "Coverage database missing line_bits/context tables. "
                "Coverage must be collected with --cov-context=test"
            )

        for file_path in changed_files:
            cur.execute(_AFFECTED_CONTEXTS_SQL, (file_path, f"%/{_escape_like(file_path)}"))
            for context, numbits in cur.fetchall():
                # Only count contexts with at least one non-zero byte in the blob.
                if numbits and any(bytes(numbits)):
                    test_contexts.add(context)
    finally:
        # Close the connection on the error paths too.
        conn.close()

    return {context.split("::", 1)[0] for context in test_contexts}


def main() -> int:
    """Compute the selected test files and publish them.

    Returns:
        0 on success, 1 when the coverage database is missing or unusable.
    """
    parser = argparse.ArgumentParser(description="Compute selected tests from coverage data")
    parser.add_argument("--coverage-db", required=True, help="Path to coverage SQLite database")
    parser.add_argument(
        "--changed-files", required=True, help="Space-separated list of changed files"
    )
    parser.add_argument("--output", help="Output file path for selected test files (one per line)")
    parser.add_argument("--github-output", action="store_true", help="Write to GITHUB_OUTPUT")
    args = parser.parse_args()

    coverage_db = Path(args.coverage_db)
    if not coverage_db.exists():
        print(f"Error: Coverage database not found: {coverage_db}", file=sys.stderr)
        return 1

    changed_files = args.changed_files.split()
    triggered_by = _full_suite_triggers(changed_files)

    # Stays empty on the two "full suite" paths below. Nothing may be added to
    # it there, otherwise the caller would run only that partial selection.
    affected_test_files: set[str] = set()

    if not changed_files:
        print("No changed files provided, running full test suite")
    elif triggered_by:
        print(f"Full test suite triggered by: {', '.join(triggered_by)}")
    else:
        print(f"Computing selected tests for {len(changed_files)} changed files...")
        try:
            affected_test_files = get_affected_test_files(str(coverage_db), changed_files)
        except (sqlite3.Error, ValueError) as e:
            print(f"Error querying coverage database: {e}", file=sys.stderr)
            return 1

        # Also include any test files that were directly changed/added in the PR
        changed_test_files = get_changed_test_files(changed_files)
        if changed_test_files:
            print(f"Including {len(changed_test_files)} directly changed test files")
            affected_test_files.update(changed_test_files)

        # Paths are checked relative to the current working directory. Test
        # files deleted or renamed by the change (or only present in older
        # coverage data) would make pytest abort on a missing path.
        missing = {path for path in affected_test_files if not Path(path).is_file()}
        if missing:
            print(f"Skipping {len(missing)} test files that no longer exist", file=sys.stderr)
            affected_test_files -= missing

    print(f"Found {len(affected_test_files)} affected test files")
    selected = sorted(affected_test_files)

    if args.output:
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with output_path.open("w") as f:
            f.writelines(f"{test_file}\n" for test_file in selected)
        print(f"Wrote selected tests to {output_path}")

    if args.github_output:
        github_output = os.environ.get("GITHUB_OUTPUT")
        if github_output:
            with open(github_output, "a") as f:
                f.write(f"test-count={len(selected)}\n")
                f.write(f"has-selected-tests={'true' if selected else 'false'}\n")
            print(f"Wrote to GITHUB_OUTPUT: test-count={len(selected)}")

    if selected:
        print("\nAffected test files:")
        for test_file in selected:
            print(f"  {test_file}")

    return 0


if __name__ == "__main__":
    sys.exit(main())
# Run pytest on the test files listed (one per line) in SELECTED_TESTS_FILE.
# Fails fast when the variable is unset or the file is missing/empty: `cat`
# without an argument would read stdin, and an empty list would make pytest
# fall back to its default collection instead of a selective run.
test-backend-ci-selective:
	@echo "--> Running CI Python tests (selective)"
	@test -s "$(SELECTED_TESTS_FILE)" || { echo "SELECTED_TESTS_FILE is unset, missing or empty" >&2; exit 1; }
	python3 -b -m pytest \
		$$(cat "$(SELECTED_TESTS_FILE)") \
		--json-report \
		--json-report-file=".artifacts/pytest.json" \
		--json-report-omit=log \
		--junit-xml=.artifacts/pytest.junit.xml \
		-o junit_suite_name=pytest
	@echo ""

# Compute the selected test files from coverage data.
# CHANGED_FILES holds file names taken from the pull request, so it is read by
# the shell from the environment ("$$CHANGED_FILES") rather than pasted into
# the command line by make, where quotes, backticks or `$(...)` inside a file
# name would be evaluated. make passes variables that came from its
# environment or command line on to the recipe's environment.
compute-selected-tests:
	@echo "--> Computing selected tests from coverage data"
	python3 .github/workflows/scripts/compute-selected-tests.py \
		--coverage-db "$(COVERAGE_DB)" \
		--changed-files "$$CHANGED_FILES" \
		--output .artifacts/selected-tests.txt \
		--github-output
	@echo ""

# it's not possible to change settings.DATABASE after django startup, so
# unfortunately these tests must be run in a separate pytest process. References:
# * https://docs.djangoproject.com/en/4.2/topics/testing/tools/#overriding-settings