Skip to content

Commit 74eba04

Browse files
committed
Add backend selective testing workflow
1 parent 90e74c8 commit 74eba04

File tree

4 files changed

+231
-2
lines changed

4 files changed

+231
-2
lines changed
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Runs the backend test suite on pull requests, restricted to the tests that a
# previously recorded coverage database links to the files changed in the PR.
name: backend - selective

on:
  pull_request:

# Cancel in progress workflows on pull_requests.
# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  backend-test-selective:
    name: backend test (selective)
    runs-on: ubuntu-24.04
    timeout-minutes: 60
    permissions:
      contents: read
      id-token: write
      actions: read # used for DIM metadata
    strategy:
      fail-fast: false
      matrix:
        instance:
          [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]

    env:
      # Must equal the number of entries in matrix.instance above.
      MATRIX_INSTANCE_TOTAL: 22

    steps:
      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

      - name: Setup sentry env
        uses: ./.github/actions/setup-sentry
        id: setup
        with:
          mode: backend-ci

      # TODO: Gcloud
      - name: Download coverage database
        uses: actions/download-artifact@v4
        with:
          name: pycoverage-sqlite-combined-20529759656
          # NOTE(review): download-artifact treats `path` as a destination
          # directory, while COVERAGE_DB_PATH below is opened as a SQLite file.
          # Confirm the two agree once the artifact layout is settled.
          path: .coverage
          run-id: 20529759656
          # `run-id` is only honoured when a token is supplied; without it the
          # action looks for the artifact in the current workflow run.
          github-token: ${{ github.token }}

      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62
        with:
          # The pytest plugin splits CHANGED_FILES on commas, but this action
          # joins paths with a space unless told otherwise.
          separator: ","

      - name: List all changed files
        env:
          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          IFS=',' read -ra changed <<< "${ALL_CHANGED_FILES}"
          for file in "${changed[@]}"; do
            echo "$file was changed"
          done

      - name: Run backend tests (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }})
        id: run_backend_tests
        run: make test-python-ci
        env:
          SELECTIVE_TESTING_ENABLED: true
          CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
          COVERAGE_DB_PATH: .coverage

      - name: Inspect failure
        if: failure()
        run: |
          if command -v devservices; then
            devservices logs
          fi

      # - name: Collect test data
      #   uses: ./.github/actions/collect-test-data
      #   if: ${{ !cancelled() }}
      #   with:
      #     artifact_path: .artifacts/pytest.json # TODO
      #     gcs_bucket: ${{ secrets.COLLECT_TEST_DATA_GCS_BUCKET }}
      #     gcp_project_id: ${{ secrets.COLLECT_TEST_DATA_GCP_PROJECT_ID }}
      #     workload_identity_provider: ${{ secrets.SENTRY_GCP_DEV_WORKLOAD_IDENTITY_POOL }}
      #     service_account_email: ${{ secrets.COLLECT_TEST_DATA_SERVICE_ACCOUNT_EMAIL }}
      #     matrix_instance_number: ${{ steps.setup.outputs.matrix-instance-number }}

src/sentry/preprod/size_analysis/compare.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ def compare_size_analysis(
155155
base_download_size=base_size_analysis.max_download_size,
156156
)
157157

158+
# Placeholder
159+
158160
# Compare insights only if we're not skipping the comparison
159161
insight_diff_items = []
160162
if not skip_diff_item_comparison:
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import sqlite3
5+
import sys
6+
7+
8+
def _file_executed(bitblob: bytes) -> bool:
9+
"""
10+
Returns True if any line in the file was executed (bitblob has any bits set).
11+
"""
12+
return any(b != 0 for b in bitblob)
13+
14+
15+
def get_affected_tests_from_coverage(db_path: str, source_files: list[str]) -> set[str] | None:
    """
    Query the coverage database to find which tests executed code in the given source files.

    A source file matches a recorded file when the recorded path is exactly
    the given path or ends with ``/<given path>``. Contexts are expected in
    pytest node-id form (``tests/foo/bar.py::TestClass::test_function``); only
    the part before the first ``::`` is kept.

    Args:
        db_path: Path to the .coverage SQLite database
        source_files: List of source file paths that have changed

    Returns:
        Set of test file paths (e.g., 'tests/sentry/api/test_foo.py'),
        or None if the database doesn't exist or there's an error.
    """
    if not os.path.exists(db_path):
        return None

    test_files: set[str] = set()

    # Deliberately broad: selective testing is best-effort, so any failure here
    # is reported and signalled with None instead of aborting the test run.
    try:
        conn = sqlite3.connect(db_path)
        try:
            cur = conn.cursor()

            for file_path in source_files:
                # '_' and '%' are LIKE wildcards and '_' is common in file
                # names; escape them so 'a_b.py' cannot match 'aXb.py'.
                escaped = (
                    file_path.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
                )

                # Non-empty contexts only: the empty context is not tied to a test.
                cur.execute(
                    """
                    SELECT c.context, lb.numbits
                    FROM line_bits lb
                    JOIN file f ON lb.file_id = f.id
                    JOIN context c ON lb.context_id = c.id
                    WHERE (f.path = ? OR f.path LIKE ? ESCAPE '\\')
                    AND c.context != ''
                    """,
                    (file_path, f"%/{escaped}"),
                )

                for context, bitblob in cur.fetchall():
                    if _file_executed(bitblob):
                        # Context format: 'tests/foo/bar.py::TestClass::test_function'
                        test_files.add(context.split("::", 1)[0])
        finally:
            # Release the connection even when a query fails part-way through.
            conn.close()

    except Exception as e:
        # Log the error but don't fail the test run
        print(f"Warning: Could not query coverage database: {e}", file=sys.stderr)
        return None

    return test_files
69+
70+
71+
def filter_items_by_coverage(items, changed_files: list[str], coverage_db_path: str):
    """
    Partition pytest items into those affected by the changed files and the rest.

    Args:
        items: List of pytest.Item objects to filter
        changed_files: List of source files that have changed
        coverage_db_path: Path to the coverage database

    Returns:
        Tuple of (selected_items, discarded_items, affected_test_files).
        affected_test_files is the set of test files found in coverage data;
        when coverage data could not be loaded it is None, every item is
        selected and nothing is discarded.
    """
    affected_test_files = get_affected_tests_from_coverage(coverage_db_path, changed_files)

    # No usable coverage data: fall back to running everything.
    if affected_test_files is None:
        return list(items), [], None

    kept = []
    dropped = []

    for test_item in items:
        # The node id looks like 'tests/foo.py::TestClass::test_func';
        # everything before the first '::' is the test file path.
        module_path, _, _ = test_item.nodeid.partition("::")
        bucket = kept if module_path in affected_test_files else dropped
        bucket.append(test_item)

    return kept, dropped, affected_test_files

src/sentry/testutils/pytest/sentry.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from sentry.runner.importer import install_plugin_apps
2020
from sentry.silo.base import SiloMode
21+
from sentry.testutils.pytest.selective_testing import filter_items_by_coverage
2122
from sentry.testutils.region import TestEnvRegionDirectory
2223
from sentry.testutils.silo import monkey_patch_single_process_silo_mode_state
2324
from sentry.types import region
@@ -388,6 +389,43 @@ def _shuffle_d(dct: dict[K, V]) -> dict[K, V]:
388389
def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None:
389390
"""After collection, we need to select tests based on group and group strategy"""
390391

392+
initial_discard = []
393+
394+
# Selective testing based on coverage data
395+
if os.environ.get("SELECTIVE_TESTING_ENABLED"):
396+
changed_files_str = os.environ.get("CHANGED_FILES", "")
397+
coverage_db_path = os.environ.get("COVERAGE_DB_PATH", ".coverage.combined")
398+
399+
if changed_files_str:
400+
# Parse changed files from comma-separated string
401+
changed_files = [f.strip() for f in changed_files_str.split(",") if f.strip()]
402+
403+
config.get_terminal_writer().line(
404+
f"Selective testing enabled for {len(changed_files)} changed file(s)"
405+
)
406+
407+
# Filter tests using coverage data
408+
selected_items, discarded_items, affected_test_files = filter_items_by_coverage(
409+
items, changed_files, coverage_db_path
410+
)
411+
412+
if affected_test_files is not None:
413+
config.get_terminal_writer().line(
414+
f"Found {len(affected_test_files)} affected test file(s) from coverage data"
415+
)
416+
config.get_terminal_writer().line(
417+
f"Selected {len(selected_items)}/{len(items)} tests based on coverage"
418+
)
419+
420+
# Update items with filtered list
421+
items[:] = selected_items
422+
initial_discard = discarded_items
423+
else:
424+
config.get_terminal_writer().line(
425+
"Warning: Could not load coverage data, running all tests"
426+
)
427+
428+
# Existing grouping logic (unchanged)
391429
total_groups = int(os.environ.get("TOTAL_TEST_GROUPS", 1))
392430
current_group = int(os.environ.get("TEST_GROUP", 0))
393431
grouping_strategy = os.environ.get("TEST_GROUP_STRATEGY", "scope")
@@ -420,9 +458,12 @@ def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item
420458
config.get_terminal_writer().line(f"SENTRY_SHUFFLE_TESTS_SEED: {seed}")
421459
_shuffle(items, random.Random(seed))
422460

461+
# Combine discards from both selective testing and grouping
462+
all_discarded = initial_discard + discard
463+
423464
# This only needs to be done if there are items to be de-selected
424-
if len(discard) > 0:
425-
config.hook.pytest_deselected(items=discard)
465+
if len(all_discarded) > 0:
466+
config.hook.pytest_deselected(items=all_discarded)
426467

427468

428469
def pytest_xdist_setupnodes() -> None:

0 commit comments

Comments
 (0)