Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,7 @@ dmypy.json
*.pem
junit.xml

.DS_Store
.DS_Store

# Cursor (user/IDE rules - do not commit)
.cursor/
44 changes: 34 additions & 10 deletions apps/worker/services/bundle_analysis/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,22 @@ def _attempt_init_from_previous_report(

@sentry_sdk.trace
def process_upload(
self, commit: Commit, upload: Upload, compare_sha: str | None = None
self,
commit: Commit,
upload: Upload,
compare_sha: str | None = None,
pre_downloaded_path: str | None = None,
) -> ProcessingResult:
"""
Download and parse the data associated with the given upload and
merge the results into a bundle report.

Args:
commit: The commit being processed
upload: The upload record
compare_sha: Optional SHA for comparison
pre_downloaded_path: Optional path to pre-downloaded upload file.
If provided, skips the GCS download (optimization for reducing lock time).
"""
commit_report: CommitReport = upload.report
bundle_loader = BundleAnalysisReportLoader(commit_report.commit.repository)
Expand All @@ -241,20 +252,32 @@ def process_upload(
commit, bundle_loader
)

# download raw upload data to local tempfile
_, local_path = tempfile.mkstemp()
if pre_downloaded_path and os.path.exists(pre_downloaded_path):
local_path = pre_downloaded_path
should_cleanup_local = False
log.info(
"Using pre-downloaded upload file",
extra={
"repoid": commit.repoid,
"commit": commit.commitid,
"local_path": local_path,
},
)
else:
_, local_path = tempfile.mkstemp()
should_cleanup_local = True

try:
session_id, prev_bar, bundle_name = None, None, None
if upload.storage_path != "":
with open(local_path, "wb") as f:
storage_service.read_file(
get_bucket_name(), upload.storage_path, file_obj=f
)
if should_cleanup_local:
with open(local_path, "wb") as f:
storage_service.read_file(
get_bucket_name(), upload.storage_path, file_obj=f
)

# load the downloaded data into the bundle report
session_id, bundle_name = bundle_report.ingest(local_path, compare_sha)

# Retrieve previous commit's BAR and associate past Assets
prev_bar = self._previous_bundle_analysis_report(
bundle_loader, commit, head_bundle_report=bundle_report
)
Expand Down Expand Up @@ -332,7 +355,8 @@ def process_upload(
),
)
finally:
os.remove(local_path)
if should_cleanup_local and os.path.exists(local_path):
os.remove(local_path)

return ProcessingResult(
upload=upload,
Expand Down
73 changes: 73 additions & 0 deletions apps/worker/services/bundle_analysis/tests/test_bundle_analysis.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from textwrap import dedent
from unittest.mock import PropertyMock

Expand Down Expand Up @@ -31,6 +32,78 @@
from shared.yaml import UserYaml
from tests.helpers import mock_all_plans_and_tiers

# Directory holding the sample bundle-stats fixtures, located five levels
# above this test module under libs/shared/tests/samples.
SAMPLE_DIR = os.path.join(
    os.path.dirname(__file__),
    *[os.pardir] * 5,
    "libs",
    "shared",
    "tests",
    "samples",
)


@pytest.mark.django_db(databases={"default", "timeseries"})
def test_process_upload_with_pre_downloaded_path(dbsession, mocker, mock_storage):
    """process_upload should ingest from pre_downloaded_path and skip the GCS download.

    Builds a commit/report/upload chain, points ``pre_downloaded_path`` at a
    sample fixture file, then verifies that ``ingest`` receives that exact path
    and that the upload's own storage object is never fetched.
    """
    storage_path = (
        "v1/repos/testing/ed1bdd67-8fd2-4cdb-ac9e-39b99e4a3892/bundle_report.sqlite"
    )
    mock_storage.write_file(get_bucket_name(), storage_path, "test-content")

    commit = CommitFactory()
    dbsession.add(commit)
    dbsession.commit()

    commit_report = CommitReport(
        commit=commit, report_type=ReportType.BUNDLE_ANALYSIS.value
    )
    dbsession.add(commit_report)
    dbsession.commit()

    upload = UploadFactory.create(storage_path=storage_path, report=commit_report)
    dbsession.add(upload)
    dbsession.commit()

    # Use an existing sample file as the "pre-downloaded" upload payload.
    sample_path = os.path.join(SAMPLE_DIR, "sample_bundle_stats.json")
    # Guard: process_upload silently falls back to downloading from storage
    # when the given path does not exist, which would make the spy assertions
    # below fail with a confusing message. Fail fast with a clear one instead.
    assert os.path.exists(sample_path), f"missing sample fixture: {sample_path}"

    # Mock ingest so we can track the call without parsing the file for real.
    mock_ingest = mocker.patch(
        "shared.bundle_analysis.BundleAnalysisReport.ingest",
        return_value=(123, "sample"),
    )

    # Spy on storage reads so we can prove the upload blob was never fetched.
    storage_read_spy = mocker.spy(mock_storage, "read_file")

    report_service = BundleAnalysisReportService(UserYaml.from_dict({}))
    result = report_service.process_upload(
        commit, upload, pre_downloaded_path=sample_path
    )

    assert result.session_id == 123
    assert result.bundle_name == "sample"
    assert result.error is None

    # ingest must have been handed the pre-downloaded path, not a tempfile.
    mock_ingest.assert_called_once()
    call_args = mock_ingest.call_args
    assert call_args[0][0] == sample_path  # first positional arg is the path

    # Storage may be read to load an existing bundle report, but never to
    # download the upload file itself when pre_downloaded_path is provided.
    for call in storage_read_spy.call_args_list:
        if len(call[0]) >= 2:
            assert call[0][1] != upload.storage_path, (
                "Storage should not download upload file when pre_downloaded_path is provided"
            )


class MockBundleReport:
def __init__(self, name):
Expand Down
Loading