Skip to content

Commit e845ba5

Browse files
committed
perf(delta-upgrade): lazy chain walk, GHCR retry, parallel I/O, offline cache
Four performance optimizations for the delta upgrade system:

## Lazy chain resolution (replaces eager graph build)

Replace buildNightlyPatchGraph + walkNightlyChain with a lazy approach in resolveNightlyChain:

- List all patch tags (1 HTTP call, kept)
- Filter tags to only those between currentVersion and targetVersion using Bun.semver.order() — since patches are sequential, this gives the exact chain without fetching any manifests
- Sort filtered tags by version
- Fetch only the 1-2 manifests in the chain (instead of all N)

This reduces manifest fetches from ~14 to 1-2 for typical upgrades.

## Retry with timeout for GHCR requests

Add fetchWithRetry helper in ghcr.ts:

- 10s timeout per request (AbortSignal.timeout)
- 1 retry on transient errors (timeout, network, connection reset)
- Applied to getAnonymousToken, fetchManifest, fetchTagPage
- downloadNightlyBlob gets 30s timeout (no retry — large downloads)

## Parallel I/O

fetchManifest(targetTag) and listTags() run concurrently via Promise.all after token acquisition — both only depend on the token, not each other.

## Patch pre-fetch for offline upgrades

New file-based cache in ~/.sentry/patch-cache/ enables fully offline delta upgrades for both nightly and stable channels:

- Background version check now pre-fetches delta patches (~50-80KB) after discovering a new version
- Patches accumulate across runs: skip 3 versions, all 3 patches are cached, multi-hop upgrade is fully offline
- resolveStableDelta/resolveNightlyDelta check cache before network
- delta.source span attribute tracks cache vs network for telemetry
- 7-day TTL with opportunistic cleanup during version checks
- Two-pass cleanup preserves shared patches referenced by live chains
1 parent ea2925a commit e845ba5

File tree

9 files changed

+1709
-604
lines changed

9 files changed

+1709
-604
lines changed

AGENTS.md

Lines changed: 40 additions & 46 deletions
Large diffs are not rendered by default.

src/lib/delta-upgrade.ts

Lines changed: 460 additions & 160 deletions
Large diffs are not rendered by default.

src/lib/ghcr.ts

Lines changed: 99 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,82 @@
2020
import { getUserAgent } from "./constants.js";
2121
import { UpgradeError } from "./errors.js";
2222

23+
/** Per-attempt timeout, in milliseconds, for GHCR HTTP requests (10 seconds); the default `timeout` of `fetchWithRetry`. */
24+
const GHCR_REQUEST_TIMEOUT = 10_000;
25+
26+
/** Number of retries after the first failed attempt; `fetchWithRetry` makes at most `GHCR_MAX_RETRIES + 1` attempts. */
27+
const GHCR_MAX_RETRIES = 1;
28+
29+
/** Timeout, in milliseconds, for the initial GHCR blob request in `downloadNightlyBlob` (30 seconds). */
30+
const GHCR_BLOB_TIMEOUT = 30_000;
31+
32+
/**
 * Check if an error is a transient network/timeout failure worth retrying.
 *
 * Matches timeout/abort errors from `AbortSignal.timeout()`, connection
 * resets/refusals, and generic network failures. Does NOT match HTTP-level
 * errors (those are handled by the caller after receiving a Response).
 *
 * Both the error message and, when present, a string `code` property
 * (e.g. `ECONNRESET`, `ETIMEDOUT`) are inspected, because socket-level
 * failures often carry the errno only in `code` or spell timeouts as
 * "ETIMEDOUT" / "timed out" rather than "timeout".
 *
 * @param error - Error thrown by a `fetch` call
 * @returns `true` when a retry has a reasonable chance of succeeding
 */
function isRetryableError(error: Error): boolean {
  // AbortSignal.timeout() throws a TimeoutError DOMException — check by name
  // rather than relying on error message content (which varies across runtimes)
  if (error.name === "TimeoutError" || error.name === "AbortError") {
    return true;
  }

  // Node-style system errors expose the errno name on `code`; fold it into
  // the searched text so it is matched even when the message omits it.
  const code = (error as { code?: unknown }).code;
  const haystack = `${error.message} ${
    typeof code === "string" ? code : ""
  }`.toLowerCase();

  const transientMarkers = [
    "timeout",
    "timed out",
    "etimedout",
    "econnreset",
    "econnrefused",
    "network",
    "fetch failed",
  ];
  return transientMarkers.some((marker) => haystack.includes(marker));
}
54+
55+
/**
 * Issue a GHCR request with a per-attempt timeout, retrying transient failures.
 *
 * GHCR exhibits cold-start latency spikes (126ms → 30s for identical
 * requests). Bounding each attempt and retrying keeps the worst case at
 * roughly `timeout × (GHCR_MAX_RETRIES + 1)` and gives a second chance when
 * the first request lands on a cold instance.
 *
 * Only thrown errors (timeouts, network failures) are considered for retry.
 * Any `Response` — including non-2xx statuses — is returned as-is for the
 * caller to inspect.
 *
 * @param url - Request URL
 * @param init - Fetch init options (any `signal` is replaced by the timeout signal)
 * @param context - Human-readable prefix for the error message
 * @param timeout - Per-attempt timeout in milliseconds
 * @returns The Response of the first attempt that does not throw
 * @throws {UpgradeError} `network_error` once attempts are exhausted or a
 *   non-retryable error is thrown
 */
async function fetchWithRetry(
  url: string,
  init: RequestInit,
  context: string,
  timeout = GHCR_REQUEST_TIMEOUT
): Promise<Response> {
  let failure: Error | undefined;
  let attemptsLeft = GHCR_MAX_RETRIES + 1;

  while (attemptsLeft > 0) {
    attemptsLeft -= 1;
    try {
      // A timeout signal is single-use, so build a fresh one per attempt.
      return await fetch(url, {
        ...init,
        signal: AbortSignal.timeout(timeout),
      });
    } catch (thrown) {
      failure = thrown instanceof Error ? thrown : new Error(String(thrown));
      // Give up when out of attempts or when the failure is not transient.
      const shouldRetry = attemptsLeft > 0 && isRetryableError(failure);
      if (!shouldRetry) {
        break;
      }
    }
  }

  const reason = failure?.message ?? "unknown error";
  throw new UpgradeError("network_error", `${context}: ${reason}`);
}
98+
2399
/** GHCR repository for CLI distribution */
24100
export const GHCR_REPO = "getsentry/cli";
25101

@@ -81,18 +157,11 @@ export type OciManifest = {
81157
*/
82158
export async function getAnonymousToken(): Promise<string> {
83159
const url = `${GHCR_REGISTRY}/token?scope=repository:${GHCR_REPO}:pull`;
84-
let response: Response;
85-
try {
86-
response = await fetch(url, {
87-
headers: { "User-Agent": getUserAgent() },
88-
});
89-
} catch (error) {
90-
const msg = error instanceof Error ? error.message : String(error);
91-
throw new UpgradeError(
92-
"network_error",
93-
`Failed to connect to GHCR: ${msg}`
94-
);
95-
}
160+
const response = await fetchWithRetry(
161+
url,
162+
{ headers: { "User-Agent": getUserAgent() } },
163+
"Failed to connect to GHCR"
164+
);
96165

97166
if (!response.ok) {
98167
throw new UpgradeError(
@@ -125,22 +194,17 @@ export async function fetchManifest(
125194
tag: string
126195
): Promise<OciManifest> {
127196
const url = `${GHCR_REGISTRY}/v2/${GHCR_REPO}/manifests/${tag}`;
128-
let response: Response;
129-
try {
130-
response = await fetch(url, {
197+
const response = await fetchWithRetry(
198+
url,
199+
{
131200
headers: {
132201
Authorization: `Bearer ${token}`,
133202
Accept: OCI_MANIFEST_TYPE,
134203
"User-Agent": getUserAgent(),
135204
},
136-
});
137-
} catch (error) {
138-
const msg = error instanceof Error ? error.message : String(error);
139-
throw new UpgradeError(
140-
"network_error",
141-
`Failed to connect to GHCR: ${msg}`
142-
);
143-
}
205+
},
206+
`Failed to fetch manifest for tag "${tag}"`
207+
);
144208

145209
if (!response.ok) {
146210
throw new UpgradeError(
@@ -244,6 +308,7 @@ export async function downloadNightlyBlob(
244308
"User-Agent": getUserAgent(),
245309
},
246310
redirect: "manual",
311+
signal: AbortSignal.timeout(GHCR_BLOB_TIMEOUT),
247312
});
248313
} catch (error) {
249314
const msg = error instanceof Error ? error.message : String(error);
@@ -275,6 +340,11 @@ export async function downloadNightlyBlob(
275340
// Step 2: Follow the redirect WITHOUT the Authorization header.
276341
// Azure rejects requests that include a Bearer token alongside its own
277342
// signed query-string credentials (returns 404).
343+
// No AbortSignal.timeout here: this fetch covers both connection AND
344+
// body streaming. For full nightly binaries (~30 MB), a 30s timeout
345+
// would require sustained ~8 Mbps throughput and fail on slow connections.
346+
// The GHCR step 1 timeout above guards against GHCR-side latency;
347+
// Azure Blob Storage has reliable latency characteristics.
278348
let redirectResponse: Response;
279349
try {
280350
redirectResponse = await fetch(redirectUrl, {
@@ -324,18 +394,16 @@ async function fetchTagPage(
324394
url += `&last=${encodeURIComponent(lastTag)}`;
325395
}
326396

327-
let response: Response;
328-
try {
329-
response = await fetch(url, {
397+
const response = await fetchWithRetry(
398+
url,
399+
{
330400
headers: {
331401
Authorization: `Bearer ${token}`,
332402
"User-Agent": getUserAgent(),
333403
},
334-
});
335-
} catch (error) {
336-
const msg = error instanceof Error ? error.message : String(error);
337-
throw new UpgradeError("network_error", `Failed to list GHCR tags: ${msg}`);
338-
}
404+
},
405+
"Failed to list GHCR tags"
406+
);
339407

340408
if (!response.ok) {
341409
throw new UpgradeError(

0 commit comments

Comments
 (0)