Skip to content

Commit af8de8c

Browse files
committed
perf(delta-upgrade): lazy chain walk, GHCR retry, parallel I/O, offline cache
Four performance optimizations for the delta upgrade system:

## Lazy chain resolution (replaces eager graph build)

Replace buildNightlyPatchGraph + walkNightlyChain with a lazy approach in resolveNightlyChain:

- List all patch tags (1 HTTP call, kept)
- Filter tags to only those between currentVersion and targetVersion using Bun.semver.order() — since patches are sequential, this gives the exact chain without fetching any manifests
- Sort filtered tags by version
- Fetch only the 1-2 manifests in the chain (instead of all N)

This reduces manifest fetches from ~14 to 1-2 for typical upgrades.

## Retry with timeout for GHCR requests

Add fetchWithRetry helper in ghcr.ts:

- 10s timeout per request (AbortSignal.timeout)
- 1 retry on transient errors (timeout, network, connection reset)
- Applied to getAnonymousToken, fetchManifest, fetchTagPage
- downloadNightlyBlob gets 30s timeout (no retry — large downloads)

## Parallel I/O

fetchManifest(targetTag) and listTags() run concurrently via Promise.all after token acquisition — both only depend on the token, not each other.

## Patch pre-fetch for offline upgrades

New file-based cache in ~/.sentry/patch-cache/ enables fully offline delta upgrades for both nightly and stable channels:

- Background version check now pre-fetches delta patches (~50-80KB) after discovering a new version
- Patches accumulate across runs: skip 3 versions, all 3 patches are cached, multi-hop upgrade is fully offline
- resolveStableDelta/resolveNightlyDelta check cache before network
- delta.source span attribute tracks cache vs network for telemetry
- 7-day TTL with opportunistic cleanup during version checks
- Two-pass cleanup preserves shared patches referenced by live chains
1 parent ea2925a commit af8de8c

File tree

10 files changed

+1887
-620
lines changed

10 files changed

+1887
-620
lines changed

AGENTS.md

Lines changed: 40 additions & 46 deletions
Large diffs are not rendered by default.

src/lib/delta-upgrade.ts

Lines changed: 492 additions & 168 deletions
Large diffs are not rendered by default.

src/lib/ghcr.ts

Lines changed: 149 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,115 @@
2020
import { getUserAgent } from "./constants.js";
2121
import { UpgradeError } from "./errors.js";
2222

23+
/** Default timeout for GHCR HTTP requests (10 seconds) */
24+
const GHCR_REQUEST_TIMEOUT = 10_000;
25+
26+
/** Maximum number of retry attempts for transient failures */
27+
const GHCR_MAX_RETRIES = 1;
28+
29+
/** Timeout for large blob downloads (30 seconds) */
30+
const GHCR_BLOB_TIMEOUT = 30_000;
31+
32+
/**
 * Check if an error is a transient network/timeout failure worth retrying.
 *
 * Matches timeout/abort errors from `AbortSignal.timeout()`, connection
 * resets/refusals, and generic network failures. Does NOT match HTTP-level
 * errors (those are handled by the caller after receiving a Response).
 *
 * Detection is layered:
 * 1. Error name (`TimeoutError` / `AbortError`) — stable across runtimes.
 * 2. Message substrings — the message wording varies by runtime, so this is
 *    a best-effort match.
 * 3. Node-style `code` on the error or on its `cause` — some failures carry
 *    `ECONNRESET` / `ECONNREFUSED` only there, with an unrelated message.
 *
 * @param error - Error thrown by a failed `fetch()` call
 * @returns true when the failure looks transient and a retry is reasonable
 */
function isRetryableError(error: Error): boolean {
  // AbortSignal.timeout() throws a TimeoutError DOMException — check by name
  // rather than relying on error message content (which varies across runtimes)
  if (error.name === "TimeoutError" || error.name === "AbortError") {
    return true;
  }

  const message = error.message.toLowerCase();
  const messageMarkers = [
    "timeout",
    "econnreset",
    "econnrefused",
    "network",
    "fetch failed",
  ];
  if (messageMarkers.some((marker) => message.includes(marker))) {
    return true;
  }

  // Read a string `code` off an arbitrary value. DOMException exposes a
  // legacy numeric `code`, which the string check deliberately ignores.
  const readCode = (value: unknown): string | undefined => {
    if (typeof value !== "object" || value === null) {
      return;
    }
    const code = (value as { code?: unknown }).code;
    return typeof code === "string" ? code.toUpperCase() : undefined;
  };

  const retryableCodes = ["ECONNRESET", "ECONNREFUSED"];
  const observedCodes = [
    readCode(error),
    readCode((error as { cause?: unknown }).cause),
  ];
  return observedCodes.some(
    (code) => code !== undefined && retryableCodes.includes(code)
  );
}
54+
55+
/**
 * Build a combined abort signal from the per-request timeout and an
 * optional external signal (e.g., process-exit abort controller).
 *
 * @param timeout - Milliseconds after which the returned signal aborts
 * @param externalSignal - Optional caller-provided signal; when given, the
 *   returned signal aborts as soon as either source aborts
 * @returns The timeout signal alone, or the combination of both signals
 */
function buildSignal(
  timeout: number,
  externalSignal?: AbortSignal
): AbortSignal {
  const timeoutSignal = AbortSignal.timeout(timeout);
  return externalSignal
    ? AbortSignal.any([timeoutSignal, externalSignal])
    : timeoutSignal;
}

/**
 * Returns true when the given error was triggered by the external
 * (caller-provided) abort signal rather than by our timeout.
 *
 * An aborted fetch rejects with the signal's abort reason, so a caller that
 * aborts with a custom reason (`controller.abort(new Error("…"))`) yields an
 * error whose name is not "AbortError". Matching on identity with the
 * signal's `reason` as well keeps such cancellations from being classified
 * as ordinary (possibly retryable) failures.
 *
 * @param error - Error caught from the failed fetch
 * @param externalSignal - The caller-provided signal, if any
 */
function isExternalAbort(error: Error, externalSignal?: AbortSignal): boolean {
  if (!externalSignal || !externalSignal.aborted) {
    return false;
  }
  return error.name === "AbortError" || error === externalSignal.reason;
}

/** Optional knobs for {@link fetchWithRetry}. */
type RetryOptions = {
  /** Per-request timeout in milliseconds (defaults to GHCR_REQUEST_TIMEOUT) */
  timeout?: number;
  /** Caller-provided abort signal; aborting it cancels without retrying */
  signal?: AbortSignal;
};

/**
 * Fetch with timeout and retry for GHCR requests.
 *
 * GHCR exhibits cold-start latency spikes (126ms → 30s for identical
 * requests). A short timeout + retry keeps the worst case at ~20s instead
 * of 30s, and helps when the first request hits a cold instance.
 *
 * HTTP error statuses are NOT retried: any Response that fetch resolves
 * with is returned as-is for the caller to inspect.
 *
 * @param url - Request URL
 * @param init - Fetch init options (any `signal` in here is overridden)
 * @param context - Human-readable context for error messages
 * @param options - Optional per-request timeout and external abort signal
 * @returns The Response of the first attempt for which fetch resolves
 * @throws {UpgradeError} "network_error" when the caller aborts, when a
 *   non-retryable error occurs, or when all attempts are exhausted
 */
async function fetchWithRetry(
  url: string,
  init: RequestInit,
  context: string,
  options?: RetryOptions
): Promise<Response> {
  const timeout = options?.timeout ?? GHCR_REQUEST_TIMEOUT;
  const externalSignal = options?.signal;
  let lastError: Error | undefined;

  for (let attempt = 0; attempt <= GHCR_MAX_RETRIES; attempt++) {
    try {
      // A fresh signal per attempt: a timeout signal that already fired
      // cannot be reused for the retry.
      return await fetch(url, {
        ...init,
        signal: buildSignal(timeout, externalSignal),
      });
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
      // Propagate external abort immediately — don't retry caller cancellation
      if (isExternalAbort(lastError, externalSignal)) {
        break;
      }
      // Only retry on timeout or network errors — not HTTP errors
      if (attempt >= GHCR_MAX_RETRIES || !isRetryableError(lastError)) {
        break;
      }
    }
  }

  throw new UpgradeError(
    "network_error",
    `${context}: ${lastError?.message ?? "unknown error"}`
  );
}
131+
23132
/** GHCR repository for CLI distribution */
24133
export const GHCR_REPO = "getsentry/cli";
25134

@@ -79,20 +188,14 @@ export type OciManifest = {
79188
* @returns Bearer token string
80189
* @throws {UpgradeError} On network failure or malformed response
81190
*/
82-
export async function getAnonymousToken(): Promise<string> {
191+
export async function getAnonymousToken(signal?: AbortSignal): Promise<string> {
83192
const url = `${GHCR_REGISTRY}/token?scope=repository:${GHCR_REPO}:pull`;
84-
let response: Response;
85-
try {
86-
response = await fetch(url, {
87-
headers: { "User-Agent": getUserAgent() },
88-
});
89-
} catch (error) {
90-
const msg = error instanceof Error ? error.message : String(error);
91-
throw new UpgradeError(
92-
"network_error",
93-
`Failed to connect to GHCR: ${msg}`
94-
);
95-
}
193+
const response = await fetchWithRetry(
194+
url,
195+
{ headers: { "User-Agent": getUserAgent() } },
196+
"Failed to connect to GHCR",
197+
{ signal }
198+
);
96199

97200
if (!response.ok) {
98201
throw new UpgradeError(
@@ -122,25 +225,22 @@ export async function getAnonymousToken(): Promise<string> {
122225
*/
123226
export async function fetchManifest(
124227
token: string,
125-
tag: string
228+
tag: string,
229+
signal?: AbortSignal
126230
): Promise<OciManifest> {
127231
const url = `${GHCR_REGISTRY}/v2/${GHCR_REPO}/manifests/${tag}`;
128-
let response: Response;
129-
try {
130-
response = await fetch(url, {
232+
const response = await fetchWithRetry(
233+
url,
234+
{
131235
headers: {
132236
Authorization: `Bearer ${token}`,
133237
Accept: OCI_MANIFEST_TYPE,
134238
"User-Agent": getUserAgent(),
135239
},
136-
});
137-
} catch (error) {
138-
const msg = error instanceof Error ? error.message : String(error);
139-
throw new UpgradeError(
140-
"network_error",
141-
`Failed to connect to GHCR: ${msg}`
142-
);
143-
}
240+
},
241+
`Failed to fetch manifest for tag "${tag}"`,
242+
{ signal }
243+
);
144244

145245
if (!response.ok) {
146246
throw new UpgradeError(
@@ -230,7 +330,8 @@ export function findLayerByFilename(
230330
*/
231331
export async function downloadNightlyBlob(
232332
token: string,
233-
digest: string
333+
digest: string,
334+
signal?: AbortSignal
234335
): Promise<Response> {
235336
const blobUrl = `${GHCR_REGISTRY}/v2/${GHCR_REPO}/blobs/${digest}`;
236337

@@ -244,6 +345,7 @@ export async function downloadNightlyBlob(
244345
"User-Agent": getUserAgent(),
245346
},
246347
redirect: "manual",
348+
signal: buildSignal(GHCR_BLOB_TIMEOUT, signal),
247349
});
248350
} catch (error) {
249351
const msg = error instanceof Error ? error.message : String(error);
@@ -275,10 +377,16 @@ export async function downloadNightlyBlob(
275377
// Step 2: Follow the redirect WITHOUT the Authorization header.
276378
// Azure rejects requests that include a Bearer token alongside its own
277379
// signed query-string credentials (returns 404).
380+
// No AbortSignal.timeout here: this fetch covers both connection AND
381+
// body streaming. For full nightly binaries (~30 MB), a 30s timeout
382+
// would require sustained ~8 Mbps throughput and fail on slow connections.
383+
// The GHCR step 1 timeout above guards against GHCR-side latency;
384+
// Azure Blob Storage has reliable latency characteristics.
278385
let redirectResponse: Response;
279386
try {
280387
redirectResponse = await fetch(redirectUrl, {
281388
headers: { "User-Agent": getUserAgent() },
389+
signal,
282390
});
283391
} catch (error) {
284392
const msg = error instanceof Error ? error.message : String(error);
@@ -317,25 +425,25 @@ const TAGS_PAGE_SIZE = 100;
317425
*/
318426
async function fetchTagPage(
319427
token: string,
320-
lastTag?: string
428+
lastTag?: string,
429+
signal?: AbortSignal
321430
): Promise<string[]> {
322431
let url = `${GHCR_REGISTRY}/v2/${GHCR_REPO}/tags/list?n=${TAGS_PAGE_SIZE}`;
323432
if (lastTag) {
324433
url += `&last=${encodeURIComponent(lastTag)}`;
325434
}
326435

327-
let response: Response;
328-
try {
329-
response = await fetch(url, {
436+
const response = await fetchWithRetry(
437+
url,
438+
{
330439
headers: {
331440
Authorization: `Bearer ${token}`,
332441
"User-Agent": getUserAgent(),
333442
},
334-
});
335-
} catch (error) {
336-
const msg = error instanceof Error ? error.message : String(error);
337-
throw new UpgradeError("network_error", `Failed to list GHCR tags: ${msg}`);
338-
}
443+
},
444+
"Failed to list GHCR tags",
445+
{ signal }
446+
);
339447

340448
if (!response.ok) {
341449
throw new UpgradeError(
@@ -361,13 +469,14 @@ async function fetchTagPage(
361469
*/
362470
export async function listTags(
363471
token: string,
364-
prefix?: string
472+
prefix?: string,
473+
signal?: AbortSignal
365474
): Promise<string[]> {
366475
const allTags: string[] = [];
367476
let lastTag: string | undefined;
368477

369478
for (;;) {
370-
const tags = await fetchTagPage(token, lastTag);
479+
const tags = await fetchTagPage(token, lastTag, signal);
371480
if (tags.length === 0) {
372481
break;
373482
}
@@ -402,8 +511,9 @@ export async function listTags(
402511
*/
403512
export async function downloadLayerBlob(
404513
token: string,
405-
digest: string
514+
digest: string,
515+
signal?: AbortSignal
406516
): Promise<ArrayBuffer> {
407-
const response = await downloadNightlyBlob(token, digest);
517+
const response = await downloadNightlyBlob(token, digest, signal);
408518
return response.arrayBuffer();
409519
}

0 commit comments

Comments
 (0)