Skip to content

Commit c3745e6

Browse files
committed
perf: coalesceRanges auto-computes gap — eliminates redundant sort
All 5 call sites used coalesceRanges(ranges, autoCoalesceGap(ranges)), which sorted the ranges array twice — once in autoCoalesceGap and again in coalesceRanges. Now coalesceRanges accepts an optional maxGap and auto-computes it from the already-sorted array when omitted.
1 parent 96cc76d commit c3745e6

File tree

4 files changed

+23
-15
lines changed

4 files changed

+23
-15
lines changed

src/coalesce.ts

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,13 @@ export interface CoalescedRange {
1313
}
1414

1515
/**
16-
* Compute optimal coalesce gap based on page density.
16+
* Compute optimal coalesce gap from a **pre-sorted** range array.
1717
* Dense layouts (small gaps between pages) benefit from aggressive merging.
1818
* Sparse layouts waste bandwidth with large gaps.
1919
* Returns a gap between 16KB (sparse) and 256KB (dense).
2020
*/
21-
export function autoCoalesceGap(ranges: Range[]): number {
22-
if (ranges.length < 2) return 64 * 1024;
23-
const sorted = [...ranges].sort((a, b) => a.offset - b.offset);
21+
function computeGap(sorted: Range[]): number {
22+
if (sorted.length < 2) return 64 * 1024;
2423
// Compute median gap between adjacent ranges
2524
const gaps: number[] = [];
2625
for (let i = 1; i < sorted.length; i++) {
@@ -34,17 +33,26 @@ export function autoCoalesceGap(ranges: Range[]): number {
3433
return Math.max(16 * 1024, Math.min(256 * 1024, medianGap * 2));
3534
}
3635

37-
/** Merge nearby byte ranges into fewer R2 reads. Sorts by offset, merges if gap <= maxGap. */
38-
export function coalesceRanges(ranges: Range[], maxGap: number): CoalescedRange[] {
36+
/** Compute optimal coalesce gap based on page density. Sorts a copy internally. */
37+
export function autoCoalesceGap(ranges: Range[]): number {
38+
if (ranges.length < 2) return 64 * 1024;
39+
const sorted = [...ranges].sort((a, b) => a.offset - b.offset);
40+
return computeGap(sorted);
41+
}
42+
43+
/** Merge nearby byte ranges into fewer R2 reads. Sorts by offset, merges if gap <= maxGap.
44+
* If maxGap is omitted, auto-computes from page density (sorts once instead of twice). */
45+
export function coalesceRanges(ranges: Range[], maxGap?: number): CoalescedRange[] {
3946
if (ranges.length === 0) return [];
4047
const sorted = [...ranges].sort((a, b) => a.offset - b.offset);
48+
const gap = maxGap ?? computeGap(sorted);
4149
const result: CoalescedRange[] = [];
4250
let cur: CoalescedRange = { offset: sorted[0].offset, length: sorted[0].length, ranges: [sorted[0]] };
4351

4452
for (let i = 1; i < sorted.length; i++) {
4553
const r = sorted[i];
4654
const curEnd = cur.offset + cur.length;
47-
if (r.offset <= curEnd + maxGap) {
55+
if (r.offset <= curEnd + gap) {
4856
cur.length = Math.max(curEnd, r.offset + r.length) - cur.offset;
4957
cur.ranges.push(r);
5058
} else {

src/fragment-do.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import type { Env, TableMeta, QueryResult, Row } from "./types.js";
33
import { queryReferencedColumns } from "./types.js";
44
import type { QueryDescriptor } from "./client.js";
55
import { instantiateWasm, type WasmEngine } from "./wasm-engine.js";
6-
import { coalesceRanges, autoCoalesceGap, fetchBounded, withRetry, withTimeout } from "./coalesce.js";
6+
import { coalesceRanges, fetchBounded, withRetry, withTimeout } from "./coalesce.js";
77
import { R2SpillBackend } from "./r2-spill.js";
88
import {
99
type Operator, type RowBatch,
@@ -121,7 +121,7 @@ export class FragmentDO extends DurableObject<Env> {
121121
const r2Start = Date.now();
122122
if (uncachedRanges.length > 0) {
123123
const fetchRanges = uncachedRanges.map(u => u.range);
124-
const coalesced = coalesceRanges(fetchRanges, autoCoalesceGap(fetchRanges));
124+
const coalesced = coalesceRanges(fetchRanges);
125125
const fetched = await fetchBounded(
126126
coalesced.map(c => () =>
127127
withRetry(() =>

src/local-executor.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import { parseFooter, parseColumnMetaFromProtobuf, FOOTER_SIZE } from "./footer.
1212
import { parseManifest } from "./manifest.js";
1313
import { detectFormat, getParquetFooterLength, parseParquetFooter, parquetMetaToTableMeta } from "./parquet.js";
1414
import { assembleRows, canSkipFragment } from "./decode.js";
15-
import { coalesceRanges, autoCoalesceGap } from "./coalesce.js";
15+
import { coalesceRanges } from "./coalesce.js";
1616
import { instantiateWasm, rowsToColumnArrays, type WasmEngine } from "./wasm-engine.js";
1717

1818
import { VipCache } from "./vip-cache.js";
@@ -208,7 +208,7 @@ export class LocalExecutor implements QueryExecutor {
208208
colDetails.push({ name: col.name, dtype: col.dtype as DataType, pages: colPages, bytes: colBytes });
209209
}
210210

211-
const coalesced = coalesceRanges(ranges, autoCoalesceGap(ranges));
211+
const coalesced = coalesceRanges(ranges);
212212
const estimatedBytes = ranges.reduce((s, r) => s + r.length, 0);
213213
const totalRows = columns[0]?.pages.reduce((s, p) => s + p.rowCount, 0) ?? 0;
214214

src/query-do.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import { decodeParquetColumnChunk } from "./parquet-decode.js";
1111
import { instantiateWasm, type WasmEngine } from "./wasm-engine.js";
1212
import { mergeQueryResults } from "./merge.js";
1313
import { decodeColumnarBatch, columnarBatchToRows } from "./columnar.js";
14-
import { coalesceRanges, autoCoalesceGap, fetchBounded, withRetry, withTimeout } from "./coalesce.js";
14+
import { coalesceRanges, fetchBounded, withRetry, withTimeout } from "./coalesce.js";
1515
import { R2SpillBackend, encodeColumnarRun } from "./r2-spill.js";
1616
import {
1717
type Operator, type RowBatch,
@@ -192,7 +192,7 @@ class EdgeScanOperator implements Operator {
192192
// L3: R2
193193
if (uncachedRanges.length > 0) {
194194
const r2Start = Date.now();
195-
const coalesced = coalesceRanges(uncachedRanges, autoCoalesceGap(uncachedRanges));
195+
const coalesced = coalesceRanges(uncachedRanges);
196196
const fetched = await fetchBounded(
197197
coalesced.map(c => () =>
198198
withRetry(() =>
@@ -750,7 +750,7 @@ export class QueryDO extends DurableObject<Env> {
750750
colDetails.push({ name: col.name, dtype: col.dtype as DataType, pages: colPages, bytes: colBytes });
751751
}
752752

753-
const coalesced = coalesceRanges(ranges, autoCoalesceGap(ranges));
753+
const coalesced = coalesceRanges(ranges);
754754
const estimatedBytes = ranges.reduce((s, r) => s + r.length, 0);
755755
const dataset = this.datasetCache.get(query.table);
756756
const totalFragments = dataset ? dataset.fragmentMetas.size : 1;
@@ -1060,7 +1060,7 @@ export class QueryDO extends DurableObject<Env> {
10601060
let bytesRead = 0;
10611061

10621062
if (uncachedRanges.length > 0) {
1063-
const coalesced = coalesceRanges(uncachedRanges, autoCoalesceGap(uncachedRanges));
1063+
const coalesced = coalesceRanges(uncachedRanges);
10641064

10651065
const fetched = await fetchBounded(
10661066
coalesced.map(c => () =>

0 commit comments

Comments
 (0)