Skip to content

Commit 3af0679

Browse files
committed
perf: eliminate double TextEncoder.encode() in encodeColumnarRun
UTF-8 strings were encoded twice — once during size calculation and again during the write pass. Pre-encode each string once during size calculation and reuse the bytes in the write pass via a Map<number, Uint8Array[]> keyed by column index.
1 parent 94213f6 commit 3af0679

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

src/r2-spill.ts

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,9 @@ export function encodeColumnarRun(rows: Row[]): ArrayBuffer {
150150
totalSize += 2 + encodedNames[ci].length + 1 + 1; // nameLen + name + dtype + hasNulls
151151
}
152152

153+
// Pre-encode UTF8 strings once (reused in write pass to avoid double-encoding)
154+
const preEncodedUtf8 = new Map<number, Uint8Array[]>();
155+
153156
// Column data
154157
for (let ci = 0; ci < columnCount; ci++) {
155158
if (hasNulls[ci]) totalSize += bitmapBytes;
@@ -163,11 +166,15 @@ export function encodeColumnarRun(rows: Row[]): ArrayBuffer {
163166
totalSize += 4; // totalLen
164167
totalSize += (rowCount + 1) * 4; // offsets
165168
const name = colNames[ci];
169+
const encoded: Uint8Array[] = [];
166170
let strBytes = 0;
167171
for (let ri = 0; ri < rowCount; ri++) {
168172
const val = rows[ri][name];
169-
if (typeof val === "string") strBytes += encoder.encode(val).length;
173+
const enc = typeof val === "string" ? encoder.encode(val) : new Uint8Array(0);
174+
encoded.push(enc);
175+
strBytes += enc.length;
170176
}
177+
preEncodedUtf8.set(ci, encoded);
171178
totalSize += strBytes;
172179
break;
173180
}
@@ -243,15 +250,10 @@ export function encodeColumnarRun(rows: Row[]): ArrayBuffer {
243250
break;
244251
}
245252
case DTYPE_UTF8: {
246-
// Encode all strings, build offsets
247-
const encodedStrs: Uint8Array[] = [];
253+
// Reuse pre-encoded strings from size calculation pass
254+
const encodedStrs = preEncodedUtf8.get(ci)!;
248255
let totalLen = 0;
249-
for (let ri = 0; ri < rowCount; ri++) {
250-
const val = rows[ri][name];
251-
const encoded = typeof val === "string" ? encoder.encode(val) : new Uint8Array(0);
252-
encodedStrs.push(encoded);
253-
totalLen += encoded.length;
254-
}
256+
for (let ri = 0; ri < rowCount; ri++) totalLen += encodedStrs[ri].length;
255257

256258
view.setUint32(offset, totalLen, true); offset += 4;
257259

0 commit comments

Comments
 (0)