Skip to content

Commit 3df99e6

Browse files
committed
refactor: dedup encodeColumnBuffer across CSV and JSON readers (-98 lines)
An identical 81-line encodeColumnBuffer() was duplicated in csv-reader.ts and json-reader.ts; it has been extracted to reader.ts as a single shared export that both readers now import.
1 parent 696c270 commit 3df99e6

File tree

3 files changed

+108
-206
lines changed

3 files changed

+108
-206
lines changed

src/reader.ts

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* without hardcoding branches in LocalExecutor.
77
*/
88

9-
import type { ColumnMeta } from "./types.js";
9+
import type { ColumnMeta, DataType } from "./types.js";
1010
import type { FragmentSource } from "./operators.js";
1111

1212
// ---------------------------------------------------------------------------
@@ -156,3 +156,108 @@ export class UrlDataSource implements DataSource {
156156
return resp.arrayBuffer();
157157
}
158158
}
159+
160+
// ---------------------------------------------------------------------------
161+
// encodeColumnBuffer — shared by CSV and JSON readers
162+
// ---------------------------------------------------------------------------
163+
164+
/** Matches a plain base-10 integer literal that BigInt() can parse without precision loss. */
const DECIMAL_INTEGER_TEXT = /^-?\d+$/;

/**
 * Coerce a single cell to a bigint for the 64-bit integer columns.
 *
 * - bigint cells pass through untouched.
 * - Decimal integer strings are parsed directly with BigInt() so values beyond
 *   Number.MAX_SAFE_INTEGER keep every digit.
 * - Everything else goes through Number() and is truncated toward zero.
 * - Cells that are not finite numbers (NaN, ±Infinity, non-numeric text) become 0
 *   instead of making BigInt() throw a RangeError; the narrower integer columns
 *   already store 0 for such cells.
 */
function coerceCellToBigInt(
  cell: number | bigint | string | boolean | null | undefined,
): bigint {
  if (typeof cell === "bigint") return cell;
  if (typeof cell === "string") {
    const text = cell.trim();
    if (DECIMAL_INTEGER_TEXT.test(text)) return BigInt(text);
  }
  const truncated = Math.trunc(Number(cell ?? 0));
  return Number.isFinite(truncated) ? BigInt(truncated) : BigInt(0);
}

/**
 * Write every value into a numeric typed array, treating null/undefined as 0.
 * The typed array itself applies the element type's own conversion on store.
 */
function fillNumericColumn(
  target: { [index: number]: number },
  values: (number | bigint | string | boolean | null)[],
): void {
  for (let i = 0; i < values.length; i++) target[i] = Number(values[i] ?? 0);
}

/**
 * Encode typed column values into the binary wire format used by decodePage().
 * Used by ingestion readers to produce page data compatible with the scan pipeline.
 *
 * Layout per dtype, as produced by this function:
 * - "bool": bit-packed, one bit per value, least-significant bit first within each
 *   byte; only `true` and the number `1` set a bit.
 * - fixed-width numeric types: one typed-array element per value; null becomes 0.
 * - "int64" / "uint64": one 64-bit element per value; uint64 values are reduced
 *   modulo 2^64.
 * - "utf8" / "binary": for each value, a little-endian u32 byte length followed by
 *   the UTF-8 bytes of its string form; null (or a missing cell) is an empty string.
 *
 * @param values Column cells in row order.
 * @param dtype  Target column type.
 * @returns A freshly allocated buffer; an empty buffer for any dtype not handled here.
 */
export function encodeColumnBuffer(
  values: (number | bigint | string | boolean | null)[],
  dtype: DataType,
): ArrayBuffer {
  switch (dtype) {
    case "bool": {
      const bits = new Uint8Array(Math.ceil(values.length / 8));
      for (let i = 0; i < values.length; i++) {
        const cell = values[i];
        if (cell === true || cell === 1) {
          bits[i >> 3] |= 1 << (i & 7);
        }
      }
      return bits.buffer;
    }
    case "int8": {
      const out = new Int8Array(values.length);
      fillNumericColumn(out, values);
      return out.buffer;
    }
    case "uint8": {
      const out = new Uint8Array(values.length);
      fillNumericColumn(out, values);
      return out.buffer;
    }
    case "int16": {
      const out = new Int16Array(values.length);
      fillNumericColumn(out, values);
      return out.buffer;
    }
    case "uint16": {
      const out = new Uint16Array(values.length);
      fillNumericColumn(out, values);
      return out.buffer;
    }
    case "int32": {
      const out = new Int32Array(values.length);
      fillNumericColumn(out, values);
      return out.buffer;
    }
    case "uint32": {
      const out = new Uint32Array(values.length);
      fillNumericColumn(out, values);
      return out.buffer;
    }
    case "int64": {
      const out = new BigInt64Array(values.length);
      for (let i = 0; i < values.length; i++) {
        out[i] = coerceCellToBigInt(values[i]);
      }
      return out.buffer;
    }
    case "uint64": {
      const out = new BigUint64Array(values.length);
      for (let i = 0; i < values.length; i++) {
        // Make the modulo-2^64 wrap explicit for negative or oversized inputs.
        out[i] = BigInt.asUintN(64, coerceCellToBigInt(values[i]));
      }
      return out.buffer;
    }
    case "float32": {
      const out = new Float32Array(values.length);
      fillNumericColumn(out, values);
      return out.buffer;
    }
    case "float64": {
      const out = new Float64Array(values.length);
      fillNumericColumn(out, values);
      return out.buffer;
    }
    case "utf8":
    case "binary": {
      // Pass 1: encode every value so the exact output size is known up front.
      const encoder = new TextEncoder();
      const chunks: Uint8Array[] = [];
      let totalLen = 0;
      for (const cell of values) {
        // `== null` also covers undefined holes, matching the `?? 0` handling
        // of the numeric branches.
        const bytes = encoder.encode(cell == null ? "" : String(cell));
        chunks.push(bytes);
        totalLen += 4 + bytes.length;
      }
      // Pass 2: write [u32 little-endian length][bytes] records back to back.
      const out = new Uint8Array(totalLen);
      const view = new DataView(out.buffer);
      let off = 0;
      for (const bytes of chunks) {
        view.setUint32(off, bytes.length, true);
        out.set(bytes, off + 4);
        off += 4 + bytes.length;
      }
      return out.buffer;
    }
    default:
      // Unhandled dtypes yield an empty buffer rather than throwing; callers
      // may rely on this, so the fallback is kept as-is.
      return new ArrayBuffer(0);
  }
}

src/readers/csv-reader.ts

Lines changed: 1 addition & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
* - FragmentSource that yields decoded rows
1010
*/
1111

12-
import type { FormatReader, DataSource } from "../reader.js";
12+
import { type FormatReader, type DataSource, encodeColumnBuffer } from "../reader.js";
1313
import type { ColumnMeta, DataType, PageInfo, Row } from "../types.js";
1414
import type { FragmentSource } from "../operators.js";
1515

@@ -302,108 +302,6 @@ class CsvFragmentSource implements FragmentSource {
302302
}
303303
}
304304

305-
// ---------------------------------------------------------------------------
306-
// Encode column values into the binary wire format used by decodePage()
307-
// ---------------------------------------------------------------------------
308-
309-
function encodeColumnBuffer(
310-
values: (number | bigint | string | boolean | null)[],
311-
dtype: DataType,
312-
): ArrayBuffer {
313-
switch (dtype) {
314-
case "bool": {
315-
const numBytes = Math.ceil(values.length / 8);
316-
const buf = new Uint8Array(numBytes);
317-
for (let i = 0; i < values.length; i++) {
318-
if (values[i] === true || values[i] === 1) {
319-
buf[i >> 3] |= 1 << (i & 7);
320-
}
321-
}
322-
return buf.buffer;
323-
}
324-
case "int8": {
325-
const arr = new Int8Array(values.length);
326-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
327-
return arr.buffer;
328-
}
329-
case "uint8": {
330-
const arr = new Uint8Array(values.length);
331-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
332-
return arr.buffer;
333-
}
334-
case "int16": {
335-
const arr = new Int16Array(values.length);
336-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
337-
return arr.buffer;
338-
}
339-
case "uint16": {
340-
const arr = new Uint16Array(values.length);
341-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
342-
return arr.buffer;
343-
}
344-
case "int32": {
345-
const arr = new Int32Array(values.length);
346-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
347-
return arr.buffer;
348-
}
349-
case "uint32": {
350-
const arr = new Uint32Array(values.length);
351-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
352-
return arr.buffer;
353-
}
354-
case "int64": {
355-
const arr = new BigInt64Array(values.length);
356-
for (let i = 0; i < values.length; i++) {
357-
const v = values[i];
358-
arr[i] = typeof v === "bigint" ? v : BigInt(Math.trunc(Number(v ?? 0)));
359-
}
360-
return arr.buffer;
361-
}
362-
case "uint64": {
363-
const arr = new BigUint64Array(values.length);
364-
for (let i = 0; i < values.length; i++) {
365-
const v = values[i];
366-
arr[i] = typeof v === "bigint" ? BigInt.asUintN(64, v) : BigInt(Math.trunc(Number(v ?? 0)));
367-
}
368-
return arr.buffer;
369-
}
370-
case "float32": {
371-
const arr = new Float32Array(values.length);
372-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
373-
return arr.buffer;
374-
}
375-
case "float64": {
376-
const arr = new Float64Array(values.length);
377-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
378-
return arr.buffer;
379-
}
380-
case "utf8":
381-
case "binary": {
382-
// Lance/QueryMode utf8 wire format: [u32 len][bytes]...
383-
const encoder = new TextEncoder();
384-
const parts: Uint8Array[] = [];
385-
let totalLen = 0;
386-
for (const v of values) {
387-
const str = v === null ? "" : String(v);
388-
const encoded = encoder.encode(str);
389-
const header = new Uint8Array(4);
390-
new DataView(header.buffer).setUint32(0, encoded.length, true);
391-
parts.push(header, encoded);
392-
totalLen += 4 + encoded.length;
393-
}
394-
const buf = new Uint8Array(totalLen);
395-
let off = 0;
396-
for (const p of parts) {
397-
buf.set(p, off);
398-
off += p.length;
399-
}
400-
return buf.buffer;
401-
}
402-
default:
403-
return new ArrayBuffer(0);
404-
}
405-
}
406-
407305
// ---------------------------------------------------------------------------
408306
// CsvReader — implements FormatReader
409307
// ---------------------------------------------------------------------------

src/readers/json-reader.ts

Lines changed: 1 addition & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Schema is inferred from the first N objects.
99
*/
1010

11-
import type { FormatReader, DataSource } from "../reader.js";
11+
import { type FormatReader, type DataSource, encodeColumnBuffer } from "../reader.js";
1212
import type { ColumnMeta, DataType, PageInfo, Row } from "../types.js";
1313
import type { FragmentSource } from "../operators.js";
1414

@@ -243,107 +243,6 @@ class JsonFragmentSource implements FragmentSource {
243243
}
244244
}
245245

246-
// ---------------------------------------------------------------------------
247-
// Encode typed values into the binary format expected by decodePage()
248-
// ---------------------------------------------------------------------------
249-
250-
function encodeColumnBuffer(
251-
values: (number | bigint | string | boolean | null)[],
252-
dtype: DataType,
253-
): ArrayBuffer {
254-
switch (dtype) {
255-
case "bool": {
256-
const numBytes = Math.ceil(values.length / 8);
257-
const buf = new Uint8Array(numBytes);
258-
for (let i = 0; i < values.length; i++) {
259-
if (values[i] === true || values[i] === 1) {
260-
buf[i >> 3] |= 1 << (i & 7);
261-
}
262-
}
263-
return buf.buffer;
264-
}
265-
case "int8": {
266-
const arr = new Int8Array(values.length);
267-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
268-
return arr.buffer;
269-
}
270-
case "uint8": {
271-
const arr = new Uint8Array(values.length);
272-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
273-
return arr.buffer;
274-
}
275-
case "int16": {
276-
const arr = new Int16Array(values.length);
277-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
278-
return arr.buffer;
279-
}
280-
case "uint16": {
281-
const arr = new Uint16Array(values.length);
282-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
283-
return arr.buffer;
284-
}
285-
case "int32": {
286-
const arr = new Int32Array(values.length);
287-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
288-
return arr.buffer;
289-
}
290-
case "uint32": {
291-
const arr = new Uint32Array(values.length);
292-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
293-
return arr.buffer;
294-
}
295-
case "int64": {
296-
const arr = new BigInt64Array(values.length);
297-
for (let i = 0; i < values.length; i++) {
298-
const v = values[i];
299-
arr[i] = typeof v === "bigint" ? v : BigInt(Math.trunc(Number(v ?? 0)));
300-
}
301-
return arr.buffer;
302-
}
303-
case "uint64": {
304-
const arr = new BigUint64Array(values.length);
305-
for (let i = 0; i < values.length; i++) {
306-
const v = values[i];
307-
arr[i] = typeof v === "bigint" ? BigInt.asUintN(64, v) : BigInt(Math.trunc(Number(v ?? 0)));
308-
}
309-
return arr.buffer;
310-
}
311-
case "float32": {
312-
const arr = new Float32Array(values.length);
313-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
314-
return arr.buffer;
315-
}
316-
case "float64": {
317-
const arr = new Float64Array(values.length);
318-
for (let i = 0; i < values.length; i++) arr[i] = Number(values[i] ?? 0);
319-
return arr.buffer;
320-
}
321-
case "utf8":
322-
case "binary": {
323-
const encoder = new TextEncoder();
324-
const parts: Uint8Array[] = [];
325-
let totalLen = 0;
326-
for (const v of values) {
327-
const str = v === null ? "" : String(v);
328-
const encoded = encoder.encode(str);
329-
const header = new Uint8Array(4);
330-
new DataView(header.buffer).setUint32(0, encoded.length, true);
331-
parts.push(header, encoded);
332-
totalLen += 4 + encoded.length;
333-
}
334-
const buf = new Uint8Array(totalLen);
335-
let off = 0;
336-
for (const p of parts) {
337-
buf.set(p, off);
338-
off += p.length;
339-
}
340-
return buf.buffer;
341-
}
342-
default:
343-
return new ArrayBuffer(0);
344-
}
345-
}
346-
347246
// ---------------------------------------------------------------------------
348247
// JsonReader — implements FormatReader
349248
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)