Skip to content

Commit 48748c2

Browse files
committed
fix: toCode() join/set-op missing tableFn prefix; add docs and 4 new tests
- Fix extractChainBody to preserve the tableFn (e.g. "qm") prefix in nested join and set-operation code generation (it was producing an invalid bare ".table()")
- Add toCode/toDescriptor to the terminal methods table in the dataframe-api docs
- Add a dedicated "Query decompilation" section with examples to the dataframe-api docs
- Update the README to mention the .toCode() decompiler
- Add 4 new tests: join tableFn prefix, set-op tableFn prefix, special-character escaping, multiple windows
1 parent ee49a08 commit 48748c2

File tree

4 files changed

+91
-5
lines changed

4 files changed

+91
-5
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ npx tsx examples/nextjs-api-route.ts
243243

244244
- **TypeScript orchestration** — Durable Object lifecycle, R2 range reads, footer caching, request routing
245245
- **Zig WASM engine** (`wasm/`) — column decoding, SIMD ops, SQL execution, vector search, fragment writing, compiles to `querymode.wasm`
246-
- **Code-first query API** — `.table().filter().select().sort().limit().exec()` or `.sql("SELECT ...")`
246+
- **Code-first query API** — `.table().filter().select().sort().limit().exec()` or `.sql("SELECT ...")`, with a `.toCode()` decompiler for logging and LLM context compression
247247
- **Write path** — `append(rows, { path, metadata })` with CAS-based manifest coordination via Master DO, `dropTable()` for cleanup
248248
- **Master/Query DO split** — single-writer Master broadcasts footer invalidations to per-region Query DOs
249249
- **Footer caching** — table footers (~4KB each) cached in DO memory with VIP eviction (hot tables protected from eviction)

docs/src/content/docs/dataframe-api.mdx

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ These execute the query and return results:
172172
| `stream(batchSize)` | `AsyncGenerator<Row[]>` | Async generator over result batches |
173173
| `cursor({ batchSize })` | `AsyncIterable<Row[]>` | Async iterable over batches |
174174
| `materialize()` | `DataFrame` | Execute and re-wrap as in-memory DataFrame |
175+
| `toCode(opts?)` | `string` | Convert the query to fluent builder TypeScript code (generates code only; does not execute the query) |
176+
| `toDescriptor()` | `QueryDescriptor` | Serializable query plan (JSON) |
175177
| `append(rows, opts?)` | `AppendResult` | Write rows (CAS-coordinated) |
176178
| `dropTable()` | `DropResult` | Delete table and all fragments |
177179

@@ -202,6 +204,45 @@ const csv = await df.toCSV({ delimiter: "\t" })
202204

203205
These are all sugar over existing methods — `shape()` calls `explain()`, `valueCounts()` uses `groupBy().aggregate()`, `fillNull()` and `cast()` use `computed()`.
204206

207+
## Query decompilation (toCode)
208+
209+
Convert any query back to readable, copy-pasteable fluent builder TypeScript. Useful for logging, debugging, and LLM context compression (~50x smaller than raw QueryDescriptor JSON).
210+
211+
```typescript
212+
const df = qm
213+
.table("orders")
214+
.filter("category", "eq", "Electronics")
215+
.filter("amount", "gte", 100)
216+
.select("id", "amount", "region")
217+
.sort("amount", "desc")
218+
.limit(20)
219+
220+
console.log(df.toCode())
221+
// const result = await qm
222+
// .table("orders")
223+
// .filter("category", "eq", "Electronics")
224+
// .filter("amount", "gte", 100)
225+
// .select("id", "amount", "region")
226+
// .sort("amount", "desc")
227+
// .limit(20)
228+
// .collect()
229+
```
230+
231+
Customize the output variable name and table function:
232+
233+
```typescript
234+
df.toCode({ variableName: "data", tableFn: "db" })
235+
// const data = await db
236+
// .table("orders")
237+
// ...
238+
239+
// Or use the standalone function on a raw descriptor:
240+
import { descriptorToCode } from "querymode"
241+
descriptorToCode(someDescriptor)
242+
```
243+
244+
Handles all features: filters (all 14 ops with shorthand methods), aggregates, groupBy, joins, window functions, set operations, vector search, distinct, sort, limit/offset, version, cache, computed columns, pipe stages.
245+
205246
## Progress tracking
206247

207248
For long-running queries, track progress with a callback:

src/descriptor-to-code.test.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,19 @@ describe("descriptorToCode", () => {
225225
expect(code).toContain('"left"');
226226
});
227227

228+
it("join — right side has tableFn prefix (not bare .table())", () => {
229+
const code = descriptorToCode(makeDesc({
230+
join: {
231+
right: makeDesc({ table: "users" }),
232+
leftKey: "user_id",
233+
rightKey: "id",
234+
type: "left",
235+
},
236+
}));
237+
// The join argument must start with "qm" (or custom tableFn), not bare ".table()"
238+
expect(code).toMatch(/\.join\(\s*\n\s*qm\s*\n/);
239+
});
240+
228241
it("join — inner (default) omits type", () => {
229242
const code = descriptorToCode(makeDesc({
230243
join: {
@@ -246,6 +259,13 @@ describe("descriptorToCode", () => {
246259
expect(code).toContain('.table("archive_orders")');
247260
});
248261

262+
it("set operation — right side has tableFn prefix", () => {
263+
const code = descriptorToCode(makeDesc({
264+
setOperation: { mode: "union", right: makeDesc({ table: "archive" }) },
265+
}), { tableFn: "db" });
266+
expect(code).toMatch(/\.union\(\s*\n\s*db\s*\n/);
267+
});
268+
249269
it("set operation — union all", () => {
250270
const code = descriptorToCode(makeDesc({
251271
setOperation: { mode: "union_all", right: makeDesc({ table: "archive" }) },
@@ -386,6 +406,26 @@ describe("descriptorToCode", () => {
386406
expect(code).toContain(".limit(0)");
387407
});
388408

409+
it("string values with special characters are properly escaped", () => {
410+
const code = descriptorToCode(makeDesc({
411+
filters: [{ column: "name", op: "eq", value: 'O\'Brien "Bob"' }],
412+
}));
413+
expect(code).toContain('.filter("name", "eq", "O\'Brien \\"Bob\\"")');
414+
});
415+
416+
it("multiple windows produce multiple .window() calls", () => {
417+
const code = descriptorToCode(makeDesc({
418+
windows: [
419+
{ fn: "row_number", partitionBy: ["a"], orderBy: [{ column: "b", direction: "asc" }], alias: "rn" },
420+
{ fn: "sum", column: "val", partitionBy: ["a"], orderBy: [], alias: "running_sum" },
421+
],
422+
}));
423+
const windowMatches = code.match(/\.window\(/g);
424+
expect(windowMatches).toHaveLength(2);
425+
expect(code).toContain('alias: "rn"');
426+
expect(code).toContain('alias: "running_sum"');
427+
});
428+
389429
it("offset 0 is NOT emitted (falsy)", () => {
390430
const code = descriptorToCode(makeDesc({ offset: 0 }));
391431
expect(code).not.toContain(".offset(");

src/descriptor-to-code.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,15 @@ export function descriptorToCode(
176176

177177
/** Extract the fluent chain body from generated code (strips variable declaration and terminal). */
178178
function extractChainBody(code: string): string {
179-
return code.split("\n")
180-
.map(l => l.trim())
181-
.filter(l => l && !l.startsWith("const ") && l !== ".collect()")
182-
.join("\n ");
179+
// Strip "const <var> = await " prefix and trailing ".collect()"
180+
const awaitIdx = code.indexOf("await ");
181+
const body = awaitIdx >= 0 ? code.slice(awaitIdx + 6) : code;
182+
const lines = body.split("\n").map(l => l.trim()).filter(Boolean);
183+
// Remove trailing .collect()
184+
if (lines.length > 0 && lines[lines.length - 1] === ".collect()") {
185+
lines.pop();
186+
}
187+
return lines.join("\n ");
183188
}
184189

185190
function str(v: unknown): string {

0 commit comments

Comments
 (0)