Skip to content

Commit c05a4f8

Browse files
committed
fix: HAVING/ORDER BY stripped columns not in SELECT, compiler missed HAVING aggregates, append dropped booleans
Three bugs fixed: 1. SqlWrappingExecutor applied projection filter (step 3b) before HAVING and ORDER BY, stripping aggregate/sort columns not in SELECT. Example: SELECT dept FROM t GROUP BY dept HAVING SUM(salary) > 1000 would strip sum_salary before HAVING could evaluate it. Fix: defer projections to after HAVING + ORDER BY (step 5b), also strip projections from inner query when HAVING or multi-sort present, and propagate correct columns to final result. 2. Compiler only extracted aggregates from SELECT items. HAVING referencing aggregates not in SELECT (e.g. HAVING COUNT(*) > 5 with SELECT dept) produced no aggregates — executor never computed them. Fix: extractAggregatesFromExpr() walks HAVING expression tree and adds missing aggregates (deduped) before rewriting to column refs. 3. local-executor append() silently dropped boolean columns — the if/else chain handled number/bigint/string but not boolean. Fix: added boolean branch converting to int64 (0/1).
1 parent 29475b0 commit c05a4f8

File tree

4 files changed

+105
-14
lines changed

4 files changed

+105
-14
lines changed

src/local-executor.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ export class LocalExecutor implements QueryExecutor {
130130
const arr = new BigInt64Array(rows.length);
131131
for (let i = 0; i < rows.length; i++) arr[i] = rows[i][colName] as bigint;
132132
columnArrays.push({ name: colName, dtype: "int64", values: arr.buffer });
133+
} else if (typeof sample === "boolean") {
134+
const arr = new BigInt64Array(rows.length);
135+
for (let i = 0; i < rows.length; i++) { const v = rows[i][colName]; arr[i] = v === null || v === undefined ? 0n : v ? 1n : 0n; }
136+
columnArrays.push({ name: colName, dtype: "int64", values: arr.buffer });
133137
} else if (typeof sample === "string") {
134138
const enc = textEncoder;
135139
const parts: Uint8Array[] = [];

src/sql/compiler.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,10 @@ export function compileFull(stmt: SelectStmt): SqlCompileResult {
152152
}
153153

154154
// HAVING — rewrite aggregate calls to column refs for post-aggregation evaluation
155+
// Also extract any aggregates referenced in HAVING that aren't already in SELECT
155156
let havingExpr: SqlExpr | undefined;
156157
if (stmt.groupBy?.having) {
158+
extractAggregatesFromExpr(stmt.groupBy.having, aggregates);
157159
havingExpr = rewriteAggregatesAsColumns(stmt.groupBy.having);
158160
}
159161

@@ -235,6 +237,48 @@ function isAggregateCall(name: string): boolean {
235237
return AGGREGATE_FNS.has(name.toUpperCase());
236238
}
237239

240+
/** Walk an expression and add any aggregate calls to the list (deduped by fn+column). */
241+
function extractAggregatesFromExpr(expr: SqlExpr, aggregates: AggregateOp[]): void {
242+
switch (expr.kind) {
243+
case "call":
244+
if (isAggregateCall(expr.name)) {
245+
const agg = compileAggregate(expr);
246+
// Dedupe: don't add if an equivalent aggregate already exists
247+
const exists = aggregates.some(a => a.fn === agg.fn && a.column === agg.column);
248+
if (!exists) aggregates.push(agg);
249+
}
250+
for (const arg of expr.args) extractAggregatesFromExpr(arg, aggregates);
251+
return;
252+
case "binary":
253+
extractAggregatesFromExpr(expr.left, aggregates);
254+
extractAggregatesFromExpr(expr.right, aggregates);
255+
return;
256+
case "unary":
257+
extractAggregatesFromExpr(expr.operand, aggregates);
258+
return;
259+
case "between":
260+
extractAggregatesFromExpr(expr.expr, aggregates);
261+
extractAggregatesFromExpr(expr.low, aggregates);
262+
extractAggregatesFromExpr(expr.high, aggregates);
263+
return;
264+
case "in_list":
265+
extractAggregatesFromExpr(expr.expr, aggregates);
266+
for (const v of expr.values) extractAggregatesFromExpr(v, aggregates);
267+
return;
268+
case "case_expr":
269+
if (expr.operand) extractAggregatesFromExpr(expr.operand, aggregates);
270+
for (const w of expr.whenClauses) {
271+
extractAggregatesFromExpr(w.condition, aggregates);
272+
extractAggregatesFromExpr(w.result, aggregates);
273+
}
274+
if (expr.elseResult) extractAggregatesFromExpr(expr.elseResult, aggregates);
275+
return;
276+
case "cast":
277+
extractAggregatesFromExpr(expr.expr, aggregates);
278+
return;
279+
}
280+
}
281+
238282
function compileAggregate(expr: SqlExpr & { kind: "call" }, alias?: string): AggregateOp {
239283
let fn = expr.name.toLowerCase() as AggregateOp["fn"];
240284
let column = "*";

src/sql/executor.ts

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,13 @@ export class SqlWrappingExecutor implements QueryExecutor {
5555
delete innerQuery.groupBy;
5656
}
5757

58-
// If we have computed expressions, we need all columns for evaluation
59-
// — strip projections from inner query and apply them after computing
58+
// Strip projections when we need columns not in SELECT:
59+
// - computedExprs reference arbitrary columns for CASE/CAST/arithmetic
60+
// - havingExpr references aggregate columns that may not be in SELECT
61+
// - multi-column ORDER BY may sort on columns not in SELECT
62+
// Projections are re-applied after HAVING and ORDER BY (step 5b).
6063
let savedProjections: string[] | undefined;
61-
if (this.opts.computedExprs && this.opts.computedExprs.length > 0) {
64+
if ((this.opts.computedExprs && this.opts.computedExprs.length > 0) || this.opts.havingExpr || hasMultiSort) {
6265
savedProjections = innerQuery.projections;
6366
innerQuery.projections = [];
6467
}
@@ -105,16 +108,6 @@ export class SqlWrappingExecutor implements QueryExecutor {
105108
});
106109
}
107110

108-
// 3b. Apply deferred projections (after computed expressions added their columns)
109-
if (savedProjections && savedProjections.length > 0) {
110-
const keep = new Set([...savedProjections, ...((this.opts.computedExprs ?? []).map(e => e.alias))]);
111-
rows = rows.map(row => {
112-
const out: Row = {};
113-
for (const k of keep) if (k in row) out[k] = row[k];
114-
return out;
115-
});
116-
}
117-
118111
// 4. HAVING filter (post-aggregation)
119112
if (this.opts.havingExpr) {
120113
const havingExpr = this.opts.havingExpr;
@@ -143,13 +136,26 @@ export class SqlWrappingExecutor implements QueryExecutor {
143136
});
144137
}
145138

139+
// 5b. Apply deferred projections (after HAVING/ORDER BY which may reference non-SELECT columns)
140+
if (savedProjections && savedProjections.length > 0) {
141+
const keep = new Set([...savedProjections, ...((this.opts.computedExprs ?? []).map(e => e.alias))]);
142+
rows = rows.map(row => {
143+
const out: Row = {};
144+
for (const k of keep) if (k in row) out[k] = row[k];
145+
return out;
146+
});
147+
}
148+
146149
// 6. Re-apply offset + limit (stripped when post-filtering or multi-sorting)
147150
if (needsExternalLimit) {
148151
if (externalOffset) rows = rows.slice(externalOffset);
149152
if (externalLimit !== undefined) rows = rows.slice(0, externalLimit);
150153
}
151154

152-
return { ...result, rows, rowCount: rows.length };
155+
const finalColumns = savedProjections && savedProjections.length > 0
156+
? [...new Set([...savedProjections, ...((this.opts.computedExprs ?? []).map(e => e.alias))])]
157+
: result.columns;
158+
return { ...result, rows, rowCount: rows.length, columns: finalColumns };
153159
}
154160

155161
async explain(query: QueryDescriptor): Promise<import("../types.js").ExplainResult> {

src/sql/integration.test.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,43 @@ describe("SQL Integration - buildSqlDataFrame", () => {
207207
});
208208
});
209209

210+
describe("SQL Integration - HAVING with projections", () => {
211+
it("HAVING references aggregate column not in SELECT", async () => {
212+
const df = buildSqlDataFrame(
213+
"SELECT dept FROM data GROUP BY dept HAVING SUM(salary) > 300000",
214+
makeExecutor(),
215+
);
216+
const result = await df.collect();
217+
// eng: 120k+95k+130k = 345k > 300k ✓, sales: 110k+105k = 215k < 300k ✗
218+
expect(result.rowCount).toBe(1);
219+
expect(result.rows[0].dept).toBe("eng");
220+
// sum_salary should NOT be in final output since it's not in SELECT
221+
expect(result.columns).toEqual(["dept"]);
222+
});
223+
224+
it("HAVING with COUNT not in SELECT", async () => {
225+
const df = buildSqlDataFrame(
226+
"SELECT dept FROM data GROUP BY dept HAVING COUNT(*) >= 3",
227+
makeExecutor(),
228+
);
229+
const result = await df.collect();
230+
// eng: 3 rows ✓, sales: 2 rows ✗
231+
expect(result.rowCount).toBe(1);
232+
expect(result.rows[0].dept).toBe("eng");
233+
});
234+
235+
it("ORDER BY column not in SELECT", async () => {
236+
const df = buildSqlDataFrame(
237+
"SELECT name FROM data ORDER BY age ASC, salary DESC",
238+
makeExecutor(),
239+
);
240+
const result = await df.collect();
241+
// Bob(25), Diana(28), Alice(30), Eve(32), Charlie(35)
242+
expect(result.rows.map(r => r.name)).toEqual(["Bob", "Diana", "Alice", "Eve", "Charlie"]);
243+
expect(result.columns).toEqual(["name"]);
244+
});
245+
});
246+
210247
describe("SQL Integration - CTEs", () => {
211248
it("CTE inlines filters into main query", async () => {
212249
const df = buildSqlDataFrame(

0 commit comments

Comments
 (0)