Skip to content

Commit 68c28bb

Browse files
committed
fix: MaterializedExecutor distinct() with no args collapsed all rows into a single row
groupKey(row, []) returned "" for every row because the column list was empty, so every row after the first was treated as a "duplicate" and dropped. distinct() now falls back to Object.keys(row) when the distinct column list is empty — matching DistinctOperator behavior. Also adds tests for MaterializedExecutor window functions and distinct.
1 parent aa846e5 commit 68c28bb

File tree

2 files changed

+69
-2
lines changed

2 files changed

+69
-2
lines changed

src/client.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -990,11 +990,12 @@ export class MaterializedExecutor implements QueryExecutor {
990990
rows = rows.filter(row => rowPassesFilters(row, query.filters, query.filterGroups));
991991
}
992992

993-
// Apply distinct
993+
// Apply distinct — empty array means "all columns" (same as DistinctOperator)
994994
if (query.distinct) {
995995
const seen = new Set<string>();
996996
rows = rows.filter(row => {
997-
const key = groupKey(row, query.distinct!);
997+
const cols = query.distinct!.length > 0 ? query.distinct! : Object.keys(row);
998+
const key = groupKey(row, cols);
998999
if (seen.has(key)) return false;
9991000
seen.add(key);
10001001
return true;

src/materialized-executor.test.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,72 @@ describe("MaterializedExecutor", () => {
440440
});
441441
});
442442

443+
describe("distinct", () => {
444+
it("removes duplicate rows by specified columns", async () => {
445+
const result = await createFromJSON(data).distinct("region").collect();
446+
const regions = result.rows.map(r => r.region);
447+
expect(new Set(regions).size).toBe(regions.length);
448+
expect(result.rowCount).toBe(3); // us, eu, asia
449+
});
450+
451+
it("distinct with no args deduplicates on all columns", async () => {
452+
const duped = [
453+
{ id: 1, name: "Alice" },
454+
{ id: 2, name: "Bob" },
455+
{ id: 1, name: "Alice" }, // exact duplicate
456+
{ id: 3, name: "Charlie" },
457+
];
458+
const result = await createFromJSON(duped).distinct().collect();
459+
expect(result.rowCount).toBe(3);
460+
});
461+
462+
it("distinct with no args preserves rows with different values", async () => {
463+
const result = await createFromJSON(data).distinct().collect();
464+
expect(result.rowCount).toBe(5); // all rows are unique
465+
});
466+
});
467+
468+
describe("window functions", () => {
469+
it("row_number partitioned by region", async () => {
470+
const result = await createFromJSON(data)
471+
.window({
472+
fn: "row_number",
473+
partitionBy: ["region"],
474+
orderBy: [{ column: "amount", direction: "asc" }],
475+
alias: "rn",
476+
})
477+
.collect();
478+
expect(result.rowCount).toBe(5);
479+
// Each partition should have row numbers starting at 1
480+
for (const row of result.rows) {
481+
expect(typeof row.rn).toBe("number");
482+
expect(row.rn).toBeGreaterThanOrEqual(1);
483+
}
484+
// US partition: Alice(100)=1, Charlie(150)=2
485+
const usRows = result.rows.filter(r => r.region === "us").sort((a, b) => (a.amount as number) - (b.amount as number));
486+
expect(usRows[0].rn).toBe(1);
487+
expect(usRows[1].rn).toBe(2);
488+
});
489+
490+
it("sum window over all rows", async () => {
491+
const result = await createFromJSON(data)
492+
.window({
493+
fn: "sum",
494+
column: "amount",
495+
partitionBy: [],
496+
orderBy: [],
497+
alias: "total",
498+
frame: { type: "rows", start: "unbounded", end: "unbounded" },
499+
})
500+
.collect();
501+
expect(result.rowCount).toBe(5);
502+
// Every row should have total = 800
503+
for (const row of result.rows) {
504+
expect(row.total).toBe(800);
505+
}
506+
});
507+
});
508+
443509
describe("schema evolution", () => {
444510
it("addColumn adds column with default value to all rows", async () => {
445511
const df = createFromJSON(data);

0 commit comments

Comments
 (0)