From fb262d04897c0d44ff770543e090b4b90f36c80d Mon Sep 17 00:00:00 2001 From: noahisaksen Date: Sun, 4 Jan 2026 16:47:04 -0500 Subject: [PATCH 1/6] test to reproduce --- test/sql/storage/limit_parallel_bug.test | 67 ++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 test/sql/storage/limit_parallel_bug.test diff --git a/test/sql/storage/limit_parallel_bug.test b/test/sql/storage/limit_parallel_bug.test new file mode 100644 index 000000000..06edb7d80 --- /dev/null +++ b/test/sql/storage/limit_parallel_bug.test @@ -0,0 +1,67 @@ +# name: test/sql/storage/limit_parallel_bug.test +# description: Test LIMIT bug with parallelism (issues #380, #395) +# group: [storage] + +require postgres_scanner + +require-env POSTGRES_TEST_DATABASE_AVAILABLE + +statement ok +ATTACH 'host=127.0.0.1 port=5432 dbname=postgresscanner user=postgres' AS s (TYPE POSTGRES) + +# Create a large table to enable parallelism +statement ok +CREATE OR REPLACE TABLE s.limit_test_large AS +SELECT unnest(generate_series(1, 1000000)) as id, + CASE floor(random() * 3)::int + WHEN 0 THEN 'Group A' + WHEN 1 THEN 'Group B' + ELSE 'Group C' + END as grp + +# Force parallelism with small pages_per_task +statement ok +SET pg_pages_per_task=1 + +# Test 1: Basic LIMIT 1 should return exactly 1 row +query I +SELECT count(*) FROM (SELECT * FROM s.limit_test_large LIMIT 1) +---- +1 + +# Test 2: LIMIT 1 with WHERE clause +query I +SELECT count(*) FROM (SELECT * FROM s.limit_test_large WHERE grp = 'Group A' LIMIT 1) +---- +1 + +# Test 3: Within transaction +statement ok +BEGIN + +query I +SELECT count(*) FROM (SELECT * FROM s.limit_test_large LIMIT 1) +---- +1 + +statement ok +ROLLBACK + +# Test 4: Transaction with index creation (issue #395 scenario) +statement ok +BEGIN + +statement ok +CREATE INDEX IF NOT EXISTS limit_test_idx ON s.limit_test_large (grp) + +query I +SELECT count(*) FROM (SELECT * FROM s.limit_test_large WHERE grp = 'Group A' LIMIT 1) +---- +1 + +statement ok +ROLLBACK + +# 
Cleanup +statement ok +DROP TABLE IF EXISTS s.limit_test_large From a35b0ebd2db1bcc55f151d809973dae86d73440d Mon Sep 17 00:00:00 2001 From: noahisaksen Date: Sun, 4 Jan 2026 17:03:08 -0500 Subject: [PATCH 2/6] fix --- src/storage/postgres_optimizer.cpp | 5 +++++ test/sql/storage/limit_parallel_bug.test | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/storage/postgres_optimizer.cpp b/src/storage/postgres_optimizer.cpp index 57cac8139..9935d8671 100644 --- a/src/storage/postgres_optimizer.cpp +++ b/src/storage/postgres_optimizer.cpp @@ -69,6 +69,11 @@ static void OptimizePostgresScanLimitPushdown(unique_ptr<LogicalOperator> &op) { if (!generated_limit_clause.empty()) { bind_data.limit = generated_limit_clause; + // When LIMIT is pushed down to Postgres, we must ensure single-task execution + // to avoid each task (whether parallel or sequential) applying the LIMIT independently. + // Setting pages_approx = 0 disables CTID-based task splitting, ensuring a single query. + bind_data.pages_approx = 0; + bind_data.max_threads = 1; op = std::move(op->children[0]); return; diff --git a/test/sql/storage/limit_parallel_bug.test b/test/sql/storage/limit_parallel_bug.test index 06edb7d80..b82853a05 100644 --- a/test/sql/storage/limit_parallel_bug.test +++ b/test/sql/storage/limit_parallel_bug.test @@ -62,6 +62,5 @@ SELECT count(*) FROM (SELECT * FROM s.limit_test_large WHERE grp = 'Group A' LIM statement ok ROLLBACK -# Cleanup statement ok DROP TABLE IF EXISTS s.limit_test_large From b2c1ed3d17d09293cd4fa4b4f3ba90f6547485b9 Mon Sep 17 00:00:00 2001 From: noahisaksen Date: Sun, 4 Jan 2026 17:36:53 -0500 Subject: [PATCH 3/6] remove old redundant check --- src/storage/postgres_optimizer.cpp | 5 ----- test/sql/storage/limit_parallel_bug.test | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/storage/postgres_optimizer.cpp b/src/storage/postgres_optimizer.cpp index 9935d8671..5e77ae6c7 100644 --- a/src/storage/postgres_optimizer.cpp +++ 
b/src/storage/postgres_optimizer.cpp @@ -53,11 +53,6 @@ static void OptimizePostgresScanLimitPushdown(unique_ptr<LogicalOperator> &op) { } auto &bind_data = get.bind_data->Cast<PostgresBindData>(); - if (bind_data.max_threads != 1 || !bind_data.can_use_main_thread) { - // cannot push down limit/offset if we are not using the main thread - OptimizePostgresScanLimitPushdown(op->children[0]); - return; - } string generated_limit_clause = ""; if (limit.limit_val.Type() != LimitNodeType::UNSET) { diff --git a/test/sql/storage/limit_parallel_bug.test b/test/sql/storage/limit_parallel_bug.test index b82853a05..dc325a9e7 100644 --- a/test/sql/storage/limit_parallel_bug.test +++ b/test/sql/storage/limit_parallel_bug.test @@ -7,7 +7,7 @@ require postgres_scanner require-env POSTGRES_TEST_DATABASE_AVAILABLE statement ok -ATTACH 'host=127.0.0.1 port=5432 dbname=postgresscanner user=postgres' AS s (TYPE POSTGRES) +ATTACH 'dbname=postgresscanner' AS s (TYPE POSTGRES) # Create a large table to enable parallelism statement ok From 8369b0ef59841f54a97685439e50a4ae1994ee26 Mon Sep 17 00:00:00 2001 From: noahisaksen Date: Sun, 4 Jan 2026 17:45:50 -0500 Subject: [PATCH 4/6] update limit.test to reflect new behavior LIMIT is now always pushed down to Postgres with single-task execution enforced, so the EXPLAIN test should expect no LIMIT in the DuckDB plan --- test/sql/storage/limit.test | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/sql/storage/limit.test b/test/sql/storage/limit.test index 15ca5bff4..d554b9c4f 100644 --- a/test/sql/storage/limit.test +++ b/test/sql/storage/limit.test @@ -65,9 +65,10 @@ FROM s.large_tbl LIMIT 5 OFFSET 5 statement ok set explain_output='optimized_only' -# limit is still in plan as we were not able to push down due to parallel execution +# limit is pushed down to Postgres even with pg_pages_per_task=1 +# (we now force single-task execution when LIMIT is pushed down) query II EXPLAIN FROM s.large_tbl LIMIT 5; ---- -logical_opt	<REGEX>:.*LIMIT.* +logical_opt	<!REGEX>
:.*LIMIT.* From 70a27059f307f62e7c0636311bce7d20a079cbab Mon Sep 17 00:00:00 2001 From: noahisaksen Date: Sun, 4 Jan 2026 17:56:22 -0500 Subject: [PATCH 5/6] test behaviour of LIMIT --- test/sql/storage/limit_pushdown.test | 144 +++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 test/sql/storage/limit_pushdown.test diff --git a/test/sql/storage/limit_pushdown.test b/test/sql/storage/limit_pushdown.test new file mode 100644 index 000000000..aa7350409 --- /dev/null +++ b/test/sql/storage/limit_pushdown.test @@ -0,0 +1,144 @@ +# name: test/sql/storage/limit_pushdown.test +# description: Test LIMIT pushdown behavior - LIMIT is always pushed to Postgres with single-task execution +# group: [storage] + +require postgres_scanner + +require-env POSTGRES_TEST_DATABASE_AVAILABLE + +statement ok +ATTACH 'host=127.0.0.1 port=5432 dbname=postgresscanner user=postgres' AS s (TYPE POSTGRES) + +statement ok +CREATE OR REPLACE TABLE s.ordered_tbl AS SELECT generate_series AS i FROM generate_series(0, 99999) + +# Verify table has expected row count +query I +SELECT count(*) FROM s.ordered_tbl +---- +100000 + +statement ok +SET explain_output='optimized_only' + +# Test 1: Default settings - LIMIT should be pushed down (not in DuckDB plan) +query II +EXPLAIN SELECT * FROM s.ordered_tbl LIMIT 5; +---- +logical_opt	<!REGEX>:.*LIMIT.* + +# Verify correct results with default settings +query I +SELECT * FROM s.ordered_tbl LIMIT 5 +---- +0 +1 +2 +3 +4 + +# Test 2: With pg_pages_per_task=1 (would normally cause parallel splitting) +# LIMIT should still be pushed down because we force single-task execution +statement ok +SET pg_pages_per_task=1 + +query II +EXPLAIN SELECT * FROM s.ordered_tbl LIMIT 5; +---- +logical_opt	<!REGEX>:.*LIMIT.* + +query I +SELECT * FROM s.ordered_tbl LIMIT 5 +---- +0 +1 +2 +3 +4 + +# Test 3: LIMIT with OFFSET +query I +SELECT * FROM s.ordered_tbl LIMIT 5 OFFSET 10 +---- +10 +11 +12 +13 +14 + +# Test 4: LIMIT with ORDER BY +query I +SELECT * FROM 
s.ordered_tbl ORDER BY i DESC LIMIT 5 +---- +99999 +99998 +99997 +99996 +99995 + +# Test 5: LIMIT with WHERE clause +query I +SELECT * FROM s.ordered_tbl WHERE i >= 1000 LIMIT 5 +---- +1000 +1001 +1002 +1003 +1004 + +# Test 6: LIMIT 1 should return exactly 1 row +query I +SELECT count(*) FROM (SELECT * FROM s.ordered_tbl LIMIT 1) +---- +1 + +# Test 7: Larger LIMIT values +query I +SELECT count(*) FROM (SELECT * FROM s.ordered_tbl LIMIT 100) +---- +100 + +query I +SELECT count(*) FROM (SELECT * FROM s.ordered_tbl LIMIT 1000) +---- +1000 + +# Test 8: LIMIT with very small pages_per_task +statement ok +SET pg_pages_per_task=1 + +query I +SELECT count(*) FROM (SELECT * FROM s.ordered_tbl LIMIT 50) +---- +50 + +# Test 9: Within a transaction +statement ok +BEGIN + +query I +SELECT * FROM s.ordered_tbl LIMIT 3 +---- +0 +1 +2 + +statement ok +COMMIT + +# Test 10: Reset to default and verify behavior is consistent +statement ok +RESET pg_pages_per_task + +query I +SELECT * FROM s.ordered_tbl LIMIT 5 +---- +0 +1 +2 +3 +4 + +# Cleanup +statement ok +DROP TABLE IF EXISTS s.ordered_tbl From 263f13a51e7102e9f74714b0a3f1beea976dd9bf Mon Sep 17 00:00:00 2001 From: noahisaksen Date: Sun, 4 Jan 2026 18:14:28 -0500 Subject: [PATCH 6/6] use proper attach path --- test/sql/storage/limit_pushdown.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/storage/limit_pushdown.test b/test/sql/storage/limit_pushdown.test index aa7350409..5d2b95589 100644 --- a/test/sql/storage/limit_pushdown.test +++ b/test/sql/storage/limit_pushdown.test @@ -7,7 +7,7 @@ require postgres_scanner require-env POSTGRES_TEST_DATABASE_AVAILABLE statement ok -ATTACH 'host=127.0.0.1 port=5432 dbname=postgresscanner user=postgres' AS s (TYPE POSTGRES) +ATTACH 'dbname=postgresscanner' AS s (TYPE POSTGRES) statement ok CREATE OR REPLACE TABLE s.ordered_tbl AS SELECT generate_series AS i FROM generate_series(0, 99999)