Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/storage/postgres_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,6 @@ static void OptimizePostgresScanLimitPushdown(unique_ptr<LogicalOperator> &op) {
}

auto &bind_data = get.bind_data->Cast<PostgresBindData>();
if (bind_data.max_threads != 1 || !bind_data.can_use_main_thread) {
// cannot push down limit/offset if we are not using the main thread
OptimizePostgresScanLimitPushdown(op->children[0]);
return;
}

string generated_limit_clause = "";
if (limit.limit_val.Type() != LimitNodeType::UNSET) {
Expand All @@ -69,6 +64,11 @@ static void OptimizePostgresScanLimitPushdown(unique_ptr<LogicalOperator> &op) {

if (!generated_limit_clause.empty()) {
bind_data.limit = generated_limit_clause;
// When LIMIT is pushed down to Postgres, we must ensure single-task execution;
// otherwise every task (parallel or sequential) would apply the LIMIT to its own
// CTID range, returning up to LIMIT rows per task instead of per query.
// Setting pages_approx = 0 disables CTID-based task splitting, and max_threads = 1
// prevents parallel scan threads, so exactly one LIMIT-ed query is issued.
bind_data.pages_approx = 0;
bind_data.max_threads = 1;

op = std::move(op->children[0]);
return;
Expand Down
5 changes: 3 additions & 2 deletions test/sql/storage/limit.test
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,10 @@ FROM s.large_tbl LIMIT 5 OFFSET 5
statement ok
set explain_output='optimized_only'

# limit is still in plan as we were not able to push down due to parallel execution
# limit is pushed down to Postgres even with pg_pages_per_task=1
# (we now force single-task execution when LIMIT is pushed down)

query II
EXPLAIN FROM s.large_tbl LIMIT 5;
----
logical_opt <REGEX>:.*LIMIT.*
logical_opt <!REGEX>:.*LIMIT.*
66 changes: 66 additions & 0 deletions test/sql/storage/limit_parallel_bug.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# name: test/sql/storage/limit_parallel_bug.test
# description: Test LIMIT bug with parallelism (issues #380, #395)
# group: [storage]
# Regression test: when a LIMIT was pushed down to Postgres while the scan was
# split into multiple CTID-range tasks, each task applied the LIMIT to its own
# range, so a query with LIMIT 1 could return more than one row.

require postgres_scanner

require-env POSTGRES_TEST_DATABASE_AVAILABLE

statement ok
ATTACH 'dbname=postgresscanner' AS s (TYPE POSTGRES)

# Create a large table to enable parallelism
# (1M rows spans many pages, so with pg_pages_per_task=1 the scan would
# otherwise be split into many tasks)
statement ok
CREATE OR REPLACE TABLE s.limit_test_large AS
SELECT unnest(generate_series(1, 1000000)) as id,
CASE floor(random() * 3)::int
WHEN 0 THEN 'Group A'
WHEN 1 THEN 'Group B'
ELSE 'Group C'
END as grp

# Force parallelism with small pages_per_task
statement ok
SET pg_pages_per_task=1

# Test 1: Basic LIMIT 1 should return exactly 1 row
# (the count(*) wrapper catches the bug where each task contributed a row)
query I
SELECT count(*) FROM (SELECT * FROM s.limit_test_large LIMIT 1)
----
1

# Test 2: LIMIT 1 with WHERE clause
query I
SELECT count(*) FROM (SELECT * FROM s.limit_test_large WHERE grp = 'Group A' LIMIT 1)
----
1

# Test 3: Within transaction
statement ok
BEGIN

query I
SELECT count(*) FROM (SELECT * FROM s.limit_test_large LIMIT 1)
----
1

statement ok
ROLLBACK

# Test 4: Transaction with index creation (issue #395 scenario)
statement ok
BEGIN

statement ok
CREATE INDEX IF NOT EXISTS limit_test_idx ON s.limit_test_large (grp)

query I
SELECT count(*) FROM (SELECT * FROM s.limit_test_large WHERE grp = 'Group A' LIMIT 1)
----
1

statement ok
ROLLBACK

# Cleanup
statement ok
DROP TABLE IF EXISTS s.limit_test_large
144 changes: 144 additions & 0 deletions test/sql/storage/limit_pushdown.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# name: test/sql/storage/limit_pushdown.test
# description: Test LIMIT pushdown behavior - LIMIT is always pushed to Postgres with single-task execution
# group: [storage]

require postgres_scanner

require-env POSTGRES_TEST_DATABASE_AVAILABLE

statement ok
ATTACH 'dbname=postgresscanner' AS s (TYPE POSTGRES)

statement ok
CREATE OR REPLACE TABLE s.ordered_tbl AS SELECT generate_series AS i FROM generate_series(0, 99999)

# Verify table has expected row count
query I
SELECT count(*) FROM s.ordered_tbl
----
100000

statement ok
SET explain_output='optimized_only'

# Test 1: Default settings - LIMIT should be pushed down (not in DuckDB plan)
query II
EXPLAIN SELECT * FROM s.ordered_tbl LIMIT 5;
----
logical_opt <!REGEX>:.*LIMIT.*

# Verify correct results with default settings
# NOTE(review): this (and Tests 3/5/9) relies on Postgres returning rows in
# physical insertion order without an ORDER BY — consider adding ORDER BY i
query I
SELECT * FROM s.ordered_tbl LIMIT 5
----
0
1
2
3
4

# Test 2: With pg_pages_per_task=1 (would normally cause parallel splitting)
# LIMIT should still be pushed down because we force single-task execution
statement ok
SET pg_pages_per_task=1

query II
EXPLAIN SELECT * FROM s.ordered_tbl LIMIT 5;
----
logical_opt <!REGEX>:.*LIMIT.*

query I
SELECT * FROM s.ordered_tbl LIMIT 5
----
0
1
2
3
4

# Test 3: LIMIT with OFFSET
query I
SELECT * FROM s.ordered_tbl LIMIT 5 OFFSET 10
----
10
11
12
13
14

# Test 4: LIMIT with ORDER BY
query I
SELECT * FROM s.ordered_tbl ORDER BY i DESC LIMIT 5
----
99999
99998
99997
99996
99995

# Test 5: LIMIT with WHERE clause
query I
SELECT * FROM s.ordered_tbl WHERE i >= 1000 LIMIT 5
----
1000
1001
1002
1003
1004

# Test 6: LIMIT 1 should return exactly 1 row
query I
SELECT count(*) FROM (SELECT * FROM s.ordered_tbl LIMIT 1)
----
1

# Test 7: Larger LIMIT values
query I
SELECT count(*) FROM (SELECT * FROM s.ordered_tbl LIMIT 100)
----
100

query I
SELECT count(*) FROM (SELECT * FROM s.ordered_tbl LIMIT 1000)
----
1000

# Test 8: larger LIMIT while pg_pages_per_task=1 is still in effect
# (the SET below re-asserts the value already applied in Test 2)
statement ok
SET pg_pages_per_task=1

query I
SELECT count(*) FROM (SELECT * FROM s.ordered_tbl LIMIT 50)
----
50

# Test 9: Within a transaction
statement ok
BEGIN

query I
SELECT * FROM s.ordered_tbl LIMIT 3
----
0
1
2

statement ok
COMMIT

# Test 10: Reset to default and verify behavior is consistent
statement ok
RESET pg_pages_per_task

query I
SELECT * FROM s.ordered_tbl LIMIT 5
----
0
1
2
3
4

# Cleanup
statement ok
DROP TABLE IF EXISTS s.ordered_tbl
Loading