Skip to content

Commit 53567bf

Browse files
merge base
2 parents e30f139 + ff8439c commit 53567bf

1 file changed

Lines changed: 50 additions & 0 deletions

File tree

tests/test_performance.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,53 @@ def test_eq_missing_not_slower_than_element_wise_for_list_columns() -> None:
131131
f"({mean_time_cond:.3f}s vs {mean_time_eq:.3f}s). "
132132
f"Expected comparable performance since list<i64> should use eq_missing directly."
133133
)
134+
135+
136+
def test_element_wise_comparison_slower_than_eq_missing_for_list_columns() -> None:
    """Benchmark a direct ``eq_missing()`` against ``condition_equal_columns()``.

    Two identical list columns (500,000 rows, each holding the integers
    0..19) are compared in two ways: with a plain ``eq_missing()`` expression
    and with the expression returned by ``condition_equal_columns()``. Both
    are timed over several runs, the warm-up runs are discarded, and the test
    asserts that the ``condition_equal_columns()`` path is on average more
    than 2x slower than the direct ``eq_missing()`` call.
    """
    # NOTE(review): the intent appears to be measuring the element-wise
    # ``_compare_sequence_columns()`` path; presumably
    # ``condition_equal_columns()`` dispatches to it for these inputs --
    # confirm against its implementation.
    row_count = 500_000
    items_per_list = 20
    warmup_runs = 2
    measured_runs = 10

    left_name = f"val_{Side.LEFT}"
    right_name = f"val_{Side.RIGHT}"
    frame = pl.DataFrame(
        {
            name: [list(range(items_per_list)) for _ in range(row_count)]
            for name in (left_name, right_name)
        }
    )

    eq_durations: list[float] = []
    cond_durations: list[float] = []
    for _ in range(warmup_runs + measured_runs):
        # Direct comparison; building the expression is part of the timed region.
        tick = time.perf_counter()
        direct_expr = pl.col(left_name).eq_missing(pl.col(right_name))
        frame.select(direct_expr).to_series()
        eq_durations.append(time.perf_counter() - tick)

        # Comparison via the library helper; the helper call itself is timed
        # too, exactly like the expression construction above.
        tick = time.perf_counter()
        helper_expr = condition_equal_columns(
            column="val",
            dtype_left=frame.schema[left_name],
            dtype_right=frame.schema[right_name],
            max_list_length=items_per_list,
            abs_tol=ABS_TOL_DEFAULT,
            rel_tol=REL_TOL_DEFAULT,
            abs_tol_temporal=ABS_TOL_TEMPORAL_DEFAULT,
        )
        frame.select(helper_expr).to_series()
        cond_durations.append(time.perf_counter() - tick)

    # Only the runs after the warm-up phase contribute to the averages.
    mean_time_eq = statistics.mean(eq_durations[warmup_runs:])
    mean_time_cond = statistics.mean(cond_durations[warmup_runs:])

    ratio = mean_time_cond / mean_time_eq
    assert ratio > 2.0, (
        f"Element-wise comparison was only {ratio:.1f}x slower than eq_missing "
        f"({mean_time_cond:.3f}s vs {mean_time_eq:.3f}s). "
        f"Expected at least 2x slowdown to justify the optimization."
    )

0 commit comments

Comments
 (0)