Skip to content

Commit 9fd6079

Browse files
Merge branch 'benchmark' into optimize
2 parents 55766c4 + ef9aa25 commit 9fd6079

1 file changed

Lines changed: 46 additions & 0 deletions

File tree

tests/test_performance.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,49 @@ def test_eq_missing_not_slower_than_element_wise_for_list_columns() -> None:
131131
f"({mean_time_cond:.3f}s vs {mean_time_eq:.3f}s). "
132132
f"Expected comparable performance since list<i64> should use eq_missing directly."
133133
)
134+
135+
136+
def test_eq_missing_not_slower_than_field_wise_for_struct_columns() -> None:
    """Check that struct comparison is not slower than plain ``eq_missing()``.

    Builds a frame holding two identical struct columns (``n_fields`` integer
    fields per row) and repeatedly times two ways of comparing them:

    * calling ``eq_missing()`` directly on the two columns, and
    * evaluating the expression returned by ``condition_equal_columns`` with
      the default tolerances.

    Warm-up iterations are discarded; the test fails when the mean time of the
    ``condition_equal_columns`` path is 1.25x the mean ``eq_missing()`` time
    or more.
    """
    n_rows = 500_000
    n_fields = 20
    num_runs_warmup = 2
    num_runs_measured = 10
    total_runs = num_runs_warmup + num_runs_measured

    left_name = f"val_{Side.LEFT}"
    right_name = f"val_{Side.RIGHT}"
    # One dict per row; both columns are built from the same rows, so every
    # comparison evaluates to "equal".
    rows = [
        {f"f{offset}": base + offset for offset in range(n_fields)}
        for base in range(n_rows)
    ]
    df = pl.DataFrame({left_name: rows, right_name: rows})

    eq_durations = []
    cond_durations = []
    for _run in range(total_runs):
        # Baseline: direct eq_missing() on the struct columns.
        began = time.perf_counter()
        df.select(pl.col(left_name).eq_missing(pl.col(right_name))).to_series()
        eq_durations.append(time.perf_counter() - began)

        # Candidate: building the expression is deliberately part of the
        # timed region, alongside its evaluation.
        began = time.perf_counter()
        df.select(
            condition_equal_columns(
                column="val",
                dtype_left=df.schema[left_name],
                dtype_right=df.schema[right_name],
                max_list_length=None,
                abs_tol=ABS_TOL_DEFAULT,
                rel_tol=REL_TOL_DEFAULT,
                abs_tol_temporal=ABS_TOL_TEMPORAL_DEFAULT,
            )
        ).to_series()
        cond_durations.append(time.perf_counter() - began)

    # Drop the warm-up iterations before averaging.
    mean_eq = statistics.mean(eq_durations[num_runs_warmup:])
    mean_cond = statistics.mean(cond_durations[num_runs_warmup:])

    slowdown = mean_cond / mean_eq
    assert slowdown < 1.25, (
        f"condition_equal_columns was {slowdown:.1f}x slower than eq_missing "
        f"({mean_cond:.3f}s vs {mean_eq:.3f}s). "
        "Expected comparable performance since struct<i64> fields should use "
        "eq_missing directly."
    )

0 commit comments

Comments
 (0)