From 9e39de5daea4032f9d1e94add6e6e145134ade8a Mon Sep 17 00:00:00 2001 From: Kevin Patyk <74557243+Kevin-Patyk@users.noreply.github.com> Date: Fri, 13 Mar 2026 11:35:55 +0100 Subject: [PATCH 01/13] fix(python): Allow `DataTypeExpr` in `pl.lit()` (#26740) --- py-polars/src/polars/functions/lit.py | 8 +++++++- py-polars/tests/unit/functions/test_lit.py | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/py-polars/src/polars/functions/lit.py b/py-polars/src/polars/functions/lit.py index fc24fe572b1b..33b553627448 100644 --- a/py-polars/src/polars/functions/lit.py +++ b/py-polars/src/polars/functions/lit.py @@ -16,6 +16,7 @@ ) from polars._dependencies import numpy as np from polars._utils.wrap import wrap_expr +from polars.datatype_expr import DataTypeExpr from polars.datatypes import BaseExtension, Date, Datetime, Duration, Object from polars.datatypes.convert import DataTypeMappings @@ -28,7 +29,10 @@ def lit( - value: Any, dtype: PolarsDataType | None = None, *, allow_object: bool = False + value: Any, + dtype: PolarsDataType | DataTypeExpr | None = None, + *, + allow_object: bool = False, ) -> Expr: """ Return an expression representing a literal value. @@ -83,6 +87,8 @@ def lit( elif isinstance(dtype, type) and issubclass(dtype, BaseExtension): msg = f"dtype '{dtype}' is a BaseExtension class, it should be an instance" raise TypeError(msg) + elif isinstance(dtype, DataTypeExpr): + return lit(value).cast(dtype) elif dtype == Object: value_s = pl.Series("literal", [value], dtype=dtype) return wrap_expr(plr.lit(value_s._s, allow_object, is_scalar=True)) diff --git a/py-polars/tests/unit/functions/test_lit.py b/py-polars/tests/unit/functions/test_lit.py index cde0203ec4b9..bb641cd493ec 100644 --- a/py-polars/tests/unit/functions/test_lit.py +++ b/py-polars/tests/unit/functions/test_lit.py @@ -279,3 +279,11 @@ def test_lit_object_type_25713() -> None: out = pl.select(pl.lit(obj, dtype=pl.Object)) expected = pl.DataFrame({"literal": [obj]}, schema={"literal": pl.Object}) assert out.to_dict(as_series=False) == expected.to_dict(as_series=False) + + +def test_allow_dtype_expr_lit_26644() -> None: + result = pl.DataFrame().select( + pl.lit(None, pl.dtype_of(pl.lit(["abc"])).list.inner_dtype()) + ) + expected = pl.DataFrame({"literal": pl.Series([None], dtype=pl.String)}) + assert_frame_equal(result, expected) From 45dd3e15531bc5c86054e2f562a639af406988f9 Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Fri, 13 Mar 2026 23:28:22 +1100 Subject: [PATCH 02/13] fix: Fix panic on lazy concat->filter->slice with CSPE (#26907) --- crates/polars-plan/src/plans/optimizer/mod.rs | 6 +- .../src/plans/optimizer/slice_pushdown_lp.rs | 55 ++++++++++--------- .../unit/lazyframe/test_optimizations.py | 10 ++++ 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/crates/polars-plan/src/plans/optimizer/mod.rs b/crates/polars-plan/src/plans/optimizer/mod.rs index 85965c6a662b..43e8b5f76538 100644 --- a/crates/polars-plan/src/plans/optimizer/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/mod.rs @@ -201,8 +201,7 @@ pub fn optimize( if opt_flags.slice_pushdown() { let mut slice_pushdown_opt = SlicePushDown::new(); - let ir = ir_arena.take(root); - let ir = slice_pushdown_opt.optimize(ir, ir_arena, expr_arena)?; + let ir = slice_pushdown_opt.optimize(root, ir_arena, expr_arena)?; ir_arena.replace(root, ir); @@ -246,8 +245,7 @@ pub fn optimize( if repeat_slice_pd_after_filter_pd { let mut slice_pushdown_opt = SlicePushDown::new(); - let ir = ir_arena.take(root); - let ir = slice_pushdown_opt.optimize(ir, ir_arena, expr_arena)?; + let ir = slice_pushdown_opt.optimize(root, ir_arena, expr_arena)?; ir_arena.replace(root, ir); } diff --git a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs index 05f878818f8f..5415520ba28d 100644 --- a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs +++ b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs @@ -190,10 +190,9 @@ impl SlicePushDown { let new_inputs = inputs .into_iter() .map(|node| { - let alp = lp_arena.take(node); // No state, so we do not push down the slice here. let state = None; - let alp = self.pushdown(alp, state, lp_arena, expr_arena)?; + let alp = self.pushdown(node, state, lp_arena, expr_arena)?; lp_arena.replace(node, alp); Ok(node) }) @@ -216,8 +215,7 @@ impl SlicePushDown { let new_inputs = inputs .into_iter() .map(|node| { - let alp = lp_arena.take(node); - let alp = self.pushdown(alp, state, lp_arena, expr_arena)?; + let alp = self.pushdown(node, state, lp_arena, expr_arena)?; lp_arena.replace(node, alp); Ok(node) }) @@ -225,17 +223,29 @@ impl SlicePushDown { Ok(lp.with_inputs(new_inputs)) } + /// This will take the `ir_node` from the `lp_arena`, replacing it with `IR::Invalid` (except if + /// `ir_node` is a `IR::Cache`). #[recursive] fn pushdown( &mut self, - lp: IR, + ir_node: Node, state: Option, lp_arena: &mut Arena, expr_arena: &mut Arena, ) -> PolarsResult { use IR::*; - match (lp, state) { + // Don't take this, the node can be referenced multiple times in the tree. + if let IR::Cache { .. } = lp_arena.get(ir_node) { + return self.no_pushdown_restart_opt( + lp_arena.get(ir_node).clone(), + state, + lp_arena, + expr_arena, + ); + } + + match (lp_arena.take(ir_node), state) { #[cfg(feature = "python")] ( PythonScan { mut options }, @@ -305,7 +315,8 @@ impl SlicePushDown { predicate_file_skip_applied, }; - self.pushdown(lp, None, lp_arena, expr_arena) + lp_arena.replace(ir_node, lp); + self.pushdown(ir_node, None, lp_arena, expr_arena) } else { let lp = Scan { sources, @@ -385,8 +396,7 @@ impl SlicePushDown { .map(|len| State { offset: 0, len }); for input in &mut inputs { - let input_lp = lp_arena.take(*input); - let input_lp = self.pushdown(input_lp, subplan_slice, lp_arena, expr_arena)?; + let input_lp = self.pushdown(*input, subplan_slice, lp_arena, expr_arena)?; lp_arena.replace(*input, input_lp); } options.slice = opt_state.map(|x| (x.offset, x.len.try_into().unwrap())); @@ -440,12 +450,10 @@ impl SlicePushDown { } // first restart optimization in both inputs and get the updated LP - let lp_left = lp_arena.take(input_left); - let lp_left = self.pushdown(lp_left, None, lp_arena, expr_arena)?; + let lp_left = self.pushdown(input_left, None, lp_arena, expr_arena)?; let input_left = lp_arena.add(lp_left); - let lp_right = lp_arena.take(input_right); - let lp_right = self.pushdown(lp_right, None, lp_arena, expr_arena)?; + let lp_right = self.pushdown(input_right, None, lp_arena, expr_arena)?; let input_right = lp_arena.add(lp_right); // then assign the slice state to the join operation @@ -476,8 +484,7 @@ impl SlicePushDown { Some(state), ) => { // first restart optimization in inputs and get the updated LP - let input_lp = lp_arena.take(input); - let input_lp = self.pushdown(input_lp, None, lp_arena, expr_arena)?; + let input_lp = self.pushdown(input, None, lp_arena, expr_arena)?; let input = lp_arena.add(input_lp); if let Some(existing_slice) = &mut Arc::make_mut(&mut options).slice { @@ -528,8 +535,7 @@ impl SlicePushDown { }, (Distinct { input, mut options }, Some(state)) => { // first restart optimization in inputs and get the updated LP - let input_lp = lp_arena.take(input); - let input_lp = self.pushdown(input_lp, None, lp_arena, expr_arena)?; + let input_lp = self.pushdown(input, None, lp_arena, expr_arena)?; let input = lp_arena.add(input_lp); if let Some(existing_slice) = &mut options.slice { @@ -594,8 +600,7 @@ impl SlicePushDown { assert!(slice.is_none() || slice == new_slice); // first restart optimization in inputs and get the updated LP - let input_lp = lp_arena.take(input); - let input_lp = self.pushdown(input_lp, None, lp_arena, expr_arena)?; + let input_lp = self.pushdown(input, None, lp_arena, expr_arena)?; let input = lp_arena.add(input_lp); Ok(Sort { @@ -613,8 +618,6 @@ impl SlicePushDown { }, Some(outer_slice), ) => { - let alp = lp_arena.take(input); - // If offset is negative the length can never be greater than it. if offset < 0 { #[allow(clippy::unnecessary_cast)] // Necessary when IdxSize = u64. @@ -626,10 +629,10 @@ impl SlicePushDown { if let Some(combined) = combine_outer_inner_slice(outer_slice, State { offset, len }) { - self.pushdown(alp, Some(combined), lp_arena, expr_arena) + self.pushdown(input, Some(combined), lp_arena, expr_arena) } else { let lp = - self.pushdown(alp, Some(State { offset, len }), lp_arena, expr_arena)?; + self.pushdown(input, Some(State { offset, len }), lp_arena, expr_arena)?; let input = lp_arena.add(lp); self.slice_node_in_optimized_plan = true; Ok(Slice { @@ -647,8 +650,6 @@ impl SlicePushDown { }, None, ) => { - let alp = lp_arena.take(input); - // If offset is negative the length can never be greater than it. if offset < 0 { #[allow(clippy::unnecessary_cast)] // Necessary when IdxSize = u64. @@ -658,7 +659,7 @@ impl SlicePushDown { } let state = Some(State { offset, len }); - self.pushdown(alp, state, lp_arena, expr_arena) + self.pushdown(input, state, lp_arena, expr_arena) }, m @ (Filter { .. }, _) | m @ (DataFrameScan { .. }, _) @@ -809,7 +810,7 @@ impl SlicePushDown { pub fn optimize( &mut self, - logical_plan: IR, + logical_plan: Node, lp_arena: &mut Arena, expr_arena: &mut Arena, ) -> PolarsResult { diff --git a/py-polars/tests/unit/lazyframe/test_optimizations.py b/py-polars/tests/unit/lazyframe/test_optimizations.py index 41a90441a47e..02e4c0e8effd 100644 --- a/py-polars/tests/unit/lazyframe/test_optimizations.py +++ b/py-polars/tests/unit/lazyframe/test_optimizations.py @@ -618,3 +618,13 @@ def test_scan_select_all_columns_no_projection_pyarrow() -> None: ds = pad.dataset(pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})) plan = pl.scan_pyarrow_dataset(ds).select(pl.col("a"), pl.col("b")).explain() assert "PROJECT */2 COLUMNS" in plan + + +def test_slice_pushdown_with_cache_arena_take_panic_26905() -> None: + lf = pl.LazyFrame({"x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}) + q = pl.concat([lf, lf]).select(pl.all()).filter(pl.col("x") > 3).head(2) + + assert_frame_equal( + q.collect(), + pl.DataFrame({"x": [4, 5]}), + ) From 914c8c56027804c5f12eb3a8ecd11f1f0199fd0d Mon Sep 17 00:00:00 2001 From: Thijs Nieuwdorp Date: Mon, 16 Mar 2026 10:28:34 +0100 Subject: [PATCH 03/13] docs: Query Profiler addition to User Guide (#26623) --- .../polars-cloud/run/distributed-engine.md | 2 +- docs/source/polars-cloud/run/glossary.md | 6 +- docs/source/polars-cloud/run/query-profile.md | 309 ++++++++++-------- .../src/python/polars-cloud/query-profile.py | 92 ++++-- 4 files changed, 257 insertions(+), 152 deletions(-) diff --git a/docs/source/polars-cloud/run/distributed-engine.md b/docs/source/polars-cloud/run/distributed-engine.md index eba421e4895c..ec982f9c54ef 100644 --- a/docs/source/polars-cloud/run/distributed-engine.md +++ b/docs/source/polars-cloud/run/distributed-engine.md @@ -32,7 +32,7 @@ result = ( This example demonstrates running query 3 of the PDS-H benchmarkon scale factor 100 (approx. 100GB of data) using Polars Cloud distributed engine. -!!! note "Run the example yourself" +!!! example "Run the example yourself" Copy and paste the code to you environment and run it. The data is hosted in S3 buckets that use [AWS Requester Pays](https://docs.aws.amazon.com/AmazonS3/latest/userguide/RequesterPaysBuckets.html), meaning you pay only for pays the cost of the request and the data download from the bucket. The storage costs are covered. diff --git a/docs/source/polars-cloud/run/glossary.md b/docs/source/polars-cloud/run/glossary.md index 838b4dbca8b6..0bf7ea0341e0 100644 --- a/docs/source/polars-cloud/run/glossary.md +++ b/docs/source/polars-cloud/run/glossary.md @@ -70,9 +70,9 @@ completion back to the scheduler and write shuffle output for downstream stages The **stage graph** is produced by the distributed query planner from the optimized logical plan. The planner walks the logical plan and identifies **stage boundaries**: points where a data shuffle -is required to optimize stages to maximize parallelism, minimize data shuffle, and keep peak memory -usage under control. Joins and group-bys are typical examples, a worker cannot produce its final -result without first receiving the relevant keys or partial aggregates from other workers. +is required. The planner optimizes stages to maximize parallelism, minimize data shuffle, and keep +peak memory usage under control. Joins and group-bys are typical examples; a worker cannot produce +its final result without first receiving the relevant keys or partial aggregates from other workers. At each stage boundary, the planner inserts a shuffle and starts a new stage. The result is a directed acyclic graph (DAG) in which each node is a stage and each edge is a shuffle. All workers diff --git a/docs/source/polars-cloud/run/query-profile.md b/docs/source/polars-cloud/run/query-profile.md index a2d57ce8b5b4..47b06616b59c 100644 --- a/docs/source/polars-cloud/run/query-profile.md +++ b/docs/source/polars-cloud/run/query-profile.md @@ -1,131 +1,184 @@ # Query profiling Monitor query execution across workers to identify bottlenecks, understand data flow, and optimize -performance. You can see which stages are running, how data moves between workers, and where time is -spent during execution. - -This visibility helps you optimize complex queries and better understand the distributed execution -of queries. - -
-Example query and dataset - -You can copy and paste the example below to explore the feature yourself. Don't forget to change the -workspace name to one of your own workspaces. - -```python -import polars as pl -import polars_cloud as pc - -pc.authenticate() - -ctx = pc.ComputeContext(workspace="your-workspace", cpus=12, memory=12, cluster_size=4) - -def pdsh_q3(customer, lineitem, orders): - return ( - customer.filter(pl.col("c_mktsegment") == "BUILDING") - .join(orders, left_on="c_custkey", right_on="o_custkey") - .join(lineitem, left_on="o_orderkey", right_on="l_orderkey") - .filter(pl.col("o_orderdate") < pl.date(1995, 3, 15)) - .filter(pl.col("l_shipdate") > pl.date(1995, 3, 15)) - .with_columns( - (pl.col("l_extendedprice") * (1 - pl.col("l_discount"))).alias("revenue") - ) - .group_by("o_orderkey", "o_orderdate", "o_shippriority") - .agg(pl.sum("revenue")) - .select( - pl.col("o_orderkey").alias("l_orderkey"), - "revenue", - "o_orderdate", - "o_shippriority", - ) - .sort(by=["revenue", "o_orderdate"], descending=[True, False]) - ) - -lineitem = pl.scan_parquet( - "s3://polars-cloud-samples-us-east-2-prd/pdsh/sf100/lineitem/*.parquet", - storage_options={"request_payer": "true"}, -) -customer = pl.scan_parquet( - "s3://polars-cloud-samples-us-east-2-prd/pdsh/sf100/customer/*.parquet", - storage_options={"request_payer": "true"}, -) -orders = pl.scan_parquet( - "s3://polars-cloud-samples-us-east-2-prd/pdsh/sf100/orders/*.parquet", - storage_options={"request_payer": "true"}, -) -``` - -
- -{{code_block('polars-cloud/query-profile','execute',[])}} - -The `await_profile` method can be used to monitor an in-progress query. It returns a QueryProfile -object containing a DataFrame with information about which stages are being processed across -workers, which can be analyzed in the same way as any Polars query. - -{{code_block('polars-cloud/query-profile','await_profile',[])}} - -Each row represents one worker processing a span. A span represents a chunk of work done by a -worker, for example generating the query plan, reading data from another worker, or executing the -query on that data. Some spans may output data, which is recorded in the output_rows column. - -```text -shape: (53, 6) -┌──────────────┬──────────────┬───────────┬─────────────────────┬────────────────────┬─────────────┬───────────────────────┬────────────────────┐ -│ stage_number ┆ span_name ┆ worker_id ┆ start_time ┆ end_time ┆ output_rows ┆ shuffle_bytes_written ┆ shuffle_bytes_read │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ │ -│ u32 ┆ str ┆ str ┆ datetime[ns] ┆ datetime[ns] ┆ u64 ┆ u64 ┆ u64 │ -╞══════════════╪══════════════╪═══════════╪═════════════════════╪════════════════════╪═════════════╪═══════════════════════╪════════════════════╡ -│ 6 ┆ Execute IR ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ 282794 ┆ 72395264 ┆ null │ -│ ┆ ┆ ┆ 08:08:52.820228585 ┆ 08:08:52.878229914 ┆ ┆ ┆ │ -│ 3 ┆ Execute IR ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ 3643370 ┆ 932702720 ┆ null │ -│ ┆ ┆ ┆ 08:08:45.421053731 ┆ 08:08:45.600081475 ┆ ┆ ┆ │ -│ 5 ┆ Execute IR ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ 282044 ┆ 723203264 ┆ null │ -│ ┆ ┆ ┆ 08:08:52.667547917 ┆ 08:08:52.718114297 ┆ ┆ ┆ │ -│ 5 ┆ Shuffle read ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ null ┆ null ┆ 932702720 │ -│ ┆ ┆ ┆ 08:08:52.694917167 ┆ 08:08:52.720657155 ┆ ┆ ┆ │ -│ 7 ┆ Execute IR ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ 145179 ┆ 37165824 ┆ null │ -│ ┆ ┆ ┆ 08:08:53.039771274 ┆ 08:08:53.166535930 ┆ ┆ ┆ │ -│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │ -│ 5 ┆ Shuffle read ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ null ┆ null ┆ 72503808 │ -│ ┆ ┆ ┆ 08:08:52.649434841 ┆ 08:08:52.667065947 ┆ ┆ ┆ │ -│ 6 ┆ Execute IR ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ 283218 ┆ 72503808 ┆ null │ -│ ┆ ┆ ┆ 08:08:52.818787714 ┆ 08:08:52.880324797 ┆ ┆ ┆ │ -│ 4 ┆ Shuffle read ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ null ┆ null ┆ 3979787264 │ -│ ┆ ┆ ┆ 08:08:46.188322234 ┆ 08:08:50.871792346 ┆ ┆ ┆ │ -│ 1 ┆ Execute IR ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ 15546044 ┆ 3979787264 ┆ null │ -│ ┆ ┆ ┆ 08:08:40.325404872 ┆ 08:08:44.030028095 ┆ ┆ ┆ │ -│ 7 ┆ Shuffle read ┆ i-xxx ┆ 2025-xx-xx ┆ 2025-xx-xx ┆ null ┆ null ┆ 37165824 │ -│ ┆ ┆ ┆ 08:08:52.925442390 ┆ 08:08:52.962600065 ┆ ┆ ┆ │ -└──────────────┴──────────────┴───────────┴─────────────────────┴────────────────────┴─────────────┴───────────────────────┴────────────────────┘ -``` - -As each worker starts and completes each stage of the query, it notifies the lead worker. The -`await_profile` method will poll the lead worker until there is an update from any worker, and then -return the full profile data of the query. - -The QueryProfile object also has a summary property to return an aggregated view of each stage. - -{{code_block('polars-cloud/query-profile','await_summary',[])}} - -```text -shape: (13, 6) -┌──────────────┬──────────────┬───────────┬────────────┬──────────────┬─────────────┬───────────────────────┬────────────────────┐ -│ stage_number ┆ span_name ┆ completed ┆ worker_ids ┆ duration ┆ output_rows ┆ shuffle_bytes_written ┆ shuffle_bytes_read │ -│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ u32 ┆ str ┆ bool ┆ str ┆ duration[μs] ┆ u64 ┆ u64 ┆ u64 │ -╞══════════════╪══════════════╪═══════════╪════════════╪══════════════╪═════════════╪═══════════════════════╪════════════════════╡ -│ 6 ┆ Shuffle read ┆ true ┆ i-xxx ┆ 1228µs ┆ 0 ┆ 0 ┆ 289546496 │ -│ 5 ┆ Shuffle read ┆ true ┆ i-xxx ┆ 140759µs ┆ 0 ┆ 0 ┆ 289546496 │ -│ 4 ┆ Execute IR ┆ true ┆ i-xxx ┆ 1s 73534µs ┆ 1131041 ┆ 289546496 ┆ 0 │ -│ 2 ┆ Execute IR ┆ true ┆ i-xxx ┆ 6s 944740µs ┆ 3000188 ┆ 768048128 ┆ 0 │ -│ 5 ┆ Execute IR ┆ true ┆ i-xxx ┆ 167483µs ┆ 1131041 ┆ 289546496 ┆ 0 │ -│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │ -│ 4 ┆ Shuffle read ┆ true ┆ i-xxx ┆ 4s 952005µs ┆ 0 ┆ 0 ┆ 255627121 │ -│ 1 ┆ Execute IR ┆ true ┆ i-xxx ┆ 7s 738907µs ┆ 72874383 ┆ 18655842048 ┆ 0 │ -│ 3 ┆ Shuffle read ┆ true ┆ i-xxx ┆ 812807µs ┆ 0 ┆ 0 ┆ 768048128 │ -│ 0 ┆ Execute IR ┆ true ┆ i-xxx ┆ 15s 2883µs ┆ 323494519 ┆ 82814596864 ┆ 0 │ -│ 7 ┆ Execute IR ┆ true ┆ i-xxx ┆ 356662µs ┆ 1131041 ┆ 289546496 ┆ 0 │ -└──────────────┴──────────────┴───────────┴────────────┴──────────────┴─────────────┴───────────────────────┴────────────────────┘ -``` +performance. + +## Types of operations in a query + +To optimize a query it helps to understand where it spends its time. Each worker in a distributed +query does three things: it reads data, computes on it, and exchanges data with other workers. + +**Input/Output**: Each worker reads its assigned [partitions](glossary.md#partition) from storage +and writes results to a destination. These are typically the first and last activities you see in +the profiler. I/O-heavy queries benefit from more network bandwidth, either by adding more nodes or +by choosing a higher-bandwidth instance type. + +**Computation**: Workers execute the query operations (such as filters, joins, aggregations, etc.) +on their local data. CPU and memory usage are visible in the resource overview of the nodes. + +**Shuffling**: Some operations, such as joins and group-bys, require all rows with a given key to be +on the same worker. To accomplish this, data is redistributed across the cluster in a +[shuffle](glossary.md#shuffle) between stages. Within a stage, the streaming engine processes +incoming shuffle data as it arrives over the network, so I/O and computation overlap. Shuffle-heavy +queries produce large volumes of inter-node traffic, visible as network bandwidth usage in the +cluster dashboard and as a high percentage of time spent shuffling in the metrics. + +## Using the query profiler + +The cluster dashboard and built-in query profiler are available through the Polars Cloud compute +dashboard. + +The profiler shows detailed metrics, both real-time and after query completion, such as workers' +resource usage and the percentage of time spent shuffling. + +![Cluster dashboard](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/cluster-dashboard.png) + +### Single Node Query + +Our first example is a query that runs on a single node. If you'd like you can run this in your own +environment so you can explore the functionality yourself. + +??? example "Try it: Single node query" + + Queries can be run on a single node by marking your query like so: + + ```python + query.remote(ctx).single_node().execute() + ``` + + This will let the query run on a single worker. This simplifies query execution and you don't + need to shuffle data between workers. Copy and paste the example below to explore the feature + yourself. Don't forget to change the workspace name to one of your own workspaces. + + {{code_block('polars-cloud/query-profile','single-node-query',[])}} + +#### Query plans + +You can inspect the details of a query by going to the "Queries" tab and selecting the query you +want to inspect. You can see the timeline, which shows when the query started and ended, and how +long planning and running the query took. On top of that it consists of a single stage, because the +query runs completely on a single node. + +At the bottom of the query details you can inspect the +[optimized logical plan](glossary.md#optimized-logical-plan) and the +[physical plan](glossary.md#physical-plan): + +![Query details](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/query-details.png) + +The logical plan is a graph representation that shows what your query will do, and how your query +has been optimized. Clicking nodes in the plan gives you more details about the operation that will +be performed: + + +![Logical plan](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/logical-plan.png){ width="50%" style="display: block; margin: 0 auto;" } + +The physical plan shows how the engine executes your query: the concrete algorithms, operator +implementations, and data flow chosen at runtime. + + +![Physical plan](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/physical-plan.png){ width="70%" style="display: block; margin: 0 auto;" } + +While the query runs and after it has finished, there are additional metrics available, such as how +many rows and morsels flow through a node and how much time is spent in that node. In our example +you can see that the group by takes particularly long and aggregates an input of 59.1 million rows +to 4 output rows: + + +![Group By node example](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/group-by-node.png){ width="50%" style="display: block; margin: 0 auto;" } + +This makes sense because this query performs a list of aggregations, as we can see in the node +details information in the logical plan: + + +![Node details example](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/node-details.png){ width="50%" style="display: block; margin: 0 auto;" } + +The indication that most time is spent in the GroupBy node matches our expectations for this query. + +#### Indicators + +Modes in the physical plan or stages in the stage graph can show indicators to help identify +bottlenecks: + +| Indicator | Description | +| ------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ![CPU time](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/cpu-time.png) | Shows which operations took the most CPU time. | +| ![I/O time](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/io-time.png) | Percentage of the stage's total I/O time spent in this node, helping identify the most I/O-heavy operations. | +| ![Memory intensive](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/indicator-memory-intensive.png) | The node is potentially memory-intensive because the operation requires keeping state (e.g. storing the intermediate groups in a `group_by`). | +| ![Single node](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/indicator-single-node.png) | This stage was executed on a single node because it contains operations that require a global state (e.g. `sort`). This indicator only appears in distributed queries. | +| ![In-memory fallback](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/indicator-in-memory.png) | This operation is currently not supported on the streaming engine and was executed on the in-memory engine. | + +!!! info "I/O and CPU time don't sum to 100%" + + The I/O time and CPU time percentages shown per node do not sum to the total runtime. This is because execution is pipelined: data is processed as it arrives, so I/O (reading/writing) and CPU (computation) work happens concurrently. As a result, both indicators can be non-zero at the same time for a given node, and their combined total can exceed the total runtime. + +### Distributed Query + +The following section is based on a distributed query. You can follow along with this example code: + +??? example "Try it: Distributed query" + + Distributed is the default execution mode in Polars Cloud. You can also set it explicitly: + + ```python + query.remote(ctx).distributed().execute() + ``` + + For more on how distributed execution works, see [Distributed queries](distributed-engine.md). + Copy and paste the example below to explore the feature yourself. Don't forget to change the + workspace name to one of your own workspaces. + + {{code_block('polars-cloud/query-profile','distributed-query',[])}} + +#### Stage graph + +When executing distributed queries, queries are often executed in [stages](glossary.md#stage). Some +operations require [shuffles](glossary.md#shuffle) to make sure the correct +[partitions](glossary.md#partition) are available to the workers. To accomplish this, data is +shuffled between workers over the network. Each stage can be expanded to inspect the operations it +contains and understand what work is happening at each point in the pipeline. + +When you execute the example query, you get the result that can be seen in the image below. In the +stage graph, one of the scan stages at the bottom stands out: its indicator shows a high percentage +of total time spent in that stage. + +![Stage graph with node details](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/stage-graph-node-details.png) + +When you click on that stage (not one of the nodes in it), you open the stage details, displaying +detailed metrics. You can notice that the I/O time of this stage is roughly 55%. + +![Example of heavy stage](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/stage-example.png) + +Through the details you can open the physical plan of this stage. This will display all of the +operations in this stage, how long they took, and any indicators that might help you find +bottlenecks. + + +![Example of stage's physical plan](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/stage-physical-plan-example.png){ width="50%" style="display: block; margin: 0 auto;" } + +One thing you should immediately notice is that the MultiScan node at the bottom takes almost 100% +of the time for I/O: + + +![I/O time](https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/docs/query-profiler/io-time.png){ style="display: block; margin: 0 auto;" } + +This I/O indicator shows that I/O was active for nearly the full runtime of the stage. We can +conclude that the network I/O in this node is the bottleneck in this part of the physical plan. + +In this example the data is stored in `us-east-2` while the cluster runs in `eu-west-1`. The +cross-region bandwidth causes I/O to take longer than it would if the data and cluster were in the +same region. Co-locate your cluster and data in the same region to minimize I/O latency. + +## Takeaways + +- The [logical plan](glossary.md#optimized-logical-plan) shows how your query has been optimized. +- The [physical plan](glossary.md#physical-plan) shows how your query is executed, and which + operations are responsible for both CPU and I/O time spent. +- In a distributed query, the [stage graph](glossary.md#stage-graph) shows which + [stages](glossary.md#stage) take the longest and how much data is [shuffled](glossary.md#shuffle) + between them. +- Indicators on stages and nodes highlight potential bottlenecks: start with the slowest stage and + drill down to individual operations. +- I/O-heavy queries benefit from more bandwidth: you can add nodes or choose a higher-bandwidth + instance type. +- [Shuffle](glossary.md#shuffle)-heavy queries may benefit from fewer, larger nodes to reduce + inter-node traffic. diff --git a/docs/source/src/python/polars-cloud/query-profile.py b/docs/source/src/python/polars-cloud/query-profile.py index dc2600a3a811..8543005acd43 100644 --- a/docs/source/src/python/polars-cloud/query-profile.py +++ b/docs/source/src/python/polars-cloud/query-profile.py @@ -1,33 +1,85 @@ """ -from typing import cast - +# --8<-- [start:single-node-query] import polars as pl import polars_cloud as pc +from datetime import date + +pc.authenticate() +ctx = pc.ComputeContext(workspace="your-workspace", cpus=8, memory=8, cluster_size=1) -def pdsh_q3( - customer: pl.LazyFrame, lineitem: pl.LazyFrame, orders: pl.LazyFrame -) -> pl.LazyFrame: - pass +lineitem = pl.scan_parquet("s3://polars-cloud-samples-us-east-2-prd/pdsh/sf10/lineitem.parquet", + storage_options={"request_payer": "true"} +) +var1 = date(1998, 9, 2) +( + lineitem.filter(pl.col("l_shipdate") <= var1) + .group_by("l_returnflag", "l_linestatus") + .agg( + pl.sum("l_quantity").alias("sum_qty"), + pl.sum("l_extendedprice").alias("sum_base_price"), + (pl.col("l_extendedprice") * (1.0 - pl.col("l_discount"))) + .sum() + .alias("sum_disc_price"), + ( + pl.col("l_extendedprice") + * (1.0 - pl.col("l_discount")) + * (1.0 + pl.col("l_tax")) + ) + .sum() + .alias("sum_charge"), + pl.mean("l_quantity").alias("avg_qty"), + pl.mean("l_extendedprice").alias("avg_price"), + pl.mean("l_discount").alias("avg_disc"), + pl.len().alias("count_order"), + ) + .sort("l_returnflag", "l_linestatus") +).remote(ctx).single_node().execute() +# --8<-- [end:single-node-query] -customer = pl.LazyFrame() -lineitem = pl.LazyFrame() -orders = pl.LazyFrame() +# --8<-- [start:distributed-query] +import polars as pl +import polars_cloud as pc -ctx = pc.ComputeContext() +pc.authenticate() -# --8<-- [start:execute] -query = pdsh_q3(customer, lineitem, orders).remote(ctx).distributed().execute() -# --8<-- [end:execute] +ctx = pc.ComputeContext(workspace="your-workspace", cpus=12, memory=12, cluster_size=4) -query = cast("pc.DirectQuery", query) +def pdsh_q3(customer, lineitem, orders): + return ( + customer.filter(pl.col("c_mktsegment") == "BUILDING") + .join(orders, left_on="c_custkey", right_on="o_custkey") + .join(lineitem, left_on="o_orderkey", right_on="l_orderkey") + .filter(pl.col("o_orderdate") < pl.date(1995, 3, 15)) + .filter(pl.col("l_shipdate") > pl.date(1995, 3, 15)) + .with_columns( + (pl.col("l_extendedprice") * (1 - pl.col("l_discount"))).alias("revenue") + ) + .group_by("o_orderkey", "o_orderdate", "o_shippriority") + .agg(pl.sum("revenue")) + .select( + pl.col("o_orderkey").alias("l_orderkey"), + "revenue", + "o_orderdate", + "o_shippriority", + ) + .sort(by=["revenue", "o_orderdate"], descending=[True, False]) + ) -# --8<-- [start:await_profile] -query.await_profile().data -# --8<-- [end:await_profile] +lineitem = pl.scan_parquet( + "s3://polars-cloud-samples-us-east-2-prd/pdsh/sf100/lineitem/*.parquet", + storage_options={"request_payer": "true"}, +) +customer = pl.scan_parquet( + "s3://polars-cloud-samples-us-east-2-prd/pdsh/sf100/customer/*.parquet", + storage_options={"request_payer": "true"}, +) +orders = pl.scan_parquet( + "s3://polars-cloud-samples-us-east-2-prd/pdsh/sf100/orders/*.parquet", + storage_options={"request_payer": "true"}, +) -# --8<-- [start:await_summary] -query.await_profile().summary -# --8<-- [end:await_summary] +pdsh_q3(customer, lineitem, orders).remote(ctx).distributed().execute() +# --8<-- [end:distributed-query] """ From 16725ce56adacad4b1b2c8e3cc52efc3dc921e49 Mon Sep 17 00:00:00 2001 From: GAUTAM V DATLA <85986314+gautamvarmadatla@users.noreply.github.com> Date: Mon, 16 Mar 2026 07:38:07 -0400 Subject: [PATCH 04/13] fix: Propagate null in `min_by` / `max_by` for all-null by groups (#26919) --- .../src/expressions/aggregation.rs | 21 ++++---- .../aggregation/test_aggregations.py | 50 +++++++++++++++++++ 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 826e9f5c031a..ffcb1cc14354 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -9,7 +9,6 @@ use polars_core::utils::{_split_offsets, NoNull}; use polars_ops::prelude::ArgAgg; #[cfg(feature = "propagate_nans")] use polars_ops::prelude::nan_propagating_aggregate; -use polars_utils::itertools::Itertools; use rayon::prelude::*; use super::*; @@ -712,21 +711,23 @@ impl PhysicalExpr for AggMinMaxByExpr { unsafe { by_col.agg_arg_min(&by_groups) } }; let idxs_in_groups: &IdxCa = idxs_in_groups.as_materialized_series().as_ref().as_ref(); - let flat_gather_idxs = match input_groups.as_ref().as_ref() { + let gather_idxs: IdxCa = match input_groups.as_ref().as_ref() { GroupsType::Idx(g) => idxs_in_groups - .into_no_null_iter() + .iter() .enumerate() - .map(|(group_idx, idx_in_group)| g.all()[group_idx][idx_in_group as usize]) - .collect_vec(), + .map(|(group_idx, idx_in_group)| { + idx_in_group.map(|i| g.all()[group_idx][i as usize]) + }) + .collect(), GroupsType::Slice { groups, .. } => idxs_in_groups - .into_no_null_iter() + .iter() .enumerate() - .map(|(group_idx, idx_in_group)| groups[group_idx][0] + idx_in_group) - .collect_vec(), + .map(|(group_idx, idx_in_group)| idx_in_group.map(|i| groups[group_idx][0] + i)) + .collect(), }; - // SAFETY: All indices are within input_col's groups. - let gathered = unsafe { input_col.take_slice_unchecked(&flat_gather_idxs) }; + // SAFETY: All non-null indices are within input_col's groups. + let gathered = unsafe { input_col.take_unchecked(&gather_idxs) }; let agg_state = AggregatedScalar(gathered.with_name(keep_name)); Ok(AggregationContext::from_agg_state( agg_state, diff --git a/py-polars/tests/unit/operations/aggregation/test_aggregations.py b/py-polars/tests/unit/operations/aggregation/test_aggregations.py index ef6a747f4e19..6891b208beb8 100644 --- a/py-polars/tests/unit/operations/aggregation/test_aggregations.py +++ b/py-polars/tests/unit/operations/aggregation/test_aggregations.py @@ -1512,3 +1512,53 @@ def test_min_max_by_on_boolean_26847( df = pl.DataFrame({"a": [1] * 10, "b": [True] * 10}) result = df.select(agg(pl.col("a"), pl.col("b"))) assert result.item() == expected + + +@pytest.mark.parametrize("agg", [pl.Expr.min_by, pl.Expr.max_by]) +def test_min_max_by_all_null_by_group(agg: Callable[..., pl.Expr]) -> None: + df = pl.DataFrame( + { + "g": ["a", "a", "b"], + "val": [1, 2, 3], + "by": pl.Series([None, None, 5], dtype=pl.Int64), + } + ) + expected = pl.DataFrame( + {"g": ["a", "b"], "val": pl.Series([None, 3], dtype=pl.Int64)} + ) + + eager = df.group_by("g", maintain_order=True).agg(agg(pl.col("val"), pl.col("by"))) + assert_frame_equal(eager, expected) + + streaming = ( + df.lazy() + .group_by("g", maintain_order=True) + .agg(agg(pl.col("val"), pl.col("by"))) + .collect(engine="streaming") + ) + assert_frame_equal(streaming, expected) + + +@pytest.mark.parametrize("agg", [pl.Expr.min_by, pl.Expr.max_by]) +def test_min_max_by_all_null_by_group_slice(agg: Callable[..., pl.Expr]) -> None: + df = pl.DataFrame( + { + "dt": [date(2020, 1, 1), date(2020, 1, 1), date(2020, 2, 1)], + "val": [1, 2, 3], + "by": pl.Series([None, None, 5], dtype=pl.Int64), + } + ) + expected = pl.DataFrame( + { + "dt": [date(2020, 1, 1), date(2020, 2, 1)], + "val": pl.Series([None, 3], dtype=pl.Int64), + } + ) + + result = ( + df.lazy() + .group_by_dynamic("dt", every="1mo") + .agg(agg(pl.col("val"), pl.col("by"))) + .collect() + ) + assert_frame_equal(result, expected) From 1b71c72e8015ff5b7009908af5d56c277b3f437a Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Tue, 17 Mar 2026 00:18:37 +1100 Subject: [PATCH 05/13] fix: Fix error passing `Series` of dates to business functions (#26927) --- .../src/plans/conversion/type_coercion/mod.rs | 50 +++++++++++++++++++ .../unit/functions/test_business_day_count.py | 8 +++ .../temporal/test_add_business_days.py | 19 +++++++ .../temporal/test_is_business_day.py | 7 +++ 4 files changed, 84 insertions(+) diff --git a/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs b/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs index 7b54d2d6a76a..1e0f7395f29b 100644 --- a/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs +++ b/crates/polars-plan/src/plans/conversion/type_coercion/mod.rs @@ -766,6 +766,56 @@ impl OptimizationRule for TypeCoercionRule { options, }) }, + #[cfg(feature = "business")] + AExpr::Function { + function: IRFunctionExpr::Business(ref business_fn), + ref input, + options, + } => { + let holiday_arg_idx: usize = match business_fn { + IRBusinessFunction::AddBusinessDay { .. } + | IRBusinessFunction::BusinessDayCount { .. } => 2, + IRBusinessFunction::IsBusinessDay { .. } => 1, + }; + + let holiday_arg = unpack!(input.get(holiday_arg_idx)); + + // We implode, only for literal Series(dtype=Date), as this is considered a valid + // parameter on the Python API as an `Iterable[date]`. + let new_lv_ae: AExpr = match expr_arena.get(holiday_arg.node()) { + AExpr::Literal(LiteralValue::Series(s)) if s.dtype() == &DataType::Date => { + AExpr::Literal(LiteralValue::Series(SpecialEq::new( + s.implode().unwrap().into_series(), + ))) + }, + ae => { + let dtype = ae.to_dtype(&ToFieldContext::new(expr_arena, schema))?; + + let is_list_of_date = match &dtype { + DataType::List(inner) => inner.as_ref() == &DataType::Date, + _ => false, + }; + + polars_ensure!( + is_list_of_date, + ComputeError: + "dtype of holidays list must be List(Date), got {dtype:?} instead" + ); + + return Ok(None); + }, + }; + + let mut input = input.clone(); + let function = IRFunctionExpr::Business(business_fn.clone()); + input[holiday_arg_idx].set_node(expr_arena.add(new_lv_ae)); + + Some(AExpr::Function { + input, + function, + options, + }) + }, #[cfg(feature = "list_gather")] AExpr::Function { function: ref function @ IRFunctionExpr::ListExpr(IRListFunction::Gather(_)), diff --git a/py-polars/tests/unit/functions/test_business_day_count.py b/py-polars/tests/unit/functions/test_business_day_count.py index 883fa84ebb1a..8673d40ec12f 100644 --- a/py-polars/tests/unit/functions/test_business_day_count.py +++ b/py-polars/tests/unit/functions/test_business_day_count.py @@ -135,6 +135,14 @@ def test_business_day_count_w_holidays() -> None: expected = pl.Series("business_day_count", [0, 5, 5], pl.Int32) assert_series_equal(result, expected) + result = df.select( + business_day_count=pl.business_day_count( + "start", "end", holidays=pl.Series([date(2020, 1, 1), date(2020, 1, 9)]) + ), + )["business_day_count"] + expected = pl.Series("business_day_count", [0, 5, 5], pl.Int32) + assert_series_equal(result, expected) + @given( start=st.dates(min_value=dt.date(1969, 1, 1), max_value=dt.date(1970, 12, 31)), diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py b/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py index dedca6a68937..4810c65cf46f 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_add_business_days.py @@ -147,6 +147,25 @@ def test_add_business_days_w_holidays() -> None: ) assert_series_equal(result, expected) + result = df.select( + result=pl.col("start").dt.add_business_days( + "n", + holidays=pl.Series( + [ + date(2019, 1, 1), + date(2020, 1, 1), + date(2020, 1, 2), + date(2021, 1, 1), + ] + ), + roll="backward", + ), + )["result"] + expected = pl.Series( + "result", [date(2020, 1, 3), date(2020, 1, 9), date(2020, 1, 13)] + ) + assert_series_equal(result, expected) + def test_add_business_days_multiple_holidays() -> None: base_df = pl.DataFrame( diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_is_business_day.py b/py-polars/tests/unit/operations/namespaces/temporal/test_is_business_day.py index 60b2030737ab..489cd501c382 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_is_business_day.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_is_business_day.py @@ -58,6 +58,13 @@ def test_is_business_day( )["date"] expected = pl.Series("date", expected_values) assert_series_equal(result, expected) + result = df.select( + pl.col("date").dt.is_business_day( + holidays=pl.Series(holidays, dtype=pl.Date), week_mask=week_mask + ) + )["date"] + expected = pl.Series("date", expected_values) + assert_series_equal(result, expected) # Holidays are in Series of List of Date, of length 1: result = df.select( pl.col("date").dt.is_business_day( From 259723faf39dd820aa38793063527bb1a608b477 Mon Sep 17 00:00:00 2001 From: ritchie46 Date: Mon, 16 Mar 2026 14:20:49 +0100 Subject: [PATCH 06/13] Python Polars 1.39.1 --- Cargo.lock | 4 ++-- crates/polars-python/src/c_api/mod.rs | 2 +- py-polars/pyproject.toml | 8 ++++---- py-polars/runtime/polars-runtime-32/Cargo.toml | 2 +- py-polars/runtime/polars-runtime-64/Cargo.toml | 2 +- py-polars/runtime/polars-runtime-compat/Cargo.toml | 2 +- py-polars/runtime/template/Cargo.template.toml | 2 +- py-polars/src/polars/_plr.py | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d063bc7cdd10..00e1676d9c5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3561,7 +3561,7 @@ dependencies = [ [[package]] name = "polars-runtime-64" -version = "1.39.0" +version = "1.39.1" dependencies = [ "either", "libc", @@ -3572,7 +3572,7 @@ dependencies = [ [[package]] name = "polars-runtime-compat" -version = "1.39.0" +version = "1.39.1" dependencies = [ "either", "libc", diff --git a/crates/polars-python/src/c_api/mod.rs b/crates/polars-python/src/c_api/mod.rs index 1019cfb8ce87..83403aac7bca 100644 --- a/crates/polars-python/src/c_api/mod.rs +++ b/crates/polars-python/src/c_api/mod.rs @@ -4,7 +4,7 @@ pub mod allocator; // Since Python Polars cannot share its version into here and we need to be able to build this // package correctly without `py-polars`, we need to mirror the version here. // example: 1.35.0-beta.1 -pub static PYPOLARS_VERSION: &str = "1.39.0"; +pub static PYPOLARS_VERSION: &str = "1.39.1"; // We allow multiple features to be set simultaneously so checking with all-features // is possible. In the case multiple are set or none at all, we set the repr to "unknown". diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index 58548001298b..71586beef8ce 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -10,7 +10,7 @@ authors = [ { name = "Ritchie Vink", email = "ritchie46@gmail.com" }, ] # example: 1.35.0b1 -version = "1.39.0" +version = "1.39.1" license = { file = "LICENSE" } requires-python = ">=3.10" @@ -33,7 +33,7 @@ classifiers = [ "Typing :: Typed", ] # example: 1.35.0b1 -dependencies = ["polars-runtime-32 == 1.39.0"] +dependencies = ["polars-runtime-32 == 1.39.1"] [project.urls] Homepage = "https://www.pola.rs/" @@ -44,8 +44,8 @@ Changelog = "https://github.com/pola-rs/polars/releases" [project.optional-dependencies] # Runtimes # example: 1.35.0b1 -rt64 = ["polars-runtime-64 == 1.39.0"] -rtcompat = ["polars-runtime-compat == 1.39.0"] +rt64 = ["polars-runtime-64 == 1.39.1"] +rtcompat = ["polars-runtime-compat == 1.39.1"] # NOTE: keep this list in sync with show_versions() and requirements-dev.txt polars_cloud = ["polars_cloud >= 0.4.0"] diff --git a/py-polars/runtime/polars-runtime-32/Cargo.toml b/py-polars/runtime/polars-runtime-32/Cargo.toml index b788a6de52f2..62c01de9a2ec 100644 --- a/py-polars/runtime/polars-runtime-32/Cargo.toml +++ b/py-polars/runtime/polars-runtime-32/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-32" # example: 1.35.0-beta.1 -version = "1.39.0" +version = "1.39.1" edition = "2021" [lib] diff --git a/py-polars/runtime/polars-runtime-64/Cargo.toml b/py-polars/runtime/polars-runtime-64/Cargo.toml index def3d3ebfe04..7c8276a8e7a2 100644 --- a/py-polars/runtime/polars-runtime-64/Cargo.toml +++ b/py-polars/runtime/polars-runtime-64/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-64" # example: 1.35.0-beta.1 -version = "1.39.0" +version = "1.39.1" edition = "2021" [lib] diff --git a/py-polars/runtime/polars-runtime-compat/Cargo.toml b/py-polars/runtime/polars-runtime-compat/Cargo.toml index 5a06f1d77986..247495a2cbd4 100644 --- a/py-polars/runtime/polars-runtime-compat/Cargo.toml +++ b/py-polars/runtime/polars-runtime-compat/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-compat" # example: 1.35.0-beta.1 -version = "1.39.0" +version = "1.39.1" edition = "2021" [lib] diff --git a/py-polars/runtime/template/Cargo.template.toml b/py-polars/runtime/template/Cargo.template.toml index da6226bf9155..03ab6adf473d 100644 --- a/py-polars/runtime/template/Cargo.template.toml +++ b/py-polars/runtime/template/Cargo.template.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-{{%RT_SUFFIX%}}" # example: 1.35.0-beta.1 -version = "1.39.0" +version = "1.39.1" edition = "2021" [lib] diff --git a/py-polars/src/polars/_plr.py b/py-polars/src/polars/_plr.py index a446fe7a22d5..b992b6d9800e 100644 --- a/py-polars/src/polars/_plr.py +++ b/py-polars/src/polars/_plr.py @@ -8,7 +8,7 @@ from polars._cpu_check import check_cpu_flags # example: 1.35.0-beta.1 -PKG_VERSION = "1.39.0" +PKG_VERSION = "1.39.1" def rt_compat() -> None: From 266141e3a1fe62242df8a3f60f2a34e4a02b2ebf Mon Sep 17 00:00:00 2001 From: ritchie46 Date: Mon, 16 Mar 2026 14:26:20 +0100 Subject: [PATCH 07/13] lockfile --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 00e1676d9c5e..fa8ba0b42f13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3550,7 +3550,7 @@ dependencies = [ [[package]] name = "polars-runtime-32" -version = "1.39.0" +version = "1.39.1" dependencies = [ "either", "libc", From 0f5ae40b7d10612d11e6969e806ebf312f0f063b Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Tue, 17 Mar 2026 01:10:30 +1100 Subject: [PATCH 08/13] fix: Default engine as streaming for `collect_batches` (#26932) --- py-polars/src/polars/lazyframe/frame.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/py-polars/src/polars/lazyframe/frame.py b/py-polars/src/polars/lazyframe/frame.py index eeeaa95326bc..1b8b219e3b91 100644 --- a/py-polars/src/polars/lazyframe/frame.py +++ b/py-polars/src/polars/lazyframe/frame.py @@ -4271,6 +4271,10 @@ def collect_batches( >>> for df in lf.collect_batches(): ... print(df) # doctest: +SKIP """ + engine = _select_engine(engine) + + if engine == "auto": + engine = "streaming" class CollectBatches: def __init__(self, inner: Any) -> None: From 39d1d4cc08224466e5d96befb233df3cf783ddac Mon Sep 17 00:00:00 2001 From: Amber Sprenkels Date: Mon, 16 Mar 2026 14:21:24 +0000 Subject: [PATCH 09/13] fix: Fix the loop bounds in `BitmapBuilder::extend_each_repeated_from_slice_unchecked` (#26928) --- crates/polars-arrow/src/bitmap/builder.rs | 3 ++- py-polars/tests/unit/operations/test_inequality_join.py | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/polars-arrow/src/bitmap/builder.rs b/crates/polars-arrow/src/bitmap/builder.rs index b2ceeb97eec4..ae561fa4faac 100644 --- a/crates/polars-arrow/src/bitmap/builder.rs +++ b/crates/polars-arrow/src/bitmap/builder.rs @@ -242,13 +242,14 @@ impl BitmapBuilder { length: usize, repeats: usize, ) { + debug_assert!(8 * slice.len() >= offset + length); if repeats == 0 { return; } if repeats == 1 { return self.extend_from_slice_unchecked(slice, offset, length); } - for bit_idx in offset..length { + for bit_idx in offset..(offset + length) { let bit = (*slice.get_unchecked(bit_idx / 8) >> (bit_idx % 8)) & 1 != 0; self.extend_constant(repeats, bit); } diff --git a/py-polars/tests/unit/operations/test_inequality_join.py b/py-polars/tests/unit/operations/test_inequality_join.py index b2ab8ea809cc..c087bddf8180 100644 --- a/py-polars/tests/unit/operations/test_inequality_join.py +++ b/py-polars/tests/unit/operations/test_inequality_join.py @@ -15,6 +15,8 @@ if TYPE_CHECKING: from hypothesis.strategies import DrawFn, SearchStrategy + from tests.conftest import PlMonkeyPatch + @pytest.mark.parametrize( ("pred_1", "pred_2"), @@ -690,3 +692,4 @@ def test_boolean_predicate_join_where() -> None: assert "NESTED LOOP JOIN" in plan assert_frame_equal(q.collect(), expect) + From 71873e5d2f4dcb1c83e60eeb5ca23f1524bdd04e Mon Sep 17 00:00:00 2001 From: ritchie46 Date: Tue, 17 Mar 2026 11:07:08 +0100 Subject: [PATCH 10/13] bump --- Cargo.lock | 4 ++-- crates/polars-python/src/c_api/mod.rs | 2 +- py-polars/pyproject.toml | 8 ++++---- py-polars/runtime/polars-runtime-32/Cargo.toml | 2 +- py-polars/runtime/polars-runtime-64/Cargo.toml | 2 +- py-polars/runtime/polars-runtime-compat/Cargo.toml | 2 +- py-polars/runtime/template/Cargo.template.toml | 2 +- py-polars/src/polars/_plr.py | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fa8ba0b42f13..d3dd10e85424 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3561,7 +3561,7 @@ dependencies = [ [[package]] name = "polars-runtime-64" -version = "1.39.1" +version = "1.39.2" dependencies = [ "either", "libc", @@ -3572,7 +3572,7 @@ dependencies = [ [[package]] name = "polars-runtime-compat" -version = "1.39.1" +version = "1.39.2" dependencies = [ "either", "libc", diff --git a/crates/polars-python/src/c_api/mod.rs b/crates/polars-python/src/c_api/mod.rs index 83403aac7bca..6e145d1642fe 100644 --- a/crates/polars-python/src/c_api/mod.rs +++ b/crates/polars-python/src/c_api/mod.rs @@ -4,7 +4,7 @@ pub mod allocator; // Since Python Polars cannot share its version into here and we need to be able to build this // package correctly without `py-polars`, we need to mirror the version here. // example: 1.35.0-beta.1 -pub static PYPOLARS_VERSION: &str = "1.39.1"; +pub static PYPOLARS_VERSION: &str = "1.39.2"; // We allow multiple features to be set simultaneously so checking with all-features // is possible. In the case multiple are set or none at all, we set the repr to "unknown". diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index 71586beef8ce..591e247de5a2 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -10,7 +10,7 @@ authors = [ { name = "Ritchie Vink", email = "ritchie46@gmail.com" }, ] # example: 1.35.0b1 -version = "1.39.1" +version = "1.39.2" license = { file = "LICENSE" } requires-python = ">=3.10" @@ -33,7 +33,7 @@ classifiers = [ "Typing :: Typed", ] # example: 1.35.0b1 -dependencies = ["polars-runtime-32 == 1.39.1"] +dependencies = ["polars-runtime-32 == 1.39.2"] [project.urls] Homepage = "https://www.pola.rs/" @@ -44,8 +44,8 @@ Changelog = "https://github.com/pola-rs/polars/releases" [project.optional-dependencies] # Runtimes # example: 1.35.0b1 -rt64 = ["polars-runtime-64 == 1.39.1"] -rtcompat = ["polars-runtime-compat == 1.39.1"] +rt64 = ["polars-runtime-64 == 1.39.2"] +rtcompat = ["polars-runtime-compat == 1.39.2"] # NOTE: keep this list in sync with show_versions() and requirements-dev.txt polars_cloud = ["polars_cloud >= 0.4.0"] diff --git a/py-polars/runtime/polars-runtime-32/Cargo.toml b/py-polars/runtime/polars-runtime-32/Cargo.toml index 62c01de9a2ec..94d277d34177 100644 --- a/py-polars/runtime/polars-runtime-32/Cargo.toml +++ b/py-polars/runtime/polars-runtime-32/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-32" # example: 1.35.0-beta.1 -version = "1.39.1" +version = "1.39.2" edition = "2021" [lib] diff --git a/py-polars/runtime/polars-runtime-64/Cargo.toml b/py-polars/runtime/polars-runtime-64/Cargo.toml index 7c8276a8e7a2..e71934d8dd63 100644 --- a/py-polars/runtime/polars-runtime-64/Cargo.toml +++ b/py-polars/runtime/polars-runtime-64/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-64" # example: 1.35.0-beta.1 -version = "1.39.1" +version = "1.39.2" edition = "2021" [lib] diff --git a/py-polars/runtime/polars-runtime-compat/Cargo.toml b/py-polars/runtime/polars-runtime-compat/Cargo.toml index 247495a2cbd4..cb75590f1dc2 100644 --- a/py-polars/runtime/polars-runtime-compat/Cargo.toml +++ b/py-polars/runtime/polars-runtime-compat/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-compat" # example: 1.35.0-beta.1 -version = "1.39.1" +version = "1.39.2" edition = "2021" [lib] diff --git a/py-polars/runtime/template/Cargo.template.toml b/py-polars/runtime/template/Cargo.template.toml index 03ab6adf473d..bbdd0e99d862 100644 --- a/py-polars/runtime/template/Cargo.template.toml +++ b/py-polars/runtime/template/Cargo.template.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-{{%RT_SUFFIX%}}" # example: 1.35.0-beta.1 -version = "1.39.1" +version = "1.39.2" edition = "2021" [lib] diff --git a/py-polars/src/polars/_plr.py b/py-polars/src/polars/_plr.py index b992b6d9800e..858d24fef6d7 100644 --- a/py-polars/src/polars/_plr.py +++ b/py-polars/src/polars/_plr.py @@ -8,7 +8,7 @@ from polars._cpu_check import check_cpu_flags # example: 1.35.0-beta.1 -PKG_VERSION = "1.39.1" +PKG_VERSION = "1.39.2" def rt_compat() -> None: From 4f31f727b643b277eeb131c126b7aedc5f3cdc42 Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Wed, 18 Mar 2026 01:02:45 +1100 Subject: [PATCH 11/13] fix: Fix ColumnNotFound due to projection between filter/cache in CSPE (#26946) --- .../src/plans/optimizer/cse/cache_states.rs | 36 +++++++++++++++++-- py-polars/tests/unit/lazyframe/test_cse.py | 26 ++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/crates/polars-plan/src/plans/optimizer/cse/cache_states.rs b/crates/polars-plan/src/plans/optimizer/cse/cache_states.rs index ac5d070b8d91..a8aee8db66cd 100644 --- a/crates/polars-plan/src/plans/optimizer/cse/cache_states.rs +++ b/crates/polars-plan/src/plans/optimizer/cse/cache_states.rs @@ -373,10 +373,42 @@ pub(super) fn set_cache_states( .block_at_cache(1); let lp = pred_pd.optimize(start_lp, lp_arena, expr_arena)?; lp_arena.replace(node, lp.clone()); + + // TODO: Drop filter column if it isn't used after the filter. + + let mut updated_cache_node = node; + + loop { + match lp_arena.get(updated_cache_node) { + IR::Cache { .. } => break, + IR::SimpleProjection { input, .. } => updated_cache_node = *input, + _ => unreachable!(), + } + } + for &parents in &v.parents[1..] { - let node = get_filter_node(parents, lp_arena) + let filter_node = get_filter_node(parents, lp_arena) .expect("expected filter; this is an optimizer bug"); - lp_arena.replace(node, lp.clone()); + + let IR::Filter { input, .. } = lp_arena.get(filter_node) else { + unreachable!() + }; + + let new_lp = match lp_arena.get(*input) { + IR::SimpleProjection { input, columns } => { + debug_assert!(matches!(lp_arena.get(*input), IR::Cache { .. })); + IR::SimpleProjection { + input: updated_cache_node, + columns: columns.clone(), + } + }, + ir => { + debug_assert!(matches!(ir, IR::Cache { .. })); + lp_arena.get(updated_cache_node).clone() + }, + }; + + lp_arena.replace(filter_node, new_lp); } } else { let child = *v.children.first().unwrap(); diff --git a/py-polars/tests/unit/lazyframe/test_cse.py b/py-polars/tests/unit/lazyframe/test_cse.py index 69fa71de1230..bc7a996eeb46 100644 --- a/py-polars/tests/unit/lazyframe/test_cse.py +++ b/py-polars/tests/unit/lazyframe/test_cse.py @@ -1330,3 +1330,29 @@ def f_b(df: pl.DataFrame) -> pl.DataFrame: schema={"A": pl.Int32, "PART": pl.Int32, "B": pl.Int32}, ) assert_frame_equal(out, expected) + + +def test_cspe_projection_between_filter_and_cache_26916() -> None: + lf = pl.LazyFrame( + { + "VendorID": [1, 1, 2, 2, 2], + "total_amount": [10.0, 20.0, 30.0, 40.0, 50.0], + "passenger_count": [1, 2, 1, 3, 2], + } + ) + + g1 = lf.group_by("VendorID").agg(pl.mean("total_amount")) + g2 = lf.group_by("VendorID").agg(pl.mean("passenger_count")) + + q = g1.join(g2, "VendorID").filter(VendorID=1) + + assert_frame_equal( + q.collect(), + pl.DataFrame( + { + "VendorID": 1, + "total_amount": 15.0, + "passenger_count": 1.5, + } + ), + ) From ebe3364c6d32edda793af01b41f186aacc86bea0 Mon Sep 17 00:00:00 2001 From: ritchie46 Date: Fri, 20 Mar 2026 09:41:42 +0100 Subject: [PATCH 12/13] lockfile --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index d3dd10e85424..2424553159a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3550,7 +3550,7 @@ dependencies = [ [[package]] name = "polars-runtime-32" -version = "1.39.1" +version = "1.39.2" dependencies = [ "either", "libc", From 1cd236c60c01572c5ec6fdd252d8b20218d7b440 Mon Sep 17 00:00:00 2001 From: ritchie46 Date: Fri, 20 Mar 2026 09:47:34 +0100 Subject: [PATCH 13/13] version + lockfile --- Cargo.lock | 6 +++--- crates/polars-python/src/c_api/mod.rs | 2 +- py-polars/pyproject.toml | 8 ++++---- py-polars/runtime/polars-runtime-32/Cargo.toml | 2 +- py-polars/runtime/polars-runtime-64/Cargo.toml | 2 +- py-polars/runtime/polars-runtime-compat/Cargo.toml | 2 +- py-polars/runtime/template/Cargo.template.toml | 2 +- py-polars/src/polars/_plr.py | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2424553159a5..31db038b3705 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3550,7 +3550,7 @@ dependencies = [ [[package]] name = "polars-runtime-32" -version = "1.39.2" +version = "1.39.3" dependencies = [ "either", "libc", @@ -3561,7 +3561,7 @@ dependencies = [ [[package]] name = "polars-runtime-64" -version = "1.39.2" +version = "1.39.3" dependencies = [ "either", "libc", @@ -3572,7 +3572,7 @@ dependencies = [ [[package]] name = "polars-runtime-compat" -version = "1.39.2" +version = "1.39.3" dependencies = [ "either", "libc", diff --git a/crates/polars-python/src/c_api/mod.rs b/crates/polars-python/src/c_api/mod.rs index 6e145d1642fe..a01dc20c7417 100644 --- a/crates/polars-python/src/c_api/mod.rs +++ b/crates/polars-python/src/c_api/mod.rs @@ -4,7 +4,7 @@ pub mod allocator; // Since Python Polars cannot share its version into here and we need to be able to build this // package correctly without `py-polars`, we need to mirror the version here. // example: 1.35.0-beta.1 -pub static PYPOLARS_VERSION: &str = "1.39.2"; +pub static PYPOLARS_VERSION: &str = "1.39.3"; // We allow multiple features to be set simultaneously so checking with all-features // is possible. In the case multiple are set or none at all, we set the repr to "unknown". diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index 591e247de5a2..2786285eeac4 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -10,7 +10,7 @@ authors = [ { name = "Ritchie Vink", email = "ritchie46@gmail.com" }, ] # example: 1.35.0b1 -version = "1.39.2" +version = "1.39.3" license = { file = "LICENSE" } requires-python = ">=3.10" @@ -33,7 +33,7 @@ classifiers = [ "Typing :: Typed", ] # example: 1.35.0b1 -dependencies = ["polars-runtime-32 == 1.39.2"] +dependencies = ["polars-runtime-32 == 1.39.3"] [project.urls] Homepage = "https://www.pola.rs/" @@ -44,8 +44,8 @@ Changelog = "https://github.com/pola-rs/polars/releases" [project.optional-dependencies] # Runtimes # example: 1.35.0b1 -rt64 = ["polars-runtime-64 == 1.39.2"] -rtcompat = ["polars-runtime-compat == 1.39.2"] +rt64 = ["polars-runtime-64 == 1.39.3"] +rtcompat = ["polars-runtime-compat == 1.39.3"] # NOTE: keep this list in sync with show_versions() and requirements-dev.txt polars_cloud = ["polars_cloud >= 0.4.0"] diff --git a/py-polars/runtime/polars-runtime-32/Cargo.toml b/py-polars/runtime/polars-runtime-32/Cargo.toml index 94d277d34177..04bcbe613cdd 100644 --- a/py-polars/runtime/polars-runtime-32/Cargo.toml +++ b/py-polars/runtime/polars-runtime-32/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-32" # example: 1.35.0-beta.1 -version = "1.39.2" +version = "1.39.3" edition = "2021" [lib] diff --git a/py-polars/runtime/polars-runtime-64/Cargo.toml b/py-polars/runtime/polars-runtime-64/Cargo.toml index e71934d8dd63..19fc8084f0fc 100644 --- a/py-polars/runtime/polars-runtime-64/Cargo.toml +++ b/py-polars/runtime/polars-runtime-64/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-64" # example: 1.35.0-beta.1 -version = "1.39.2" +version = "1.39.3" edition = "2021" [lib] diff --git a/py-polars/runtime/polars-runtime-compat/Cargo.toml b/py-polars/runtime/polars-runtime-compat/Cargo.toml index cb75590f1dc2..4b494f05e563 100644 --- a/py-polars/runtime/polars-runtime-compat/Cargo.toml +++ b/py-polars/runtime/polars-runtime-compat/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-compat" # example: 1.35.0-beta.1 -version = "1.39.2" +version = "1.39.3" edition = "2021" [lib] diff --git a/py-polars/runtime/template/Cargo.template.toml b/py-polars/runtime/template/Cargo.template.toml index bbdd0e99d862..e54dd0735171 100644 --- a/py-polars/runtime/template/Cargo.template.toml +++ b/py-polars/runtime/template/Cargo.template.toml @@ -1,7 +1,7 @@ [package] name = "polars-runtime-{{%RT_SUFFIX%}}" # example: 1.35.0-beta.1 -version = "1.39.2" +version = "1.39.3" edition = "2021" [lib] diff --git a/py-polars/src/polars/_plr.py b/py-polars/src/polars/_plr.py index 858d24fef6d7..9a83c9e389cf 100644 --- a/py-polars/src/polars/_plr.py +++ b/py-polars/src/polars/_plr.py @@ -8,7 +8,7 @@ from polars._cpu_check import check_cpu_flags # example: 1.35.0-beta.1 -PKG_VERSION = "1.39.2" +PKG_VERSION = "1.39.3" def rt_compat() -> None: