pola-rs · gab23r · Apr 14, 2026 · Apr 14, 2026 · Apr 15, 2026
@@ -154,7 +154,7 @@
   "Schema_for_DataType_and_null": "6f5ccfa2d9f3beca900c1b4ded6dade8173e200287e286f187e69c66bb1bedca",
   "Schema_for_Field_and_Map_of_string": "2f9fffc68f13a663609cf6184b816a65b4971f37684ee4f7fe8642c53358a20c",
   "SearchSortedSide": "3976138cd5159a641e118a57aedcb079ef8b131b84fdcb102413eb8802a27403",
-  "Selector": "fcdb32f1c6ccb9f71a57afd77ba233a38888e736dc7b3e4562c54a20e9a0a290",
+  "Selector": "678b9968c5c46b913a5ee380c7e220f001b853a3136ee41bfd45e589d92355c6",
   "SequenceKind": "bff6b860f53c19db2c7d1a02b57b2c27f683a69ddd1668c44eb23c24b436a138",
   "SerializeOptions": "c16fee7c896396e39b9cc6ff4da53315e79edc3846738f8bafeea104e4bc0dc4",
   "Series": "5bbddd4f899afa592c318b20bb8d0bdfe2877fa5bf1a63d9cd0da908ac3aec0e",

@@ -168,6 +168,11 @@ pub enum Selector {
 
     Wildcard,
     Empty,
+
+    /// Select all columns that are not output by any other expression in the projection.
+    /// This is handled specially in `rewrite_projections` - the output names of all
+    /// non-`Remaining` expressions are added to `ignored_columns` before expansion.
+    Remaining,
 }
 
 fn dtype_selector(
@@ -265,7 +270,9 @@ impl Selector {
                         .cloned(),
                 )
             },
-            Self::Wildcard => PlIndexSet::from_iter(
+            // Remaining behaves like Wildcard - the difference is that rewrite_projections
+            // adds the output columns of the other expressions to ignored_columns
+            Self::Wildcard | Self::Remaining => PlIndexSet::from_iter(
                 schema
                     .iter_names()
                     .filter(|name| !ignored_columns.contains(*name))
@@ -304,7 +311,7 @@ impl Selector {
 
             Self::ByDType(dts) => Some(dts.clone()),
 
-            Self::ByName { .. } | Self::ByIndex { .. } | Self::Matches(_) => None,
+            Self::ByName { .. } | Self::ByIndex { .. } | Self::Matches(_) | Self::Remaining => None,
         }
     }
 
@@ -671,6 +678,7 @@ impl fmt::Display for Selector {
             },
             Self::Matches(s) => write!(f, "cs.matches(\"{s}\")"),
             Self::Wildcard => f.write_str("cs.all()"),
+            Self::Remaining => f.write_str("cs.remaining()"),
             Self::Empty => f.write_str("cs.empty()"),
         }
     }

@@ -4,6 +4,7 @@ use super::*;
 use crate::constants::{
     POLARS_ELEMENT, POLARS_STRUCTFIELDS, get_pl_element_name, get_pl_structfields_name,
 };
+use crate::utils::expr_output_name;
 
 pub fn prepare_projection(
     exprs: Vec<Expr>,
@@ -45,20 +46,96 @@ pub fn rewrite_projections(
     schema: &Schema,
     opt_flags: &mut OptFlags,
 ) -> PolarsResult<Vec<Expr>> {
+    // Check if any expression contains `Selector::Remaining`.
+    // If so, we need to track output columns as we expand.
+    let has_remaining = exprs.iter().any(expr_contains_remaining);
+
+    if !has_remaining {
+        // Fast path: no Remaining selector, expand normally
+        let mut result = Vec::with_capacity(exprs.len() + schema.len());
+        for expr in &exprs {
+            expand_expression(
+                expr,
+                ignored_selector_columns,
+                schema,
+                &mut result,
+                opt_flags,
+            )?;
+        }
+        return Ok(result);
+    }
+
+    // When Remaining is present, expansion takes two passes.
+    // Each Remaining selector excludes the output columns of ALL non-Remaining expressions,
+    // wherever they appear. To achieve this, we first collect all non-Remaining output names,
+    // then expand expressions containing Remaining with those names added to ignored_columns.
+    let mut all_output_names = PlHashSet::default();
+
+    // First, collect output names from all non-Remaining expressions
+    for expr in &exprs {
+        if !expr_contains_remaining(expr) {
+            let mut expanded = Vec::new();
+            expand_expression(
+                expr,
+                ignored_selector_columns,
+                schema,
+                &mut expanded,
+                opt_flags,
+            )?;
+            for e in &expanded {
+                if let Ok(name) = expr_output_name(e) {
+                    all_output_names.insert(name);
+                }
+            }
+        }
+    }
+
+    // Now expand all expressions, adding collected names to ignored_columns for Remaining
+    let mut ignored_with_outputs = ignored_selector_columns.clone();
+    ignored_with_outputs.extend(all_output_names);
+
     let mut result = Vec::with_capacity(exprs.len() + schema.len());
     for expr in &exprs {
-        expand_expression(
-            expr,
-            ignored_selector_columns,
-            schema,
-            &mut result,
-            opt_flags,
-        )?;
+        if expr_contains_remaining(expr) {
+            // Expand with all output names excluded
+            expand_expression(expr, &ignored_with_outputs, schema, &mut result, opt_flags)?;
+        } else {
+            // Expand normally
+            expand_expression(
+                expr,
+                ignored_selector_columns,
+                schema,
+                &mut result,
+                opt_flags,
+            )?;
+        }
     }
 
     Ok(result)
 }
 
+/// Report whether iterating over `expr` yields a selector that contains
+/// `Selector::Remaining` (including nested inside selector combinations).
+fn expr_contains_remaining(expr: &Expr) -> bool {
+    expr.into_iter().any(|node| {
+        matches!(node, Expr::Selector(inner) if selector_contains_remaining(inner))
+    })
+}
+
+/// Report whether `selector` is `Remaining` or nests it inside a set combination.
+fn selector_contains_remaining(selector: &Selector) -> bool {
+    let (Selector::Union(left, right)
+    | Selector::Difference(left, right)
+    | Selector::ExclusiveOr(left, right)
+    | Selector::Intersect(left, right)) = selector
+    else {
+        // Not one of the combinations above: only a bare `Remaining` counts.
+        return matches!(selector, Selector::Remaining);
+    };
+    // A combination contains `Remaining` if either operand does.
+    selector_contains_remaining(left) || selector_contains_remaining(right)
+}
+
 fn toggle_cse_for_structs(opt_flags: &mut OptFlags) {
     if opt_flags.contains(OptFlags::EAGER) && !opt_flags.contains(OptFlags::NEW_STREAMING) {
         use polars_core::config::verbose;

@@ -238,6 +238,11 @@ impl PySelector {
         dsl::functions::all().into()
     }
 
+    #[staticmethod]
+    fn remaining() -> Self {
+        Selector::Remaining.into()
+    }
+
     fn hash(&self) -> u64 {
         let mut hasher = std::hash::DefaultHasher::default();
         self.inner.hash(&mut hasher);

@@ -2047,6 +2047,8 @@ class PySelector:
     def empty() -> PySelector: ...
     @staticmethod
     def all() -> PySelector: ...
+    @staticmethod
+    def remaining() -> PySelector: ...
     def hash(self) -> int: ...
 
 class PyOptFlags:

@@ -80,6 +80,7 @@
     "matches",
     "nested",
     "numeric",
+    "remaining",
     "signed_integer",
     "starts_with",
     "string",
@@ -2808,6 +2809,69 @@ def numeric() -> Selector:
     return Selector._from_pyselector(PySelector.numeric())
 
 
+@unstable()
+def remaining() -> Selector:
+    """
+    Select all columns whose names do not appear as output names of other expressions.
+
+    .. warning::
+        This functionality is considered **unstable**. It may be changed
+        at any point without it being considered a breaking change.
+
+    This selector is useful in `select` and `with_columns` operations when you want
+    to perform an operation on specific columns while also keeping all other columns
+    that are not already produced by other expressions.
+
+    See Also
+    --------
+    all : Select all columns.
+    exclude : Select all columns except those matching the given columns, datatypes,
+        or selectors.
+
+    Examples
+    --------
+    >>> import polars as pl
+    >>> import polars.selectors as cs
+    >>> df = pl.DataFrame(
+    ...     {
+    ...         "b": [4, 5, 6],
+    ...         "a": [1, 2, 3],
+    ...         "c": [7, 8, 9],
+    ...     }
+    ... )
+
+    Transform column "a" and reorder columns:
+
+    >>> df.select(pl.col("a") * 10, cs.remaining())
+    shape: (3, 3)
+    ┌─────┬─────┬─────┐
+    │ a   ┆ b   ┆ c   │
+    │ --- ┆ --- ┆ --- │
+    │ i64 ┆ i64 ┆ i64 │
+    ╞═════╪═════╪═════╡
+    │ 10  ┆ 4   ┆ 7   │
+    │ 20  ┆ 5   ┆ 8   │
+    │ 30  ┆ 6   ┆ 9   │
+    └─────┴─────┴─────┘
+
+    Cast the remaining columns while also selecting and aliasing other columns:
+
+    >>> df.select(pl.col("a"), cs.remaining().cast(pl.Float64), pl.col("a").alias("d"))
+    shape: (3, 4)
+    ┌─────┬─────┬─────┬─────┐
+    │ a   ┆ b   ┆ c   ┆ d   │
+    │ --- ┆ --- ┆ --- ┆ --- │
+    │ i64 ┆ f64 ┆ f64 ┆ i64 │
+    ╞═════╪═════╪═════╪═════╡
+    │ 1   ┆ 4.0 ┆ 7.0 ┆ 1   │
+    │ 2   ┆ 5.0 ┆ 8.0 ┆ 2   │
+    │ 3   ┆ 6.0 ┆ 9.0 ┆ 3   │
+    └─────┴─────┴─────┴─────┘
+
+    """
+    return Selector._from_pyselector(PySelector.remaining())
+
+
 def object() -> Selector:
     """
     Select all object columns.

@@ -1158,3 +1158,128 @@ def test_multiline_colname_matches() -> None:
         cs.contains(prefix).alias("contains"),
     )
     assert res.columns == ["starts_with", "ends_with", "contains"]
+
+
+def test_selector_remaining_12067() -> None:
+    """Test cs.remaining() selector that selects columns not explicitly named."""
+    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
+
+    # Basic case: one explicit column, remaining selects the rest
+    result = df.select(pl.col("a") * 10, cs.remaining())
+    assert result.columns == ["a", "b", "c"]
+    assert result["a"].to_list() == [10, 20, 30]
+    assert result["b"].to_list() == [4, 5, 6]
+    assert result["c"].to_list() == [7, 8, 9]
+
+    # Expression using multiple columns - only output name is excluded
+    result = df.select(pl.col("a") + pl.col("b"), cs.remaining())
+    assert result.columns == ["a", "b", "c"]
+    assert result["a"].to_list() == [5, 7, 9]  # a + b
+    assert result["b"].to_list() == [4, 5, 6]
+    assert result["c"].to_list() == [7, 8, 9]
+
+    # with_columns preserves original columns
+    result = df.with_columns(pl.col("a").alias("d"), cs.remaining().cast(pl.Float64))
+    assert result.columns == ["a", "b", "c", "d"]
+    assert result["b"].dtype == pl.Float64
+    assert result["c"].dtype == pl.Float64
+    assert result["d"].to_list() == [1, 2, 3]
+
+    # No explicit columns: remaining selects all
+    result = df.select(cs.remaining())
+    assert result.columns == ["a", "b", "c"]
+
+
+def test_selector_remaining_with_selector_combination() -> None:
+    """Test cs.remaining() combined with other selectors."""
+    df = pl.DataFrame(
+        {
+            "int_a": [1, 2],
+            "int_b": [3, 4],
+            "str_c": ["x", "y"],
+            "float_d": [1.0, 2.0],
+        },
+        schema_overrides={"int_a": pl.Int32, "int_b": pl.Int32, "float_d": pl.Float32},
+    )
+
+    # Use selector with remaining
+    result = df.select(cs.integer().cast(pl.Int64), cs.remaining())
+    assert result.columns == ["int_a", "int_b", "str_c", "float_d"]
+    assert result["int_a"].dtype == pl.Int64
+    assert result["int_b"].dtype == pl.Int64
+    assert result["str_c"].dtype == pl.String
+    assert result["float_d"].dtype == pl.Float32
+
+
+def test_selector_remaining_with_contains_no_duplicate() -> None:
+    """Test cs.remaining() correctly excludes columns selected by cs.contains()."""
+    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
+
+    # This should not raise a DuplicateError - remaining should exclude
+    # columns selected by cs.contains("a") and explicitly named "b"
+    result = df.select(cs.contains("a") * 10, cs.remaining(), "b")
+    assert result.columns == ["a", "c", "b"]
+    assert result["a"].to_list() == [10, 20, 30]
+    assert result["c"].to_list() == [7, 8, 9]
+    assert result["b"].to_list() == [4, 5, 6]
+
+
+def test_selector_remaining_first_excludes_output_names() -> None:
+    """Test cs.remaining() excludes output names, not referenced columns."""
+    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
+
+    # remaining() comes first, followed by expression using a and b
+    # The expression outputs column "a", so remaining should select "b" and "c"
+    result = df.select(cs.remaining(), pl.col("a") + pl.col("b"))
+    assert result.columns == ["b", "c", "a"]
+    assert result["b"].to_list() == [4, 5, 6]
+    assert result["c"].to_list() == [7, 8, 9]
+    assert result["a"].to_list() == [5, 7, 9]  # a + b
+
+
+def test_selector_remaining_with_selector_arithmetic() -> None:
+    """Test cs.remaining() works with selector arithmetic."""
+    df = pl.DataFrame(
+        {
+            "a": [1, 2],
+            "b": [3, 4],
+            "c": [5, 6],
+            "group": ["x", "y"],
+        }
+    )
+
+    # remaining() - cs.string() should exclude 'a' (explicit) and string columns
+    result = df.select(pl.col("a"), cs.remaining() - cs.string())
+    assert result.columns == ["a", "b", "c"]
+
+    # remaining() & cs.numeric() should exclude 'a' and keep only numeric remaining
+    result = df.select(pl.col("a"), cs.remaining() & cs.numeric())
+    assert result.columns == ["a", "b", "c"]
+
+    # remaining() | cs.string() should exclude 'a', include remaining + string
+    result = df.select(pl.col("a"), cs.remaining() | cs.string())
+    assert result.columns == ["a", "b", "c", "group"]
+
+
+def test_selector_remaining_in_group_by_agg() -> None:
+    """Test cs.remaining() works correctly in group_by().agg() context."""
+    df = pl.DataFrame(
+        {
+            "a": [1, 2, 3, 4],
+            "b": [4, 5, 6, 7],
+            "c": [7, 8, 9, 10],
+            "group": ["x", "x", "y", "y"],
+        }
+    )
+
+    # remaining() in agg should exclude group key AND explicitly aggregated columns
+    result = df.group_by("group").agg(pl.col("a").sum(), cs.remaining().sum())
+
+    # Should have group + a + remaining (b, c)
+    assert set(result.columns) == {"group", "a", "b", "c"}
+
+    # Verify values are correct sums
+    result_sorted = result.sort("group")
+    assert result_sorted["a"].to_list() == [3, 7]  # x: 1+2, y: 3+4
+    assert result_sorted["b"].to_list() == [9, 13]  # x: 4+5, y: 6+7
+    assert result_sorted["c"].to_list() == [15, 19]  # x: 7+8, y: 9+10