1 change: 1 addition & 0 deletions environment-dev.yml
@@ -5,6 +5,7 @@ channels:
dependencies:
- python=3.11
# Core dependencies (from requirements/core.txt)
- bioframe>=0.7.2
- biopython>=1.85
- matplotlib>=3.9.0
- numpy>=2.2.3
7 changes: 6 additions & 1 deletion pyproject.toml
@@ -63,7 +63,12 @@ optional-dependencies = {test = {file = "requirements/tests.txt"}}

[tool.pytest.ini_options]
testpaths = ["test"]
filterwarnings = ["error"]
filterwarnings = [
"error",
# bioframe <=0.8.0 uses df.groupby(['col']).groups with a single-element
# list, which pandas 3.0 deprecates via Pandas4Warning.
"ignore:In a future version, the keys of:Warning",
]

[tool.coverage.run]
branch = true
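For context on the new `filterwarnings` entry above: the pattern being suppressed is bioframe's internal use of `groupby` with a single-element list, which newer pandas flags because the keys of `.groups` will change in a future version. A minimal sketch of that pattern, with a toy frame that is not part of the PR (the exact warning class and wording depend on the installed pandas version):

import pandas as pd

df = pd.DataFrame({"chromosome": ["chr1", "chr1", "chr2"], "start": [0, 10, 0]})
# Grouping by a one-element list is the idiom bioframe <=0.8.0 uses internally;
# recent pandas warns that the keys of .groups will change in a future version.
groups = df.groupby(["chromosome"]).groups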
1 change: 1 addition & 0 deletions requirements/core.txt
@@ -1,3 +1,4 @@
bioframe >= 0.7.2
biopython >= 1.85
matplotlib >= 3.9.0
numpy >= 2.2.3
1 change: 1 addition & 0 deletions requirements/min.txt
@@ -1,3 +1,4 @@
bioframe == 0.7.2
biopython == 1.85
matplotlib == 3.9.0
numpy == 2.2.3
72 changes: 72 additions & 0 deletions skgenome/cut.py
@@ -0,0 +1,72 @@
"""DataFrame-level cutting operations.

Split genomic regions at boundaries defined by another set of regions,
so that no output interval straddles a start or end coordinate from the
other set.

The functions here operate on plain pandas DataFrame instances, not
GenomicArray types.

"""

from __future__ import annotations

import itertools

import numpy as np
import pandas as pd


def cut(table: pd.DataFrame, other: pd.DataFrame) -> pd.DataFrame:
"""Split intervals in ``table`` at the boundary coordinates of ``other``.

Each interval in ``table`` is split wherever a start or end coordinate
from ``other`` falls strictly within it. Every resulting piece inherits
all data columns from its source row.

Parameters
----------
table : DataFrame
Genomic intervals with at least chromosome, start, end columns.
other : DataFrame
Intervals whose start/end positions define the cut points.

Returns
-------
DataFrame
A copy of ``table`` with rows split at the breakpoints from ``other``.
"""
if table.empty or other.empty:
return table.copy()

# Collect breakpoints per chromosome from other
other_bp: dict[str, np.ndarray] = {}
for chrom, grp in other.groupby("chromosome", sort=False):
other_bp[chrom] = np.unique(
np.concatenate([grp["start"].to_numpy(), grp["end"].to_numpy()])
)

result_chunks: list[pd.DataFrame] = []
for chrom, chrom_table in table.groupby("chromosome", sort=False):
if chrom not in other_bp:
result_chunks.append(chrom_table)
continue

breakpoints = other_bp[chrom]
cut_rows: list[tuple] = []
for row in chrom_table.itertuples(index=False):
# Breakpoints strictly inside (row.start, row.end)
inner = breakpoints[(breakpoints > row.start) & (breakpoints < row.end)]
if len(inner) == 0:
cut_rows.append(row)
else:
bounds = np.concatenate([[row.start], inner, [row.end]])
for seg_start, seg_end in itertools.pairwise(bounds):
cut_rows.append(
row._replace(start=int(seg_start), end=int(seg_end))
)
result_chunks.append(
pd.DataFrame.from_records(cut_rows, columns=chrom_table.columns)
)

if not result_chunks:
return table.iloc[:0]
return pd.concat(result_chunks, ignore_index=True)
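A quick usage sketch for the new `cut` helper, with toy frames that are not part of the PR, tracing the code above: the single 0-100 bin is split at the 40 and 60 boundaries contributed by `other`, and each piece inherits the `gene` value.

import pandas as pd
from skgenome.cut import cut

table = pd.DataFrame(
    {"chromosome": ["chr1"], "start": [0], "end": [100], "gene": ["A"]}
)
other = pd.DataFrame({"chromosome": ["chr1"], "start": [40], "end": [60]})

# Expected pieces: chr1 0-40, 40-60, 60-100, all carrying gene "A".
print(cut(table, other))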
61 changes: 45 additions & 16 deletions skgenome/gary.py
@@ -6,12 +6,16 @@
from collections import OrderedDict
from typing import Any, Callable, TypeVar, TYPE_CHECKING

import bioframe
import numpy as np
import pandas as pd

from .chromsort import sorter_chrom
from .cut import cut
from .intersect import by_ranges, into_ranges, iter_ranges, iter_slices, Numeric
from .merge import flatten, merge

_BF_COLS = ("chromosome", "start", "end")
from .merge import flatten, merge, squash
from .rangelabel import to_label
from .subtract import subtract
from .subdivide import subdivide
@@ -683,10 +687,9 @@ def sort_columns(self) -> None:

# Genome arithmetic

def cut(self, other, combine=None):
"""Split this array's regions at the boundaries in `other`."""
# TODO
return NotImplemented
def cut(self: _GA, other: GenomicArray) -> _GA:
"""Split this array's regions at the boundaries in ``other``."""
return self.as_dataframe(cut(self.data, other.data))

def flatten(
self,
@@ -703,19 +706,33 @@ def intersection(self, other: GenomicArray, mode: str = "outer") -> GenomicArray

The extra fields of `self`, but not `other`, are retained in the output.
"""
# TODO options for which extra fields to keep
# by default, keep just the fields in 'table'
if mode == "trim":
# Slower
chunks = [
chunk.data
for _, chunk in self.by_ranges(other, mode=mode, keep_empty=False)
]
return self.as_dataframe(pd.concat(chunks))
# Faster
slices = iter_slices(self.data, other.data, mode, False)
indices = np.concatenate(list(slices))
return self.as_dataframe(self.data.loc[indices])
if not len(self) or not len(other):
return self.as_dataframe(self.data.iloc[:0])
pairs = bioframe.overlap(
self.data,
other.data,
how="inner",
cols1=_BF_COLS,
cols2=_BF_COLS,
return_index=True,
return_input=True,
)
if mode == "inner":
# Keep only self bins fully contained within an other bin
pairs = pairs[
(pairs["start"] >= pairs["start_"]) & (pairs["end"] <= pairs["end_"])
]
# Sort by other-index then self-index to match by_ranges ordering
pairs = pairs.sort_values(["index_", "index"])
# Return self rows (with duplicates, one per overlap pair)
self_indices = pairs["index"].to_numpy()
return self.as_dataframe(self.data.loc[self_indices])

def merge(
self,
@@ -765,10 +782,22 @@ def resize_ranges(
# Don't modify the original
return self.as_dataframe(table.copy())

def squash(self, combine=None):
"""Combine some groups of rows, by some criteria, into single rows."""
# TODO
return NotImplemented
def squash(
self: _GA,
by: str | None = None,
combine: dict[str, Callable] | None = None,
) -> _GA:
"""Combine consecutive adjacent rows into single rows.

Parameters
----------
by : str or None
If given, only combine consecutive rows with the same value
in this column (e.g. ``"gene"``).
combine : dict or None
Column-to-function mappings for aggregation.
"""
return self.as_dataframe(squash(self.data, by=by, combine=combine))

def subdivide(
self, avg_size: int, min_size: int = 0, verbose: bool = False
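A standalone sketch of the `bioframe.overlap` call shape that the rewritten `intersection` relies on, using toy frames that are not part of the PR. With `return_index=True` and `return_input=True` the result carries `index`/`index_` for the two inputs' row indices and suffixes the second frame's coordinates as `start_`/`end_`, which is what the `mode == "inner"` containment filter compares against.

import bioframe
import pandas as pd

cols = ("chromosome", "start", "end")
bins = pd.DataFrame(
    {"chromosome": ["chr1", "chr1"], "start": [0, 50], "end": [50, 100], "gene": ["A", "B"]}
)
regions = pd.DataFrame({"chromosome": ["chr1"], "start": [25], "end": [75]})

pairs = bioframe.overlap(
    bins, regions, how="inner", cols1=cols, cols2=cols,
    return_index=True, return_input=True,
)
# Keep only bins fully contained within a region, mirroring mode="inner".
contained = pairs[(pairs["start"] >= pairs["start_"]) & (pairs["end"] <= pairs["end_"])]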
12 changes: 0 additions & 12 deletions skgenome/intersect.py
@@ -241,15 +241,3 @@ def _irange_nested(
region_mask[int(end_idx) :] = 0

yield region_mask, start_val, end_val


def venn(table, other, mode: str):
# TODO -- implement 'venn' via fjoin algorithm
# 'cut' table at all 'other' boundaries
# -> extra column '_venn_':int (0, 1, 2)
# 0=self only, 1=both, 2=other only
# -> 'cut' just drops the '_venn_' column
# -> 'subtract' drops 1 and 2?
# (is that faster? probably not)
# -> 'jaccard' does math with it...
return table