Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ccae8ff
Implement st_haszm using WKBBytesExecutor instead (missing one last e…
petern48 Oct 1, 2025
07aa206
Add note about handling last edge case
petern48 Oct 1, 2025
cf031fb
Minor fix to the comments
petern48 Oct 1, 2025
526fc3b
Fix pre-commit
petern48 Oct 1, 2025
a491d91
Fix cargo clippy
petern48 Oct 2, 2025
cf90bcf
Save progress
petern48 Oct 3, 2025
22a6087
Pull out dimension calculation logic into new wkb_header.rs
petern48 Oct 4, 2025
5c616af
Add MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB fixture
petern48 Oct 4, 2025
207ecb1
Fix dimension calculation to support all collection types and add fix…
petern48 Oct 4, 2025
43009f8
Fix clippy and clean up
petern48 Oct 4, 2025
1078bdd
Remove public byte_order method since it's not needed atm
petern48 Oct 4, 2025
4d4e7e0
Perform all wkb_header operations lazily and cache the values as Opti…
petern48 Oct 4, 2025
dfd6c1a
Add python integration test benches
petern48 Oct 4, 2025
0ef812d
Add tests for wkb_header
petern48 Oct 4, 2025
075d6e6
Apply suggestion from @paleolimbot
petern48 Oct 5, 2025
491b3c7
Remaining clean up
petern48 Oct 5, 2025
7efccc0
Update to method to dimensions plural
petern48 Oct 5, 2025
06501e5
Rename method to try_new
petern48 Oct 5, 2025
1b397fd
Update fixture to be multipoint ((1 2 3)) instead of all zeros
petern48 Oct 5, 2025
9ce9f08
Implement refactor
petern48 Oct 7, 2025
617f9b8
Remove inferred dimension case
petern48 Oct 7, 2025
96311fa
Move logic to st_haszm
petern48 Oct 7, 2025
d1d4fc8
Add empty_geometry_first_coord_dimensions test
petern48 Oct 7, 2025
573f7c8
Add test for size
petern48 Oct 7, 2025
b5984dc
Add tests
petern48 Oct 7, 2025
8c1d2f0
Implement fix for first_xy POLYGON logic
petern48 Oct 7, 2025
dc49aac
clean up
petern48 Oct 7, 2025
89ebb4c
Rename from first_coord_dimensions to first_geom_dimensions and adjus…
petern48 Oct 7, 2025
bdc0fae
Update name of method in st_haszm and update some sedona_internal_err…
petern48 Oct 7, 2025
6b9a6ef
Merge branch 'main' into st_haszm_wkb_bytes
petern48 Oct 23, 2025
3c83ff9
Use SedonaGeometryError and remove sedona and datafusion common depen…
petern48 Oct 23, 2025
597c22e
Fix write_geometry arg after merge
petern48 Oct 23, 2025
55b94ef
Create and use read_u32() helper function
petern48 Oct 23, 2025
b77a762
Move all functions into WKbHeader as methods also rename helper to fi…
petern48 Oct 23, 2025
a2218a9
Catch the error instead of hiding it in first_geom_idx
petern48 Oct 23, 2025
b8f1cc3
Use new WkbBuffer that calculates values by consuming bytes and keepi…
petern48 Oct 23, 2025
4e3e525
Fix st_haszm to map sedona errors
petern48 Oct 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions benchmarks/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,42 @@ def queries():

benchmark(queries)

@pytest.mark.parametrize(
    "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
@pytest.mark.parametrize("table", ["collections_simple", "collections_complex"])
def test_st_hasm(self, benchmark, eng, table):
    """Benchmark ``ST_HasM`` on ``geom1`` of the parametrized table for each engine."""
    # Resolve the parametrized engine class through the suite's helper.
    engine = self._get_eng(eng)
    sql = f"SELECT ST_HasM(geom1) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize(
    "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
@pytest.mark.parametrize("table", ["collections_simple", "collections_complex"])
def test_st_hasz(self, benchmark, eng, table):
    """Benchmark ``ST_HasZ`` on ``geom1`` of the parametrized table for each engine."""
    # Resolve the parametrized engine class through the suite's helper.
    engine = self._get_eng(eng)
    sql = f"SELECT ST_HasZ(geom1) from {table}"

    def run_query():
        engine.execute_and_collect(sql)

    benchmark(run_query)

@pytest.mark.parametrize(
"eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
Expand Down
5 changes: 5 additions & 0 deletions python/sedonadb/tests/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,13 @@ def test_st_geomfromwkb(eng, geom):
("LINESTRING Z (0 0 0, 1 1 1)", True),
("POLYGON EMPTY", False),
("MULTIPOINT ((0 0), (1 1))", False),
("MULTIPOINT Z ((0 0 0))", True),
("MULTIPOINT ZM ((0 0 0 0))", True),
("GEOMETRYCOLLECTION EMPTY", False),
# Z-dim specified only in the nested geometry
("GEOMETRYCOLLECTION (POINT Z (0 0 0))", True),
# Z-dim specified on both levels
("GEOMETRYCOLLECTION Z (POINT Z (0 0 0))", True),
("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT Z (0 0 0)))", True),
],
)
Expand Down
101 changes: 72 additions & 29 deletions rust/sedona-functions/src/st_haszm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,17 @@
// under the License.
use std::sync::Arc;

use crate::executor::WkbExecutor;
use crate::executor::WkbBytesExecutor;
use arrow_array::builder::BooleanBuilder;
use arrow_schema::DataType;
use datafusion_common::error::Result;
use datafusion_common::{error::Result, DataFusionError};
use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility,
};
use geo_traits::GeometryCollectionTrait;
use geo_traits::{Dimensions, GeometryTrait};
use sedona_common::sedona_internal_err;
use geo_traits::Dimensions;
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
use sedona_geometry::wkb_header::WkbHeader;
use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
use wkb::reader::Wkb;

pub fn st_hasz_udf() -> SedonaScalarUDF {
SedonaScalarUDF::new(
Expand Down Expand Up @@ -91,13 +89,13 @@ impl SedonaScalarKernel for STHasZm {
_ => unreachable!(),
};

let executor = WkbExecutor::new(arg_types, args);
let executor = WkbBytesExecutor::new(arg_types, args);
let mut builder = BooleanBuilder::with_capacity(executor.num_iterations());

executor.execute_wkb_void(|maybe_item| {
match maybe_item {
Some(item) => {
builder.append_option(invoke_scalar(&item, dim_index)?);
builder.append_option(invoke_scalar(item, dim_index)?);
}
None => builder.append_null(),
}
Expand All @@ -108,27 +106,35 @@ impl SedonaScalarKernel for STHasZm {
}
}

fn invoke_scalar(item: &Wkb, dim_index: usize) -> Result<Option<bool>> {
match item.as_type() {
geo_traits::GeometryType::GeometryCollection(collection) => {
if collection.num_geometries() == 0 {
Ok(Some(false))
} else {
// PostGIS doesn't allow creating a GeometryCollection with geometries of different dimensions
// so we can just check the dimension of the first one
let first_geom = unsafe { collection.geometry_unchecked(0) };
invoke_scalar(first_geom, dim_index)
}
}
_ => {
let geom_dim = item.dim();
match dim_index {
2 => Ok(Some(matches!(geom_dim, Dimensions::Xyz | Dimensions::Xyzm))),
3 => Ok(Some(matches!(geom_dim, Dimensions::Xym | Dimensions::Xyzm))),
_ => sedona_internal_err!("unexpected dim_index"),
}
}
/// Reports whether a WKB geometry carries the dimension selected by `dim_index`.
///
/// Works directly on the raw WKB bytes through [`WkbHeader`] rather than on a
/// parsed geometry.
///
/// * `buf` - raw WKB bytes of a single geometry.
/// * `dim_index` - `2` tests for a Z dimension, `3` tests for an M dimension.
///
/// Returns `Ok(Some(true))` when the selected dimension is present and
/// `Ok(Some(false))` otherwise, including for any other `dim_index` value.
///
/// # Errors
///
/// Failures from [`WkbHeader::try_new`] or [`WkbHeader::dimensions`] are wrapped
/// in [`DataFusionError::External`].
fn invoke_scalar(buf: &[u8], dim_index: usize) -> Result<Option<bool>> {
    let header = WkbHeader::try_new(buf).map_err(|e| DataFusionError::External(Box::new(e)))?;
    let top_level_dimensions = header
        .dimensions()
        .map_err(|e| DataFusionError::External(Box::new(e)))?;

    // The nested geometry may declare dimensions the top level does not,
    // e.g. GEOMETRYCOLLECTION (POINT Z (1 2 3)), so the first nested geometry's
    // dimensions win when the header reports them. Only that first geometry is
    // consulted; later children are not inspected here.
    let dimensions = header
        .first_geom_dimensions()
        .unwrap_or(top_level_dimensions);

    let has_dimension = match dim_index {
        2 => matches!(dimensions, Dimensions::Xyz | Dimensions::Xyzm),
        3 => matches!(dimensions, Dimensions::Xym | Dimensions::Xyzm),
        // Any other index selects neither Z nor M.
        _ => false,
    };
    Ok(Some(has_dimension))
}
Comment on lines +109 to 138
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Correctness risk: only inspecting the first nested geometry can yield false negatives.

For collections/multis where the first child is XY but a later child is Z/M, this returns False even though the geometry “has Z/M”. Example: GEOMETRYCOLLECTION (POINT (0 0), POINT Z (0 0 1)).

  • Keep the current fast path, but when it evaluates to False for collections/multis, fall back to a lightweight scan of all nested headers (still bytes-only) and short-circuit on the first Z/M match.
  • If iterating headers isn’t available yet, add a slow-path fallback (e.g., previous WkbExecutor) only in these uncertain cases to retain semantics without regressing perf on the common path.

Minimal sketch:

 fn invoke_scalar(buf: &[u8], dim_index: usize) -> Result<Option<bool>> {
-    let header = WkbHeader::try_new(buf).map_err(|e| DataFusionError::External(Box::new(e)))?;
+    let header = WkbHeader::try_new(buf).map_err(|e| DataFusionError::External(Box::new(e)))?;
     let top_level_dimensions = header
         .dimensions()
         .map_err(|e| DataFusionError::External(Box::new(e)))?;

     let dimensions = header.first_geom_dimensions().unwrap_or(top_level_dimensions);

     let want = if dim_index == 2 {
         matches!(dimensions, Dimensions::Xyz | Dimensions::Xyzm)
     } else if dim_index == 3 {
         matches!(dimensions, Dimensions::Xym | Dimensions::Xyzm)
     } else {
         false
     };

-    Ok(Some(want))
+    if want {
+        return Ok(Some(true));
+    }
+
+    // Optional: only for collection/multi types with potential for nested dims
+    // Pseudocode—requires iterator support from WkbHeader/WkbBuffer:
+    // for child in header.iter_children(buf)? {
+    //     let dims = child.dimensions()?;
+    //     if (dim_index == 2 && matches!(dims, Dimensions::Xyz | Dimensions::Xyzm))
+    //        || (dim_index == 3 && matches!(dims, Dimensions::Xym | Dimensions::Xyzm)) {
+    //         return Ok(Some(true));
+    //     }
+    // }
+    Ok(Some(false))
 }

Add tests to cover non-first-child Z/M (see separate test suggestion below).


I'll verify this review comment by examining the actual implementation and related code to confirm whether the correctness risk is genuine.


🏁 Script executed:

#!/bin/bash
# Find the st_haszm.rs file and view its full content
fd -t f 'st_haszm\.rs' --exec cat {}

Length of output: 9008


🏁 Script executed:

#!/bin/bash
# Search for WkbHeader definition and first_geom_dimensions method
rg -n 'fn first_geom_dimensions' -A 5 -B 2

Length of output: 1300


🏁 Script executed:

#!/bin/bash
# Search for tests related to st_haszm to see what edge cases are covered
fd -t f 'test.*haszm|haszm.*test' -i --exec cat {}

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check for any geometry collection test cases with mixed dimensions
rg -n 'GEOMETRYCOLLECTION.*POINT.*Z|hasZ.*Collection|hasMember' -i --type rs

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for WkbHeader implementation and available methods
fd -t f '.*header.*\.rs|.*wkb.*\.rs' -i | head -20 | xargs grep -l 'struct WkbHeader\|impl.*WkbHeader' 2>/dev/null

Length of output: 38


🏁 Script executed:

#!/bin/bash
# Search for any iteration over children in WkbHeader or similar structures
rg -n 'iter.*child|for.*child|children\(' --type rs | head -20

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check if there are any other hasZ/hasM implementations or related logic
rg -n 'hasZ|hasM|Xyzm|first_geom' --type rs -B 2 -A 2 | head -40

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Search for the dimensions method and Dimensions enum
rg -n 'enum Dimensions|pub fn dimensions' --type rs -A 5

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for test files in the same directory
fd -t f '.*test.*\.rs|.*_test\.rs' rust/sedona-functions/src/ --exec ls -lh {}

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check what WKB types are considered collections/multis
rg -n 'Multi|Collection' --type rs | grep -i 'enum\|type\|const' | head -20

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for the exact invoke_scalar function and any related logic
rg -n 'fn invoke_scalar' -A 40 --type rs

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check git history or comments around this function
rg -n 'first child|nested|collection' --type rs -i -B 2 -A 2 | head -50

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Search for WkbExecutor to understand the fallback mechanism mentioned
rg -n 'WkbExecutor|invoke_array' --type rs -B 2 -A 5 | head -40

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check if there's logic that handles MultiGeometry or GeometryCollection specially
rg -n 'GeometryCollection|Multi.*Type|0xF|0x7|WkbType' --type rs -C 2 | head -60

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for any safeguards or special handling in dimensions parsing
rg -n 'pub fn dimensions|fn dimensions' --type rs -A 15 | head -60

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check the structure and fields of WkbHeader
rg -n 'struct WkbHeader' -A 20 --type rs

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for the implementation details of first_geom_dimensions and top-level dimensions
rg -n 'first_geom_dimensions|top_level|nested_header' --type rs -B 3 -A 10

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check if there's any recursive or iterative scanning logic already in place
rg -n 'loop|while|for.*in' --type rs --context=5 | grep -A 8 -B 2 'child\|geom\|header' | head -50

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Verify the actual test file if it exists
find rust/sedona-functions/src -name '*test*' -o -name '*tests*' | xargs ls -lh 2>/dev/null

Length of output: 1130


🏁 Script executed:

#!/bin/bash
# Look for mod tests or #[cfg(test)] blocks in st_haszm
rg -n '#\[cfg\(test\)\]|mod tests|#\[test\]' rust/sedona-functions/src/st_haszm.rs -A 30

Length of output: 2448


🏁 Script executed:

#!/bin/bash
# Check if there are integration tests that might cover this
find rust/sedona-functions -name '*test*.rs' -type f | xargs grep -l 'hasZ\|haszm\|POINT.*Z' 2>/dev/null

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look for the full context around the invoke_scalar function to understand how it's called
rg -n 'fn invoke_scalar|fn invoke_array' --type rs -B 5 -A 50 | head -80

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check if WkbBuffer or similar has methods for iterating through geometry components
rg -n 'impl.*WkbBuffer|struct WkbBuffer|fn.*children|fn.*iter' --type rs -A 8 | head -60

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Summarize WkbHeader available methods
rg -n 'impl.*WkbHeader' -A 100 --type rs | head -120

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check the actual geometry type codes to understand collection detection
rg -n 'GeometryType|WkbGeomType|0x07|0x08|MULTI|COLLECTION' --type rs | head -30

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Search for any dimension array scanning or all-children checking logic
rg -n 'scan|all.*dim|iterate|traverse' --type rs -i | grep -v test | head -20

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Final check: look at the exact line range mentioned in the review
sed -n '109,138p' rust/sedona-functions/src/st_haszm.rs

Length of output: 1022


🏁 Script executed:

#!/bin/bash
# Check what top_level_dimensions actually returns and how it differs from first_geom_dimensions
rg -n 'top_level|first_geom_dimensions|dimensions\(' --type rs -B 3 -A 8 | head -80

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look at the complete WkbHeader structure and all its methods
ast-grep --pattern 'impl WkbHeader { $$$ }'

Length of output: 12874


🏁 Script executed:

#!/bin/bash
# Check for GeometryCollection or Multi type detection at the top level
rg -n 'geometry_type|wkb_type|enum.*Wkb.*Type' --type rs | head -30

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for the exact semantics of what the function should return
rg -n 'hasZ|hasM|dimensions' rust/sedona-functions/src/st_haszm.rs -B 2 -A 2

Length of output: 1297


🏁 Script executed:

#!/bin/bash
# Check if there's documentation about the expected behavior
rg -n '//.*[Zz].*dimension|//.*collection|//.*nested' rust/sedona-functions/src/st_haszm.rs

Length of output: 361


🏁 Script executed:

#!/bin/bash
# Look for any comment that explains the choice to use first_geom_dimensions
rg -n 'first' rust/sedona-functions/src/st_haszm.rs -B 5 -A 5

Length of output: 652


🏁 Script executed:

#!/bin/bash
# Check for similar functions that might handle this differently
rg -n 'fn.*haszm|fn.*hasm|fn.*dims' --type rs -i | head -20

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look at imports and dependencies to understand WkbHeader source
head -40 rust/sedona-functions/src/st_haszm.rs

Length of output: 1562


🏁 Script executed:

#!/bin/bash
# Search for WkbHeader in the broader codebase to find its definition
find rust -name '*.rs' -type f | xargs grep -l 'struct WkbHeader' | head -5

Length of output: 38


🏁 Script executed:

#!/bin/bash
# Get WkbHeader definition file and content
fd 'lib\.rs|mod\.rs' rust/sedona-geos/src --exec grep -l 'WkbHeader' {} \; | head -3 | xargs cat 2>/dev/null | head -100

Length of output: 107


🏁 Script executed:

#!/bin/bash
# More targeted search for WkbHeader
rg -n 'pub struct WkbHeader|pub fn first_geom_dimensions|pub fn dimensions' --type rs -A 15

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check if there's a geometry collection type discriminator
rg -n 'GeometryCollection\|MultiPoint\|MultiLineString\|MultiPolygon|0x07|0x08|0x0A|0x0B|0x0C' --type rs -B 2 -A 2 | head -50

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look at the entire st_haszm.rs file to get full picture
wc -l rust/sedona-functions/src/st_haszm.rs && echo "---" && cat rust/sedona-functions/src/st_haszm.rs

Length of output: 9054


🏁 Script executed:

#!/bin/bash
# Check all files in the same directory
ls -la rust/sedona-functions/src/ | head -30

Length of output: 1863


🏁 Script executed:

#!/bin/bash
# Search for wkb-related modules
find rust -type f -name '*.rs' | xargs grep -l 'WkbHeader\|WkbBuffer' | head -10

Length of output: 76


🏁 Script executed:

#!/bin/bash
# Look at WkbHeader in sedona-core or common location
rg -n 'pub struct WkbHeader\|pub fn first_geom_dimensions' -A 25 --type rs

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check the real behavior by looking at how first_geom_dimensions is used elsewhere
rg -n 'first_geom_dimensions' --type rs -B 3 -A 3

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for the specific test case mentioned in the review
rg -n 'GEOMETRYCOLLECTION.*POINT.*0.*0.*POINT.*Z.*0.*0.*1' --type rs -i

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check for any test about mixed-dimension geometries
rg -n 'mixed|different.*dim|varying' rust/sedona-functions/src/st_haszm.rs -i

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Search for test modules
ast-grep --pattern '#[cfg(test)] mod $_ { $$$ }'

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look for all test functions in related files
rg -n '#\[test\]' --type rs -A 3 | grep -E 'hasz|hasm|dimension' -i -B 1 -A 3

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check the structure around invoke_scalar in context
sed -n '90,150p' rust/sedona-functions/src/st_haszm.rs

Length of output: 1876


🏁 Script executed:

#!/bin/bash
# Final verification: check what WkbHeader methods exist
rg -n 'pub fn' rust/sedona-functions/src/ --type rs | grep -i 'header\|dimension'

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for all references to this function to understand its usage
rg -n 'invoke_scalar' --type rs -B 2 -A 2

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check for geometry collection handling in invoke_array
rg -n 'invoke_array' --type rs -A 80 | head -100

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for the complete picture of how dimensions are handled for collections
rg -n 'pub fn dimensions\|pub fn first_geom_dimensions\|pub fn top_level_dimensions' --type rs -B 2 -A 20

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Understand the actual file structure
find rust -name '*.rs' | xargs grep -l 'struct WkbHeader' -A 20 | head -1 | xargs cat | head -150

Length of output: 5370


🏁 Script executed:

#!/bin/bash
# Check the entire sedona-common or sedona-core crate structure
find rust -name 'lib.rs' | xargs cat | grep -A 30 'WkbHeader\|dimensions' | head -100

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Try a different approach - find where WkbHeader is defined
find rust -name '*.rs' -exec grep -l 'pub struct WkbHeader' {} \;

Length of output: 38


🏁 Script executed:

#!/bin/bash
# Get the full st_haszm.rs file and WkbHeader related code
cat rust/sedona-functions/src/st_haszm.rs

Length of output: 9008


🏁 Script executed:

#!/bin/bash
# Find WkbHeader definition
find rust -name '*.rs' -type f -exec grep -l 'pub struct WkbHeader\|impl WkbHeader' {} \;

Length of output: 38


🏁 Script executed:

#!/bin/bash
# Search for first_geom_dimensions implementation and usage
rg -n 'first_geom_dimensions' --type rs -B 5 -A 10

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for test files related to haszm or dimensions
rg -n '#\[test\].*haszm|#\[test\].*dimension' --type rs -i -A 15

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check if geometry collection types are handled specially anywhere
rg -n 'GeometryCollection\|MultiPoint|MultiLineString|0xF|wkb.*type' --type rs -i -B 2 -A 2 | head -60

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Search for invoke_array to understand the fallback mechanism
rg -n 'invoke_array|fn invoke' --type rs -B 3 -A 20 | head -80

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for any child iteration or nested geometry scanning
rg -n 'children\(|iter_geom|for.*child|while.*byte' --type rs -B 3 -A 5

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check what top_level_dimensions returns vs first_geom_dimensions
rg -n 'fn dimensions\(|pub fn dimensions' --type rs -A 15 | head -50

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for geometry collection tests specifically
rg -n 'test.*collection|geometrycollection' --type rs -i -A 10 | head -60

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Check related WKB handling code
find rust -path '*/wkb*' -o -path '*/common*' -name '*.rs' | head -10 | xargs ls -lh

Length of output: 469


🏁 Script executed:

#!/bin/bash
# Get dimensions enum and related type info
rg -n 'enum Dimensions|pub enum.*Dim' --type rs -A 8

Length of output: 26


🏁 Script executed:

#!/bin/bash
# Look for comments explaining the logic around first geometry
rg -n 'first.*geom|first.*child|nested.*dim' rust/sedona-functions/src/st_haszm.rs -B 2 -A 2

Length of output: 265


🏁 Script executed:

#!/bin/bash
# Check the entire invoke_scalar implementation in detailed context
sed -n '100,145p' rust/sedona-functions/src/st_haszm.rs

Length of output: 1372


Issue verified: false negatives for geometry collections with Z/M only in non-first children.

The concern is valid. The function uses first_geom_dimensions(), which only inspects the first nested geometry. For a collection like GEOMETRYCOLLECTION (POINT (0 0), POINT Z (0 0 1)), it would return false for hasZ even though the geometry does contain Z. Tests confirm this gap—existing tests only cover cases where Z/M appears in the first child or at the top level.

To fix, implement the suggested approach: keep the fast path, but when it returns false for collection/multi types, scan all nested headers to verify no Z/M exists anywhere. This requires either:

  1. Adding child iteration support to WkbHeader/WkbBuffer (bytes-only, no full parsing), or
  2. Falling back to a slower full-geometry inspection only for uncertain cases

Add test case covering GEOMETRYCOLLECTION (POINT (0 0), POINT Z (0 0 1)) to prevent regression.

🤖 Prompt for AI Agents
In rust/sedona-functions/src/st_haszm.rs around lines 109 to 138, the fast-path
uses header.first_geom_dimensions() which misses Z/M in non-first children
(e.g., GEOMETRYCOLLECTION (POINT (0 0), POINT Z (0 0 1))) causing false
negatives; keep the current fast-path but when it would return false for
collection/multi types, scan all nested child headers to detect any Z/M and only
then return false—implement this by adding bytes-only child-header iteration to
WkbHeader/WkbBuffer (preferred) or, if that is not available, fall back to a
slower full-geometry inspection for those uncertain cases; also add a test for
GEOMETRYCOLLECTION (POINT (0 0), POINT Z (0 0 1)) to prevent regression.


#[cfg(test)]
Expand All @@ -137,7 +143,9 @@ mod tests {
use datafusion_expr::ScalarUDF;
use rstest::rstest;
use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
use sedona_testing::testers::ScalarUdfTester;
use sedona_testing::{
fixtures::MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB, testers::ScalarUdfTester,
};

use super::*;

Expand Down Expand Up @@ -184,11 +192,19 @@ mod tests {
let result = m_tester.invoke_wkb_scalar(None).unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Null);

// Z-dimension specified only in the nested geometry, but not the geom collection level
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT Z (1 2 3))"))
.unwrap();
z_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));

// Z-dimension specified on both the geom collection and nested geometry level
// Geometry collection with Z dimension both on the geom collection and nested geometry level
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z (POINT Z (1 2 3))"))
.unwrap();
z_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));

let result = m_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT M (1 2 3))"))
.unwrap();
Expand All @@ -203,5 +219,32 @@ mod tests {
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION EMPTY"))
.unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(false)));

// Empty geometry collections with Z or M dimensions
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z EMPTY"))
.unwrap();
z_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));

let result = m_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION M EMPTY"))
.unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Boolean(Some(true)));
}

#[test]
fn multipoint_with_inferred_z_dimension() {
    // Feed the same fixture bytes to both UDFs: ST_HasZ must report true and
    // ST_HasM must report false.
    let wkb = ScalarValue::Binary(Some(MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB.to_vec()));
    let cases = [(st_hasz_udf(), true), (st_hasm_udf(), false)];

    for (udf, expected) in cases {
        let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]);
        assert_eq!(
            tester.invoke_scalar(wkb.clone()).unwrap(),
            ScalarValue::Boolean(Some(expected))
        );
    }
}
}
1 change: 1 addition & 0 deletions rust/sedona-geometry/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ pub mod point_count;
pub mod transform;
pub mod types;
pub mod wkb_factory;
pub mod wkb_header;
Loading
Loading