From fbd5cfc7ee5ca2806f972a73a98eb87a78323b42 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 15:17:26 -0600 Subject: [PATCH 01/16] first go --- rust/sedona-functions/src/executor.rs | 11 +- rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/register.rs | 1 + rust/sedona-functions/src/st_asewkb.rs | 330 +++++++++++++++++++++++++ 4 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 rust/sedona-functions/src/st_asewkb.rs diff --git a/rust/sedona-functions/src/executor.rs b/rust/sedona-functions/src/executor.rs index 0686070fd..f1d679d28 100644 --- a/rust/sedona-functions/src/executor.rs +++ b/rust/sedona-functions/src/executor.rs @@ -18,7 +18,7 @@ use std::iter::zip; use arrow_array::ArrayRef; use arrow_schema::DataType; -use datafusion_common::cast::{as_binary_array, as_binary_view_array}; +use datafusion_common::cast::{as_binary_array, as_binary_view_array, as_struct_array}; use datafusion_common::error::Result; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::ColumnarValue; @@ -357,6 +357,15 @@ impl IterGeo for ArrayRef { } SedonaType::Wkb(_, _) => iter_wkb_binary(as_binary_array(self)?, func), SedonaType::WkbView(_, _) => iter_wkb_binary(as_binary_view_array(self)?, func), + SedonaType::Arrow(DataType::Struct(fields)) + if fields.len() == 2 && fields[0].name() == "item" && fields[1].name() == "crs" => + { + let struct_array = as_struct_array(self)?; + let item_type = SedonaType::from_storage_field(&fields[0])?; + struct_array + .column(0) + .iter_as_wkb_bytes(&item_type, num_iterations, func) + } _ => { // We could cast here as a fallback, iterate and cast per-element, or // implement iter_as_something_else()/supports_iter_xxx() when more geo array types diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 6e38e4262..6e8f884bd 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -28,6 +28,7 @@ mod st_affine_helpers; pub mod st_analyze_agg; mod st_area; mod st_asbinary; +mod st_asewkb; mod st_asgeojson; mod st_astext; mod st_azimuth; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index 80dbbd823..3cf21a674 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -66,6 +66,7 @@ pub fn default_function_set() -> FunctionSet { crate::st_affine::st_affine_udf, crate::st_area::st_area_udf, crate::st_asbinary::st_asbinary_udf, + crate::st_asewkb::st_asewkb_udf, crate::st_asgeojson::st_asgeojson_udf, crate::st_astext::st_astext_udf, crate::st_azimuth::st_azimuth_udf, diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs new file mode 100644 index 000000000..313ccf6a5 --- /dev/null +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -0,0 +1,330 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::{io::Write, sync::Arc, vec}; + +use arrow_array::builder::BinaryBuilder; +use arrow_schema::DataType; +use datafusion_common::{ + cast::{as_string_view_array, as_struct_array}, + error::Result, + exec_err, ScalarValue, +}; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use geo_traits::GeometryTrait; +use sedona_common::sedona_internal_err; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; +use sedona_schema::{crs::deserialize_crs, datatypes::SedonaType, matchers::ArgMatcher}; +use wkb::reader::{Dimension, Wkb}; + +use crate::executor::WkbExecutor; + +/// ST_AsEWKB() scalar UDF implementation +/// +/// An implementation of WKB writing using GeoRust's wkt crate. +pub fn st_asewkb_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "st_asewkb", + vec![Arc::new(STAsEWKBItemCrs {}), Arc::new(STAsEWKB {})], + Volatility::Immutable, + Some(st_asewkb_doc()), + ) +} + +fn st_asewkb_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + r#"Return the Extended Well-Known Binary (EWKB) representation of a geometry or geography. + +Compared to ST_AsBinary(), this function embeds an integer SRID derived from the type or dervied +from the item-level CRS for item CRS types. This is particularly useful for integration with +PostGIS"#, + "ST_AsEWKB (A: Geometry)", + ) + .with_argument("geom", "geometry: Input geometry or geography") + .with_sql_example("SELECT ST_AsEWKB(ST_Point(1.0, 2.0, 4326))") + .build() +} + +#[derive(Debug)] +struct STAsEWKB {} + +impl SedonaScalarKernel for STAsEWKB { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry_or_geography()], + SedonaType::Arrow(DataType::Binary), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = WkbExecutor::new(arg_types, args); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + + let maybe_srid = match &arg_types[0] { + SedonaType::Wkb(_, crs) | SedonaType::WkbView(_, crs) => match crs { + Some(crs) => match crs.srid()? { + Some(srid) if srid != 0 => Some(srid), + _ => None, + }, + None => None, + }, + _ => return sedona_internal_err!("Unexpected input to invoke_batch in ST_AsEWKB"), + }; + + executor.execute_wkb_void(|maybe_wkb| { + match maybe_wkb { + Some(wkb) => { + write_geometry(&wkb, maybe_srid, &mut builder)?; + builder.append_value([]); + } + None => builder.append_null(), + } + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[derive(Debug)] +struct STAsEWKBItemCrs {} + +impl SedonaScalarKernel for STAsEWKBItemCrs { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_item_crs()], + SedonaType::Arrow(DataType::Binary), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = WkbExecutor::new(arg_types, args); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + + let crs_array_ref = match &args[0] { + ColumnarValue::Array(array) => { + let struct_array = as_struct_array(array)?; + struct_array.column(1).clone() + } + ColumnarValue::Scalar(ScalarValue::Struct(struct_array)) => { + struct_array.column(1).clone() + } + _ => return sedona_internal_err!("Unexpected item_crs type"), + }; + + let crs_array = as_string_view_array(&crs_array_ref)?; + let mut srid_iter = crs_array + .into_iter() + .map(|maybe_crs_str| match maybe_crs_str { + None => Ok(None), + Some(crs_str) => match deserialize_crs(crs_str)? { + None => Ok(None), + Some(crs) => match crs.srid()? { + Some(srid) => Ok(Some(srid)), + None => exec_err!("CRS {crs} cannot be represented by a single SRID"), + }, + }, + }); + + executor.execute_wkb_void(|maybe_wkb| { + match maybe_wkb { + Some(wkb) => { + write_geometry(&wkb, srid_iter.next().unwrap()?, &mut builder)?; + builder.append_value([]); + } + None => builder.append_null(), + } + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +const EWKB_Z_BIT: u32 = 0x80000000; +const EWKB_M_BIT: u32 = 0x40000000; +const EWKB_SRID_BIT: u32 = 0x20000000; + +fn write_geometry(geom: &Wkb, srid: Option, buf: &mut impl Write) -> Result<()> { + match geom.as_type() { + geo_traits::GeometryType::Point(p) => write_point(p, srid, buf), + geo_traits::GeometryType::LineString(ls) => write_linestring(ls, srid, buf), + geo_traits::GeometryType::Polygon(poly) => todo!(), + geo_traits::GeometryType::MultiPoint(mp) => todo!(), + geo_traits::GeometryType::MultiLineString(mls) => todo!(), + geo_traits::GeometryType::MultiPolygon(mpoly) => todo!(), + geo_traits::GeometryType::GeometryCollection(gc) => todo!(), + _ => exec_err!("Unsupported geometry type in ST_AsEWKB()"), + } +} + +fn write_point(geom: &wkb::reader::Point, srid: Option, buf: &mut impl Write) -> Result<()> { + write_geometry_type_and_srid(1, geom.dimension(), srid, buf)?; + buf.write_all(geom.coord_slice())?; + Ok(()) +} + +fn write_linestring( + geom: &wkb::reader::LineString, + srid: Option, + buf: &mut impl Write, +) -> Result<()> { + write_geometry_type_and_srid(2, geom.dimension(), srid, buf)?; + buf.write_all(geom.coords_slice())?; + Ok(()) +} + +fn write_geometry_type_and_srid( + mut base_type: u32, + dimensions: Dimension, + srid: Option, + buf: &mut impl Write, +) -> Result<()> { + buf.write_all(&[0x01])?; + + match dimensions { + Dimension::Xy => {} + Dimension::Xyz => base_type |= EWKB_Z_BIT, + Dimension::Xym => base_type |= EWKB_M_BIT, + Dimension::Xyzm => { + base_type |= EWKB_Z_BIT; + base_type |= EWKB_Z_BIT; + } + } + + if let Some(srid) = srid { + base_type |= EWKB_SRID_BIT; + buf.write_all(&base_type.to_le_bytes())?; + buf.write_all(&srid.to_le_bytes())?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use arrow_array::{ArrayRef, BinaryArray, BinaryViewArray}; + use datafusion_common::scalar::ScalarValue; + use datafusion_expr::ScalarUDF; + use rstest::rstest; + use sedona_schema::datatypes::{ + WKB_GEOGRAPHY, WKB_GEOGRAPHY_ITEM_CRS, WKB_GEOMETRY, WKB_GEOMETRY_ITEM_CRS, + WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOMETRY, + }; + use sedona_testing::testers::ScalarUdfTester; + + use super::*; + + const POINT12: [u8; 21] = [ + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, + ]; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = st_asewkb_udf().into(); + assert_eq!(udf.name(), "st_asewkb"); + assert!(udf.documentation().is_some()) + } + + #[rstest] + fn udf_geometry_input( + #[values( + WKB_GEOMETRY, + WKB_GEOGRAPHY, + WKB_GEOMETRY_ITEM_CRS.clone(), + WKB_GEOGRAPHY_ITEM_CRS.clone(), + )] + sedona_type: SedonaType, + ) { + let udf = st_asewkb_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + + assert_eq!( + tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(), + ScalarValue::Binary(Some(POINT12.to_vec())) + ); + + assert_eq!( + tester.invoke_wkb_scalar(None).unwrap(), + ScalarValue::Binary(None) + ); + + let expected_array: BinaryArray = [Some(POINT12), None, Some(POINT12)].iter().collect(); + assert_eq!( + &tester + .invoke_wkb_array(vec![Some("POINT (1 2)"), None, Some("POINT (1 2)")]) + .unwrap(), + &(Arc::new(expected_array) as ArrayRef) + ); + } + + #[rstest] + fn udf_geometry_view_input( + #[values(WKB_VIEW_GEOMETRY, WKB_VIEW_GEOGRAPHY)] sedona_type: SedonaType, + ) { + let udf = st_asewkb_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + + assert_eq!( + tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(), + ScalarValue::BinaryView(Some(POINT12.to_vec())) + ); + + assert_eq!( + tester.invoke_wkb_scalar(None).unwrap(), + ScalarValue::BinaryView(None) + ); + + let expected_array: BinaryViewArray = [Some(POINT12), None, Some(POINT12)].iter().collect(); + assert_eq!( + &tester + .invoke_wkb_array(vec![Some("POINT (1 2)"), None, Some("POINT (1 2)")]) + .unwrap(), + &(Arc::new(expected_array) as ArrayRef) + ); + } + + #[test] + fn aliases() { + let udf: ScalarUDF = st_asewkb_udf().into(); + assert!(udf.aliases().contains(&"st_aswkb".to_string())); + } +} From 64b45e1a209aa55e55a6fff34fe5db68a4513a57 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 17:02:54 -0600 Subject: [PATCH 02/16] in theory work --- python/sedonadb/tests/functions/test_wkb.py | 79 +++++++++++ rust/sedona-functions/src/st_asewkb.rs | 146 ++++++++++++++++++-- 2 files changed, 215 insertions(+), 10 deletions(-) create mode 100644 python/sedonadb/tests/functions/test_wkb.py diff --git a/python/sedonadb/tests/functions/test_wkb.py b/python/sedonadb/tests/functions/test_wkb.py new file mode 100644 index 000000000..315142b9e --- /dev/null +++ b/python/sedonadb/tests/functions/test_wkb.py @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +import shapely +from sedonadb.testing import PostGIS, SedonaDB, geom_or_null, val_or_null +import math + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + "geom", + [ + # XY dimensions + "POINT (1 2)", + "LINESTRING (1 2, 3 4, 5 6)", + "POLYGON ((0 1, 2 0, 2 3, 0 3, 0 1))", + "MULTIPOINT ((1 2), (3 4))", + "MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", + "MULTIPOLYGON (((0 1, 2 0, 2 3, 0 3, 0 1)))", + "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))", + # XYZ dimensions + "POINT Z (1 2 3)", + "LINESTRING Z (1 2 3, 4 5 6)", + "POLYGON Z ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))", + "MULTIPOINT Z ((1 2 3), (4 5 6))", + "MULTILINESTRING Z ((1 2 3, 4 5 6), (7 8 9, 10 11 12))", + "MULTIPOLYGON Z (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))", + "GEOMETRYCOLLECTION Z (POINT Z (1 2 3))", + # XYM dimensions + "POINT M (1 2 3)", + "LINESTRING M (1 2 3, 4 5 6)", + "POLYGON M ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))", + "MULTIPOINT M ((1 2 3), (4 5 6))", + "MULTILINESTRING M ((1 2 3, 4 5 6), (7 8 9, 10 11 12))", + "MULTIPOLYGON M (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))", + "GEOMETRYCOLLECTION M (POINT M (1 2 3))", + # XYZM dimensions + "POINT ZM (1 2 3 4)", + "LINESTRING ZM (1 2 3 4, 5 6 7 8)", + "POLYGON ZM ((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3))", + "MULTIPOINT ZM ((1 2 3 4), (5 6 7 8))", + "MULTILINESTRING ZM ((1 2 3 4, 5 6 7 8), (9 10 11 12, 13 14 15 16))", + "MULTIPOLYGON ZM (((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3)))", + "GEOMETRYCOLLECTION ZM (POINT ZM (1 2 3 4))", + # Empty geometries + "POINT EMPTY", + "LINESTRING EMPTY", + "POLYGON EMPTY", + "MULTIPOINT EMPTY", + "MULTILINESTRING EMPTY", + "MULTIPOLYGON EMPTY", + "GEOMETRYCOLLECTION EMPTY", + # NULL + None, + ], +) +def test_st_asbinary(eng, geom): + eng = eng.create_or_skip() + + if geom is not None: + expected = shapely.to_wkb(shapely.from_wkt(geom), output_dimension=4, byte_order=1, flavor="extended") + else: + expected = None + + eng.assert_query_result(f"SELECT ST_AsEWKB({geom_or_null(geom)})", expected) diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs index 313ccf6a5..64ca9649d 100644 --- a/rust/sedona-functions/src/st_asewkb.rs +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -21,15 +21,20 @@ use arrow_schema::DataType; use datafusion_common::{ cast::{as_string_view_array, as_struct_array}, error::Result, - exec_err, ScalarValue, + exec_datafusion_err, exec_err, ScalarValue, }; use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; -use geo_traits::GeometryTrait; +use geo_traits::{ + GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, MultiPointTrait, + MultiPolygonTrait, PointTrait, PolygonTrait, +}; use sedona_common::sedona_internal_err; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; -use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; +use sedona_geometry::wkb_factory::{ + write_wkb_coord_trait, write_wkb_empty_point, WKB_MIN_PROBABLE_BYTES, +}; use sedona_schema::{crs::deserialize_crs, datatypes::SedonaType, matchers::ArgMatcher}; use wkb::reader::{Dimension, Wkb}; @@ -186,18 +191,28 @@ fn write_geometry(geom: &Wkb, srid: Option, buf: &mut impl Write) -> Result match geom.as_type() { geo_traits::GeometryType::Point(p) => write_point(p, srid, buf), geo_traits::GeometryType::LineString(ls) => write_linestring(ls, srid, buf), - geo_traits::GeometryType::Polygon(poly) => todo!(), - geo_traits::GeometryType::MultiPoint(mp) => todo!(), - geo_traits::GeometryType::MultiLineString(mls) => todo!(), - geo_traits::GeometryType::MultiPolygon(mpoly) => todo!(), - geo_traits::GeometryType::GeometryCollection(gc) => todo!(), + geo_traits::GeometryType::Polygon(poly) => write_polygon(poly, srid, buf), + geo_traits::GeometryType::MultiPoint(mp) => write_multipoint(mp, srid, buf), + geo_traits::GeometryType::MultiLineString(mls) => write_multilinestring(mls, srid, buf), + geo_traits::GeometryType::MultiPolygon(mpoly) => write_multipolygon(mpoly, srid, buf), + geo_traits::GeometryType::GeometryCollection(gc) => write_geometrycollection(gc, srid, buf), _ => exec_err!("Unsupported geometry type in ST_AsEWKB()"), } } fn write_point(geom: &wkb::reader::Point, srid: Option, buf: &mut impl Write) -> Result<()> { write_geometry_type_and_srid(1, geom.dimension(), srid, buf)?; - buf.write_all(geom.coord_slice())?; + match geom.byte_order() { + wkb::Endianness::BigEndian => match geom.coord() { + Some(c) => { + write_wkb_coord_trait(buf, &c).map_err(|e| exec_datafusion_err!("write err {e}"))? + } + None => write_wkb_empty_point(buf, geom.dim()) + .map_err(|e| exec_datafusion_err!("write err {e}"))?, + }, + wkb::Endianness::LittleEndian => buf.write_all(geom.coord_slice())?, + } + Ok(()) } @@ -207,7 +222,118 @@ fn write_linestring( buf: &mut impl Write, ) -> Result<()> { write_geometry_type_and_srid(2, geom.dimension(), srid, buf)?; - buf.write_all(geom.coords_slice())?; + let num_coords = geom.num_coords() as u32; + buf.write_all(&num_coords.to_le_bytes())?; + match geom.byte_order() { + wkb::Endianness::BigEndian => { + for c in geom.coords() { + write_wkb_coord_trait(buf, &c) + .map_err(|e| exec_datafusion_err!("write err {e}"))?; + } + } + wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, + } + + Ok(()) +} + +fn write_linearring(geom: &wkb::reader::LinearRing, buf: &mut impl Write) -> Result<()> { + let num_coords = geom.num_coords() as u32; + buf.write_all(&num_coords.to_le_bytes())?; + match geom.byte_order() { + wkb::Endianness::BigEndian => { + for c in geom.coords() { + write_wkb_coord_trait(buf, &c) + .map_err(|e| exec_datafusion_err!("write err {e}"))?; + } + } + wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, + } + + Ok(()) +} + +fn write_polygon( + geom: &wkb::reader::Polygon, + srid: Option, + buf: &mut impl Write, +) -> Result<()> { + write_geometry_type_and_srid(3, geom.dimension(), srid, buf)?; + let num_rings = geom.num_interiors() as u32 + geom.exterior().is_some() as u32; + buf.write_all(&num_rings.to_le_bytes())?; + + if let Some(exterior) = geom.exterior() { + write_linearring(exterior, buf)?; + } + + for interior in geom.interiors() { + write_linearring(interior, buf)?; + } + + Ok(()) +} + +fn write_multipoint( + geom: &wkb::reader::MultiPoint, + srid: Option, + buf: &mut impl Write, +) -> Result<()> { + write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; + let num_children = geom.num_points(); + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.points() { + write_point(&child, None, buf)?; + } + + Ok(()) +} + +fn write_multilinestring( + geom: &wkb::reader::MultiLineString, + srid: Option, + buf: &mut impl Write, +) -> Result<()> { + write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; + let num_children = geom.num_line_strings(); + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.line_strings() { + write_linestring(child, None, buf)?; + } + + Ok(()) +} + +fn write_multipolygon( + geom: &wkb::reader::MultiPolygon, + srid: Option, + buf: &mut impl Write, +) -> Result<()> { + write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; + let num_children = geom.num_polygons(); + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.polygons() { + write_polygon(child, None, buf)?; + } + + Ok(()) +} + +fn write_geometrycollection( + geom: &wkb::reader::GeometryCollection, + srid: Option, + buf: &mut impl Write, +) -> Result<()> { + write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; + let num_children = geom.num_geometries(); + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.geometries() { + write_geometry(child, None, buf)?; + } + Ok(()) } From 7b0fa5b6ffa43b4fd690c318fa5f17cab4ce5d10 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 17:16:45 -0600 Subject: [PATCH 03/16] passing tests --- python/sedonadb/tests/functions/test_wkb.py | 10 ++++++---- rust/sedona-functions/src/st_asewkb.rs | 19 +++++++++++-------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/python/sedonadb/tests/functions/test_wkb.py b/python/sedonadb/tests/functions/test_wkb.py index 315142b9e..539fd0b88 100644 --- a/python/sedonadb/tests/functions/test_wkb.py +++ b/python/sedonadb/tests/functions/test_wkb.py @@ -17,8 +17,8 @@ import pytest import shapely -from sedonadb.testing import PostGIS, SedonaDB, geom_or_null, val_or_null -import math +from sedonadb.testing import PostGIS, SedonaDB, geom_or_null + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( @@ -68,11 +68,13 @@ None, ], ) -def test_st_asbinary(eng, geom): +def test_st_asewkb(eng, geom): eng = eng.create_or_skip() if geom is not None: - expected = shapely.to_wkb(shapely.from_wkt(geom), output_dimension=4, byte_order=1, flavor="extended") + expected = shapely.to_wkb( + shapely.from_wkt(geom), output_dimension=4, byte_order=1, flavor="extended" + ) else: expected = None diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs index 64ca9649d..c37901eaf 100644 --- a/rust/sedona-functions/src/st_asewkb.rs +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -99,6 +99,7 @@ impl SedonaScalarKernel for STAsEWKB { }, None => None, }, + SedonaType::Arrow(DataType::Null) => None, _ => return sedona_internal_err!("Unexpected input to invoke_batch in ST_AsEWKB"), }; @@ -279,7 +280,7 @@ fn write_multipoint( buf: &mut impl Write, ) -> Result<()> { write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; - let num_children = geom.num_points(); + let num_children = geom.num_points() as u32; buf.write_all(&num_children.to_le_bytes())?; for child in geom.points() { @@ -294,8 +295,8 @@ fn write_multilinestring( srid: Option, buf: &mut impl Write, ) -> Result<()> { - write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; - let num_children = geom.num_line_strings(); + write_geometry_type_and_srid(5, geom.dimension(), srid, buf)?; + let num_children = geom.num_line_strings() as u32; buf.write_all(&num_children.to_le_bytes())?; for child in geom.line_strings() { @@ -310,8 +311,8 @@ fn write_multipolygon( srid: Option, buf: &mut impl Write, ) -> Result<()> { - write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; - let num_children = geom.num_polygons(); + write_geometry_type_and_srid(6, geom.dimension(), srid, buf)?; + let num_children = geom.num_polygons() as u32; buf.write_all(&num_children.to_le_bytes())?; for child in geom.polygons() { @@ -326,8 +327,8 @@ fn write_geometrycollection( srid: Option, buf: &mut impl Write, ) -> Result<()> { - write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; - let num_children = geom.num_geometries(); + write_geometry_type_and_srid(7, geom.dimension(), srid, buf)?; + let num_children = geom.num_geometries() as u32; buf.write_all(&num_children.to_le_bytes())?; for child in geom.geometries() { @@ -351,7 +352,7 @@ fn write_geometry_type_and_srid( Dimension::Xym => base_type |= EWKB_M_BIT, Dimension::Xyzm => { base_type |= EWKB_Z_BIT; - base_type |= EWKB_Z_BIT; + base_type |= EWKB_M_BIT; } } @@ -359,6 +360,8 @@ fn write_geometry_type_and_srid( base_type |= EWKB_SRID_BIT; buf.write_all(&base_type.to_le_bytes())?; buf.write_all(&srid.to_le_bytes())?; + } else { + buf.write_all(&base_type.to_le_bytes())?; } Ok(()) From 5f699dac3c3769c84d35760b8ab24c1adc13dd99 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 17:32:58 -0600 Subject: [PATCH 04/16] working! --- python/sedonadb/python/sedonadb/testing.py | 8 ++++++-- python/sedonadb/tests/functions/test_wkb.py | 18 +++++++++++++++--- rust/sedona-functions/src/executor.rs | 14 ++++++++++++++ 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/python/sedonadb/python/sedonadb/testing.py b/python/sedonadb/python/sedonadb/testing.py index 83533dd6f..c6d6b375e 100644 --- a/python/sedonadb/python/sedonadb/testing.py +++ b/python/sedonadb/python/sedonadb/testing.py @@ -624,11 +624,15 @@ def __init__(self, uri=None): cur.execute("SET max_parallel_workers_per_gather TO 0") -def geom_or_null(arg): +def geom_or_null(arg, srid=None): """Format SQL expression for a geometry object or NULL""" if arg is None: return "NULL" - return f"ST_GeomFromText('{arg}')" + + if srid is None: + return f"ST_GeomFromText('{arg}')" + else: + return f"ST_GeomFromEWKT('SRID={srid};{arg}')" def geog_or_null(arg): diff --git a/python/sedonadb/tests/functions/test_wkb.py b/python/sedonadb/tests/functions/test_wkb.py index 539fd0b88..424d9a36b 100644 --- a/python/sedonadb/tests/functions/test_wkb.py +++ b/python/sedonadb/tests/functions/test_wkb.py @@ -21,6 +21,7 @@ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize("srid", [None, 4326]) @pytest.mark.parametrize( "geom", [ @@ -68,14 +69,25 @@ None, ], ) -def test_st_asewkb(eng, geom): +def test_st_asewkb(eng, srid, geom): eng = eng.create_or_skip() if geom is not None: + shapely_geom = shapely.from_wkt(geom) + if srid is not None: + shapely_geom = shapely.set_srid(shapely_geom, srid) + write_srid = True + else: + write_srid = False + expected = shapely.to_wkb( - shapely.from_wkt(geom), output_dimension=4, byte_order=1, flavor="extended" + shapely_geom, + output_dimension=4, + byte_order=1, + flavor="extended", + include_srid=write_srid, ) else: expected = None - eng.assert_query_result(f"SELECT ST_AsEWKB({geom_or_null(geom)})", expected) + eng.assert_query_result(f"SELECT ST_AsEWKB({geom_or_null(geom, srid)})", expected) diff --git a/rust/sedona-functions/src/executor.rs b/rust/sedona-functions/src/executor.rs index f1d679d28..8809ac0ef 100644 --- a/rust/sedona-functions/src/executor.rs +++ b/rust/sedona-functions/src/executor.rs @@ -383,6 +383,20 @@ impl ScalarGeo for ScalarValue { | ScalarValue::BinaryView(maybe_item) | ScalarValue::LargeBinary(maybe_item) => Ok(maybe_item.as_deref()), ScalarValue::Null => Ok(None), + ScalarValue::Struct(s) + if s.fields().len() == 2 + && s.fields()[0].name() == "item" + && s.fields()[1].name() == "crs" => + { + let item_type = SedonaType::from_storage_field(&s.fields()[0])?; + let mut out = None; + s.column(0).iter_as_wkb_bytes(&item_type, 1, |v| { + out = v; + Ok(()) + })?; + + Ok(out) + } _ => sedona_internal_err!("Can't iterate over {:?} ScalarValue as &[u8]", self), } } From ba80f162e54b95b5b426d1e15a0885783122789a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 17:33:26 -0600 Subject: [PATCH 05/16] typo --- rust/sedona-functions/src/st_asewkb.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs index c37901eaf..b60378088 100644 --- a/rust/sedona-functions/src/st_asewkb.rs +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -57,7 +57,7 @@ fn st_asewkb_doc() -> Documentation { DOC_SECTION_OTHER, r#"Return the Extended Well-Known Binary (EWKB) representation of a geometry or geography. -Compared to ST_AsBinary(), this function embeds an integer SRID derived from the type or dervied +Compared to ST_AsBinary(), this function embeds an integer SRID derived from the type or derived from the item-level CRS for item CRS types. This is particularly useful for integration with PostGIS"#, "ST_AsEWKB (A: Geometry)", From 41e758c762abf66612f0998c4866ee4f1808d071 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 23:07:53 -0600 Subject: [PATCH 06/16] move ewkb to sedona-geometry --- rust/sedona-functions/src/st_asewkb.rs | 200 +--------------------- rust/sedona-geometry/src/ewkb_factory.rs | 204 +++++++++++++++++++++++ rust/sedona-geometry/src/lib.rs | 1 + 3 files changed, 211 insertions(+), 194 deletions(-) create mode 100644 rust/sedona-geometry/src/ewkb_factory.rs diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs index b60378088..c4602e143 100644 --- a/rust/sedona-functions/src/st_asewkb.rs +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use std::{io::Write, sync::Arc, vec}; +use std::{sync::Arc, vec}; use arrow_array::builder::BinaryBuilder; use arrow_schema::DataType; @@ -26,17 +26,10 @@ use datafusion_common::{ use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; -use geo_traits::{ - GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, MultiPointTrait, - MultiPolygonTrait, PointTrait, PolygonTrait, -}; use sedona_common::sedona_internal_err; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; -use sedona_geometry::wkb_factory::{ - write_wkb_coord_trait, write_wkb_empty_point, WKB_MIN_PROBABLE_BYTES, -}; +use sedona_geometry::{ewkb_factory::write_ewkb_geometry, wkb_factory::WKB_MIN_PROBABLE_BYTES}; use sedona_schema::{crs::deserialize_crs, datatypes::SedonaType, matchers::ArgMatcher}; -use wkb::reader::{Dimension, Wkb}; use crate::executor::WkbExecutor; @@ -106,7 +99,8 @@ impl SedonaScalarKernel for STAsEWKB { executor.execute_wkb_void(|maybe_wkb| { match maybe_wkb { Some(wkb) => { - write_geometry(&wkb, maybe_srid, &mut builder)?; + write_ewkb_geometry(&wkb, maybe_srid, &mut builder) + .map_err(|e| exec_datafusion_err!("EWKB writer error {e}"))?; builder.append_value([]); } None => builder.append_null(), @@ -171,7 +165,8 @@ impl SedonaScalarKernel for STAsEWKBItemCrs { executor.execute_wkb_void(|maybe_wkb| { match maybe_wkb { Some(wkb) => { - write_geometry(&wkb, srid_iter.next().unwrap()?, &mut builder)?; + write_ewkb_geometry(&wkb, srid_iter.next().unwrap()?, &mut builder) + .map_err(|e| exec_datafusion_err!("EWKB writer error {e}"))?; builder.append_value([]); } None => builder.append_null(), @@ -184,189 +179,6 @@ impl SedonaScalarKernel for STAsEWKBItemCrs { } } -const EWKB_Z_BIT: u32 = 0x80000000; -const EWKB_M_BIT: u32 = 0x40000000; -const EWKB_SRID_BIT: u32 = 0x20000000; - -fn write_geometry(geom: &Wkb, srid: Option, buf: &mut impl Write) -> Result<()> { - match geom.as_type() { - geo_traits::GeometryType::Point(p) => write_point(p, srid, buf), - geo_traits::GeometryType::LineString(ls) => write_linestring(ls, srid, buf), - geo_traits::GeometryType::Polygon(poly) => write_polygon(poly, srid, buf), - geo_traits::GeometryType::MultiPoint(mp) => write_multipoint(mp, srid, buf), - geo_traits::GeometryType::MultiLineString(mls) => write_multilinestring(mls, srid, buf), - geo_traits::GeometryType::MultiPolygon(mpoly) => write_multipolygon(mpoly, srid, buf), - geo_traits::GeometryType::GeometryCollection(gc) => write_geometrycollection(gc, srid, buf), - _ => exec_err!("Unsupported geometry type in ST_AsEWKB()"), - } -} - -fn write_point(geom: &wkb::reader::Point, srid: Option, buf: &mut impl Write) -> Result<()> { - write_geometry_type_and_srid(1, geom.dimension(), srid, buf)?; - match geom.byte_order() { - wkb::Endianness::BigEndian => match geom.coord() { - Some(c) => { - write_wkb_coord_trait(buf, &c).map_err(|e| exec_datafusion_err!("write err {e}"))? - } - None => write_wkb_empty_point(buf, geom.dim()) - .map_err(|e| exec_datafusion_err!("write err {e}"))?, - }, - wkb::Endianness::LittleEndian => buf.write_all(geom.coord_slice())?, - } - - Ok(()) -} - -fn write_linestring( - geom: &wkb::reader::LineString, - srid: Option, - buf: &mut impl Write, -) -> Result<()> { - write_geometry_type_and_srid(2, geom.dimension(), srid, buf)?; - let num_coords = geom.num_coords() as u32; - buf.write_all(&num_coords.to_le_bytes())?; - match geom.byte_order() { - wkb::Endianness::BigEndian => { - for c in geom.coords() { - write_wkb_coord_trait(buf, &c) - .map_err(|e| exec_datafusion_err!("write err {e}"))?; - } - } - wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, - } - - Ok(()) -} - -fn write_linearring(geom: &wkb::reader::LinearRing, buf: &mut impl Write) -> Result<()> { - let num_coords = geom.num_coords() as u32; - buf.write_all(&num_coords.to_le_bytes())?; - match geom.byte_order() { - wkb::Endianness::BigEndian => { - for c in geom.coords() { - write_wkb_coord_trait(buf, &c) - .map_err(|e| exec_datafusion_err!("write err {e}"))?; - } - } - wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, - } - - Ok(()) -} - -fn write_polygon( - geom: &wkb::reader::Polygon, - srid: Option, - buf: &mut impl Write, -) -> Result<()> { - write_geometry_type_and_srid(3, geom.dimension(), srid, buf)?; - let num_rings = geom.num_interiors() as u32 + geom.exterior().is_some() as u32; - buf.write_all(&num_rings.to_le_bytes())?; - - if let Some(exterior) = geom.exterior() { - write_linearring(exterior, buf)?; - } - - for interior in geom.interiors() { - write_linearring(interior, buf)?; - } - - Ok(()) -} - -fn write_multipoint( - geom: &wkb::reader::MultiPoint, - srid: Option, - buf: &mut impl Write, -) -> Result<()> { - write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; - let num_children = geom.num_points() as u32; - buf.write_all(&num_children.to_le_bytes())?; - - for child in geom.points() { - write_point(&child, None, buf)?; - } - - Ok(()) -} - -fn write_multilinestring( - geom: &wkb::reader::MultiLineString, - srid: Option, - buf: &mut impl Write, -) -> Result<()> { - write_geometry_type_and_srid(5, geom.dimension(), srid, buf)?; - let num_children = geom.num_line_strings() as u32; - buf.write_all(&num_children.to_le_bytes())?; - - for child in geom.line_strings() { - write_linestring(child, None, buf)?; - } - - Ok(()) -} - -fn write_multipolygon( - geom: &wkb::reader::MultiPolygon, - srid: Option, - buf: &mut impl Write, -) -> Result<()> { - write_geometry_type_and_srid(6, geom.dimension(), srid, buf)?; - let num_children = geom.num_polygons() as u32; - buf.write_all(&num_children.to_le_bytes())?; - - for child in geom.polygons() { - write_polygon(child, None, buf)?; - } - - Ok(()) -} - -fn write_geometrycollection( - geom: &wkb::reader::GeometryCollection, - srid: Option, - buf: &mut impl Write, -) -> Result<()> { - write_geometry_type_and_srid(7, geom.dimension(), srid, buf)?; - let num_children = geom.num_geometries() as u32; - buf.write_all(&num_children.to_le_bytes())?; - - for child in geom.geometries() { - write_geometry(child, None, buf)?; - } - - Ok(()) -} - -fn write_geometry_type_and_srid( - mut base_type: u32, - dimensions: Dimension, - srid: Option, - buf: &mut impl Write, -) -> Result<()> { - buf.write_all(&[0x01])?; - - match dimensions { - Dimension::Xy => {} - Dimension::Xyz => base_type |= EWKB_Z_BIT, - Dimension::Xym => base_type |= EWKB_M_BIT, - Dimension::Xyzm => { - base_type |= EWKB_Z_BIT; - base_type |= EWKB_M_BIT; - } - } - - if let Some(srid) = srid { - base_type |= EWKB_SRID_BIT; - buf.write_all(&base_type.to_le_bytes())?; - buf.write_all(&srid.to_le_bytes())?; - } else { - buf.write_all(&base_type.to_le_bytes())?; - } - - Ok(()) -} - #[cfg(test)] mod tests { use arrow_array::{ArrayRef, BinaryArray, BinaryViewArray}; diff --git a/rust/sedona-geometry/src/ewkb_factory.rs b/rust/sedona-geometry/src/ewkb_factory.rs new file mode 100644 index 000000000..cc56bf4d9 --- /dev/null +++ b/rust/sedona-geometry/src/ewkb_factory.rs @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::io::Write; + +use wkb::reader::Wkb; +use geo_traits::{GeometryTrait, PointTrait, LineStringTrait, PolygonTrait, MultiPointTrait, MultiPolygonTrait, MultiLineStringTrait, +GeometryCollectionTrait}; + +use crate::{error::SedonaGeometryError, wkb_factory::{write_wkb_coord_trait, write_wkb_empty_point}}; + +const EWKB_Z_BIT: u32 = 0x80000000; +const EWKB_M_BIT: u32 = 0x40000000; +const EWKB_SRID_BIT: u32 = 0x20000000; + +pub fn write_ewkb_geometry(geom: &Wkb, srid: Option, buf: &mut impl Write) -> Result<(), SedonaGeometryError> { + match geom.as_type() { + geo_traits::GeometryType::Point(p) => write_ewkb_point(p, srid, buf), + geo_traits::GeometryType::LineString(ls) => write_ewkb_line_string(ls, srid, buf), + geo_traits::GeometryType::Polygon(poly) => write_ewkb_polygon(poly, srid, buf), + geo_traits::GeometryType::MultiPoint(mp) => write_ewkb_multi_point(mp, srid, buf), + geo_traits::GeometryType::MultiLineString(mls) => write_ewkb_multi_line_string(mls, srid, buf), + geo_traits::GeometryType::MultiPolygon(mpoly) => write_ewkb_multi_polygon(mpoly, srid, buf), + geo_traits::GeometryType::GeometryCollection(gc) => write_ewkb_geometry_collection(gc, srid, buf), + _ => Err(SedonaGeometryError::Invalid("Unsupported EWKB geometry type".to_string())), + } +} + +fn write_ewkb_point(geom: &wkb::reader::Point, srid: Option, buf: &mut impl Write) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(1, geom.dimension(), srid, buf)?; + match geom.byte_order() { + wkb::Endianness::BigEndian => match geom.coord() { + Some(c) => { + write_wkb_coord_trait(buf, &c)? + } + None => write_wkb_empty_point(buf, geom.dim())?, + }, + wkb::Endianness::LittleEndian => buf.write_all(geom.coord_slice())?, + } + + Ok(()) +} + +fn write_ewkb_line_string( + geom: &wkb::reader::LineString, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(2, geom.dimension(), srid, buf)?; + let num_coords = geom.num_coords() as u32; + buf.write_all(&num_coords.to_le_bytes())?; + match geom.byte_order() { + wkb::Endianness::BigEndian => { + for c in geom.coords() { + write_wkb_coord_trait(buf, &c)?; + } + } + wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, + } + + Ok(()) +} + +fn write_linearring(geom: &wkb::reader::LinearRing, buf: &mut impl Write) -> Result<(), SedonaGeometryError> { + let num_coords = geom.num_coords() as u32; + buf.write_all(&num_coords.to_le_bytes())?; + match geom.byte_order() { + wkb::Endianness::BigEndian => { + for c in geom.coords() { + write_wkb_coord_trait(buf, &c)?; + } + } + wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, + } + + Ok(()) +} + +fn write_ewkb_polygon( + geom: &wkb::reader::Polygon, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(3, geom.dimension(), srid, buf)?; + let num_rings = geom.num_interiors() as u32 + geom.exterior().is_some() as u32; + buf.write_all(&num_rings.to_le_bytes())?; + + if let Some(exterior) = geom.exterior() { + write_linearring(exterior, buf)?; + } + + for interior in geom.interiors() { + write_linearring(interior, buf)?; + } + + Ok(()) +} + +fn write_ewkb_multi_point( + geom: &wkb::reader::MultiPoint, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; + let num_children = geom.num_points() as u32; + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.points() { + write_ewkb_point(&child, None, buf)?; + } + + Ok(()) +} + +fn write_ewkb_multi_line_string( + geom: &wkb::reader::MultiLineString, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(5, geom.dimension(), srid, buf)?; + let num_children = geom.num_line_strings() as u32; + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.line_strings() { + write_ewkb_line_string(child, None, buf)?; + } + + Ok(()) +} + +fn write_ewkb_multi_polygon( + geom: &wkb::reader::MultiPolygon, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(6, geom.dimension(), srid, buf)?; + let num_children = geom.num_polygons() as u32; + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.polygons() { + write_ewkb_polygon(child, None, buf)?; + } + + Ok(()) +} + +fn write_ewkb_geometry_collection( + geom: &wkb::reader::GeometryCollection, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(7, geom.dimension(), srid, buf)?; + let num_children = geom.num_geometries() as u32; + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.geometries() { + write_ewkb_geometry(child, None, buf)?; + } + + Ok(()) +} + +fn write_geometry_type_and_srid( + mut base_type: u32, + dimensions: wkb::reader::Dimension, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { + buf.write_all(&[0x01])?; + + match dimensions { + wkb::reader::Dimension::Xy => {} + wkb::reader::Dimension::Xyz => base_type |= EWKB_Z_BIT, + wkb::reader::Dimension::Xym => base_type |= EWKB_M_BIT, + wkb::reader::Dimension::Xyzm => { + base_type |= EWKB_Z_BIT; + base_type |= EWKB_M_BIT; + } + } + + if let Some(srid) = srid { + base_type |= EWKB_SRID_BIT; + buf.write_all(&base_type.to_le_bytes())?; + buf.write_all(&srid.to_le_bytes())?; + } else { + buf.write_all(&base_type.to_le_bytes())?; + } + + Ok(()) +} diff --git a/rust/sedona-geometry/src/lib.rs b/rust/sedona-geometry/src/lib.rs index f189ec7b4..5de9f65e2 100644 --- a/rust/sedona-geometry/src/lib.rs +++ b/rust/sedona-geometry/src/lib.rs @@ -18,6 +18,7 @@ pub mod analyze; pub mod bounding_box; pub mod bounds; pub mod error; +pub mod ewkb_factory; pub mod interval; pub mod is_empty; pub mod point_count; From fa9272a9115e7d2f250ab9536295ac07a3300a00 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 23:37:34 -0600 Subject: [PATCH 07/16] add roundtrip test --- rust/sedona-geometry/src/ewkb_factory.rs | 133 +++++++++++++++++++++-- 1 file changed, 121 insertions(+), 12 deletions(-) diff --git a/rust/sedona-geometry/src/ewkb_factory.rs b/rust/sedona-geometry/src/ewkb_factory.rs index cc56bf4d9..2cecabb67 100644 --- a/rust/sedona-geometry/src/ewkb_factory.rs +++ b/rust/sedona-geometry/src/ewkb_factory.rs @@ -17,36 +17,53 @@ use std::io::Write; +use geo_traits::{ + GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, MultiPointTrait, + MultiPolygonTrait, PointTrait, PolygonTrait, +}; use wkb::reader::Wkb; -use geo_traits::{GeometryTrait, PointTrait, LineStringTrait, PolygonTrait, MultiPointTrait, MultiPolygonTrait, MultiLineStringTrait, -GeometryCollectionTrait}; -use crate::{error::SedonaGeometryError, wkb_factory::{write_wkb_coord_trait, write_wkb_empty_point}}; +use crate::{ + error::SedonaGeometryError, + wkb_factory::{write_wkb_coord_trait, write_wkb_empty_point}, +}; const EWKB_Z_BIT: u32 = 0x80000000; const EWKB_M_BIT: u32 = 0x40000000; const EWKB_SRID_BIT: u32 = 0x20000000; -pub fn write_ewkb_geometry(geom: &Wkb, srid: Option, buf: &mut impl Write) -> Result<(), SedonaGeometryError> { +pub fn write_ewkb_geometry( + geom: &Wkb, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { match geom.as_type() { geo_traits::GeometryType::Point(p) => write_ewkb_point(p, srid, buf), geo_traits::GeometryType::LineString(ls) => write_ewkb_line_string(ls, srid, buf), geo_traits::GeometryType::Polygon(poly) => write_ewkb_polygon(poly, srid, buf), geo_traits::GeometryType::MultiPoint(mp) => write_ewkb_multi_point(mp, srid, buf), - geo_traits::GeometryType::MultiLineString(mls) => write_ewkb_multi_line_string(mls, srid, buf), + geo_traits::GeometryType::MultiLineString(mls) => { + write_ewkb_multi_line_string(mls, srid, buf) + } geo_traits::GeometryType::MultiPolygon(mpoly) => write_ewkb_multi_polygon(mpoly, srid, buf), - geo_traits::GeometryType::GeometryCollection(gc) => write_ewkb_geometry_collection(gc, srid, buf), - _ => Err(SedonaGeometryError::Invalid("Unsupported EWKB geometry type".to_string())), + geo_traits::GeometryType::GeometryCollection(gc) => { + write_ewkb_geometry_collection(gc, srid, buf) + } + _ => Err(SedonaGeometryError::Invalid( + "Unsupported EWKB geometry type".to_string(), + )), } } -fn write_ewkb_point(geom: &wkb::reader::Point, srid: Option, buf: &mut impl Write) -> Result<(), SedonaGeometryError> { +fn write_ewkb_point( + geom: &wkb::reader::Point, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { write_geometry_type_and_srid(1, geom.dimension(), srid, buf)?; match geom.byte_order() { wkb::Endianness::BigEndian => match geom.coord() { - Some(c) => { - write_wkb_coord_trait(buf, &c)? - } + Some(c) => write_wkb_coord_trait(buf, &c)?, None => write_wkb_empty_point(buf, geom.dim())?, }, wkb::Endianness::LittleEndian => buf.write_all(geom.coord_slice())?, @@ -75,7 +92,10 @@ fn write_ewkb_line_string( Ok(()) } -fn write_linearring(geom: &wkb::reader::LinearRing, buf: &mut impl Write) -> Result<(), SedonaGeometryError> { +fn write_linearring( + geom: &wkb::reader::LinearRing, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { let num_coords = geom.num_coords() as u32; buf.write_all(&num_coords.to_le_bytes())?; match geom.byte_order() { @@ -202,3 +222,92 @@ fn write_geometry_type_and_srid( Ok(()) } + +#[cfg(test)] +mod test { + + use rstest::rstest; + use wkb::{writer::WriteOptions, Endianness}; + + use super::*; + + #[rstest] + fn test_roundtrip( + #[values(Endianness::LittleEndian, Endianness::BigEndian)] endianness: Endianness, + ) { + for wkt_str in ROUNDTRIP_CASES { + let wkt: wkt::Wkt = wkt_str.parse().unwrap(); + + let mut iso_wkb = Vec::new(); + wkb::writer::write_geometry(&mut iso_wkb, &wkt, &WriteOptions { endianness }).unwrap(); + let wkb_geom = wkb::reader::read_wkb(&iso_wkb).unwrap(); + + let mut ewkb_no_srid = Vec::new(); + write_ewkb_geometry(&wkb_geom, None, &mut ewkb_no_srid).unwrap(); + + let mut ewkb_with_srid = Vec::new(); + write_ewkb_geometry(&wkb_geom, Some(4326), &mut ewkb_with_srid).unwrap(); + + // Check that the ewkbs have the correct number of bytes + assert_eq!(ewkb_no_srid.len(), iso_wkb.len()); + assert_eq!(ewkb_with_srid.len(), ewkb_no_srid.len() + size_of::()); + + // Check the rendered WKT of the no srid EWKB + let wkb_geom_roundtrip_no_srid = wkb::reader::read_wkb(&ewkb_no_srid).unwrap(); + let mut wkt_roundtrip_no_srid = String::new(); + wkt::to_wkt::write_geometry(&mut wkt_roundtrip_no_srid, &wkb_geom_roundtrip_no_srid) + .unwrap(); + assert_eq!(wkt_roundtrip_no_srid, wkt_str); + + // Check the rendered WKT of the srid EWKB + let wkb_geom_roundtrip_with_srid = wkb::reader::read_wkb(&ewkb_with_srid).unwrap(); + let mut wkt_roundtrip_with_srid = String::new(); + wkt::to_wkt::write_geometry(&mut wkt_roundtrip_with_srid, &wkb_geom_roundtrip_with_srid) + .unwrap(); + assert_eq!(wkt_roundtrip_with_srid, wkt_str); + } + } + + const ROUNDTRIP_CASES: [&str; 35] = [ + // XY dimensions + "POINT (1 2)", + "LINESTRING (1 2, 3 4, 5 6)", + "POLYGON ((0 1, 2 0, 2 3, 0 3, 0 1))", + "MULTIPOINT ((1 2), (3 4))", + "MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", + "MULTIPOLYGON (((0 1, 2 0, 2 3, 0 3, 0 1)))", + "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))", + // XYZ dimensions + "POINT Z (1 2 3)", + "LINESTRING Z (1 2 3, 4 5 6)", + "POLYGON Z ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))", + "MULTIPOINT Z ((1 2 3), (4 5 6))", + "MULTILINESTRING Z ((1 2 3, 4 5 6), (7 8 9, 10 11 12))", + "MULTIPOLYGON Z (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))", + "GEOMETRYCOLLECTION Z (POINT Z (1 2 3))", + // XYM dimensions + "POINT M (1 2 3)", + "LINESTRING M (1 2 3, 4 5 6)", + "POLYGON M ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))", + "MULTIPOINT M ((1 2 3), (4 5 6))", + "MULTILINESTRING M ((1 2 3, 4 5 6), (7 8 9, 10 11 12))", + "MULTIPOLYGON M (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))", + "GEOMETRYCOLLECTION M (POINT M (1 2 3))", + // XYZM dimensions + "POINT ZM (1 2 3 4)", + "LINESTRING ZM (1 2 3 4, 5 6 7 8)", + "POLYGON ZM ((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3))", + "MULTIPOINT ZM ((1 2 3 4), (5 6 7 8))", + "MULTILINESTRING ZM ((1 2 3 4, 5 6 7 8), (9 10 11 12, 13 14 15 16))", + "MULTIPOLYGON ZM (((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3)))", + "GEOMETRYCOLLECTION ZM (POINT ZM (1 2 3 4))", + // Empty geometries + "POINT EMPTY", + "LINESTRING EMPTY", + "POLYGON EMPTY", + "MULTIPOINT EMPTY", + "MULTILINESTRING EMPTY", + "MULTIPOLYGON EMPTY", + "GEOMETRYCOLLECTION EMPTY", + ]; +} From 215347cac2ececebbca50e314d0e8bc99856deaf Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 23:43:38 -0600 Subject: [PATCH 08/16] fix test --- rust/sedona-geometry/src/ewkb_factory.rs | 86 ++++++++++++++---------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/rust/sedona-geometry/src/ewkb_factory.rs b/rust/sedona-geometry/src/ewkb_factory.rs index 2cecabb67..53397f915 100644 --- a/rust/sedona-geometry/src/ewkb_factory.rs +++ b/rust/sedona-geometry/src/ewkb_factory.rs @@ -23,10 +23,7 @@ use geo_traits::{ }; use wkb::reader::Wkb; -use crate::{ - error::SedonaGeometryError, - wkb_factory::{write_wkb_coord_trait, write_wkb_empty_point}, -}; +use crate::{error::SedonaGeometryError, wkb_factory::write_wkb_coord_trait}; const EWKB_Z_BIT: u32 = 0x80000000; const EWKB_M_BIT: u32 = 0x40000000; @@ -64,7 +61,11 @@ fn write_ewkb_point( match geom.byte_order() { wkb::Endianness::BigEndian => match geom.coord() { Some(c) => write_wkb_coord_trait(buf, &c)?, - None => write_wkb_empty_point(buf, geom.dim())?, + None => { + for _ in 0..geom.dim().size() { + buf.write_all(&f64::NAN.to_le_bytes())?; + } + } }, wkb::Endianness::LittleEndian => buf.write_all(geom.coord_slice())?, } @@ -249,8 +250,16 @@ mod test { write_ewkb_geometry(&wkb_geom, Some(4326), &mut ewkb_with_srid).unwrap(); // Check that the ewkbs have the correct number of bytes - assert_eq!(ewkb_no_srid.len(), iso_wkb.len()); - assert_eq!(ewkb_with_srid.len(), ewkb_no_srid.len() + size_of::()); + assert_eq!( + ewkb_no_srid.len(), + iso_wkb.len(), + "incorrect number of bytes for case {wkt_str} without srid" + ); + assert_eq!( + ewkb_with_srid.len(), + ewkb_no_srid.len() + size_of::(), + "incorrect number of bytes for case {wkt_str} with srid" + ); // Check the rendered WKT of the no srid EWKB let wkb_geom_roundtrip_no_srid = wkb::reader::read_wkb(&ewkb_no_srid).unwrap(); @@ -262,45 +271,48 @@ mod test { // Check the rendered WKT of the srid EWKB let wkb_geom_roundtrip_with_srid = wkb::reader::read_wkb(&ewkb_with_srid).unwrap(); let mut wkt_roundtrip_with_srid = String::new(); - wkt::to_wkt::write_geometry(&mut wkt_roundtrip_with_srid, &wkb_geom_roundtrip_with_srid) - .unwrap(); + wkt::to_wkt::write_geometry( + &mut wkt_roundtrip_with_srid, + &wkb_geom_roundtrip_with_srid, + ) + .unwrap(); assert_eq!(wkt_roundtrip_with_srid, wkt_str); } } const ROUNDTRIP_CASES: [&str; 35] = [ // XY dimensions - "POINT (1 2)", - "LINESTRING (1 2, 3 4, 5 6)", - "POLYGON ((0 1, 2 0, 2 3, 0 3, 0 1))", - "MULTIPOINT ((1 2), (3 4))", - "MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", - "MULTIPOLYGON (((0 1, 2 0, 2 3, 0 3, 0 1)))", - "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))", + "POINT(1 2)", + "LINESTRING(1 2,3 4,5 6)", + "POLYGON((0 1,2 0,2 3,0 3,0 1))", + "MULTIPOINT((1 2),(3 4))", + "MULTILINESTRING((1 2,3 4),(5 6,7 8))", + "MULTIPOLYGON(((0 1,2 0,2 3,0 3,0 1)))", + "GEOMETRYCOLLECTION(POINT(1 2),LINESTRING(3 4,5 6))", // XYZ dimensions - "POINT Z (1 2 3)", - "LINESTRING Z (1 2 3, 4 5 6)", - "POLYGON Z ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))", - "MULTIPOINT Z ((1 2 3), (4 5 6))", - "MULTILINESTRING Z ((1 2 3, 4 5 6), (7 8 9, 10 11 12))", - "MULTIPOLYGON Z (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))", - "GEOMETRYCOLLECTION Z (POINT Z (1 2 3))", + "POINT Z(1 2 3)", + "LINESTRING Z(1 2 3,4 5 6)", + "POLYGON Z((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2))", + "MULTIPOINT Z((1 2 3),(4 5 6))", + "MULTILINESTRING Z((1 2 3,4 5 6),(7 8 9,10 11 12))", + "MULTIPOLYGON Z(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))", + "GEOMETRYCOLLECTION Z(POINT Z(1 2 3))", // XYM dimensions - "POINT M (1 2 3)", - "LINESTRING M (1 2 3, 4 5 6)", - "POLYGON M ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))", - "MULTIPOINT M ((1 2 3), (4 5 6))", - "MULTILINESTRING M ((1 2 3, 4 5 6), (7 8 9, 10 11 12))", - "MULTIPOLYGON M (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))", - "GEOMETRYCOLLECTION M (POINT M (1 2 3))", + "POINT M(1 2 3)", + "LINESTRING M(1 2 3,4 5 6)", + "POLYGON M((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2))", + "MULTIPOINT M((1 2 3),(4 5 6))", + "MULTILINESTRING M((1 2 3,4 5 6),(7 8 9,10 11 12))", + "MULTIPOLYGON M(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))", + "GEOMETRYCOLLECTION M(POINT M(1 2 3))", // XYZM dimensions - "POINT ZM (1 2 3 4)", - "LINESTRING ZM (1 2 3 4, 5 6 7 8)", - "POLYGON ZM ((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3))", - "MULTIPOINT ZM ((1 2 3 4), (5 6 7 8))", - "MULTILINESTRING ZM ((1 2 3 4, 5 6 7 8), (9 10 11 12, 13 14 15 16))", - "MULTIPOLYGON ZM (((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3)))", - "GEOMETRYCOLLECTION ZM (POINT ZM (1 2 3 4))", + "POINT ZM(1 2 3 4)", + "LINESTRING ZM(1 2 3 4,5 6 7 8)", + "POLYGON ZM((0 1 2 3,4 0 2 3,4 5 2 3,0 5 2 3,0 1 2 3))", + "MULTIPOINT ZM((1 2 3 4),(5 6 7 8))", + "MULTILINESTRING ZM((1 2 3 4,5 6 7 8),(9 10 11 12,13 14 15 16))", + "MULTIPOLYGON ZM(((0 1 2 3,4 0 2 3,4 5 2 3,0 5 2 3,0 1 2 3)))", + "GEOMETRYCOLLECTION ZM(POINT ZM(1 2 3 4))", // Empty geometries "POINT EMPTY", "LINESTRING EMPTY", From 559439fdf9bfb554a76ed1c991f8327bd4b1dc42 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 23:45:06 -0600 Subject: [PATCH 09/16] test empties --- rust/sedona-geometry/src/ewkb_factory.rs | 26 +++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/rust/sedona-geometry/src/ewkb_factory.rs b/rust/sedona-geometry/src/ewkb_factory.rs index 53397f915..6a19d3870 100644 --- a/rust/sedona-geometry/src/ewkb_factory.rs +++ b/rust/sedona-geometry/src/ewkb_factory.rs @@ -280,7 +280,7 @@ mod test { } } - const ROUNDTRIP_CASES: [&str; 35] = [ + const ROUNDTRIP_CASES: [&str; 56] = [ // XY dimensions "POINT(1 2)", "LINESTRING(1 2,3 4,5 6)", @@ -321,5 +321,29 @@ mod test { "MULTILINESTRING EMPTY", "MULTIPOLYGON EMPTY", "GEOMETRYCOLLECTION EMPTY", + // Empty geometries Z + "POINT Z EMPTY", + "LINESTRING Z EMPTY", + "POLYGON Z EMPTY", + "MULTIPOINT Z EMPTY", + "MULTILINESTRING Z EMPTY", + "MULTIPOLYGON Z EMPTY", + "GEOMETRYCOLLECTION Z EMPTY", + // Empty geometries M + "POINT M EMPTY", + "LINESTRING M EMPTY", + "POLYGON M EMPTY", + "MULTIPOINT M EMPTY", + "MULTILINESTRING M EMPTY", + "MULTIPOLYGON M EMPTY", + "GEOMETRYCOLLECTION M EMPTY", + // Empty geometries ZM + "POINT ZM EMPTY", + "LINESTRING ZM EMPTY", + "POLYGON ZM EMPTY", + "MULTIPOINT ZM EMPTY", + "MULTILINESTRING ZM EMPTY", + "MULTIPOLYGON ZM EMPTY", + "GEOMETRYCOLLECTION ZM EMPTY", ]; } From 14894eeaad7887fe66bd26301d55b0323fbba404 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 21 Jan 2026 23:47:53 -0600 Subject: [PATCH 10/16] better order --- rust/sedona-functions/src/st_asewkb.rs | 4 +- rust/sedona-geometry/src/ewkb_factory.rs | 48 ++++++++++++------------ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs index c4602e143..ca65c1fcb 100644 --- a/rust/sedona-functions/src/st_asewkb.rs +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -99,7 +99,7 @@ impl SedonaScalarKernel for STAsEWKB { executor.execute_wkb_void(|maybe_wkb| { match maybe_wkb { Some(wkb) => { - write_ewkb_geometry(&wkb, maybe_srid, &mut builder) + write_ewkb_geometry(&mut builder, &wkb, maybe_srid) .map_err(|e| exec_datafusion_err!("EWKB writer error {e}"))?; builder.append_value([]); } @@ -165,7 +165,7 @@ impl SedonaScalarKernel for STAsEWKBItemCrs { executor.execute_wkb_void(|maybe_wkb| { match maybe_wkb { Some(wkb) => { - write_ewkb_geometry(&wkb, srid_iter.next().unwrap()?, &mut builder) + write_ewkb_geometry(&mut builder, &wkb, srid_iter.next().unwrap()?) .map_err(|e| exec_datafusion_err!("EWKB writer error {e}"))?; builder.append_value([]); } diff --git a/rust/sedona-geometry/src/ewkb_factory.rs b/rust/sedona-geometry/src/ewkb_factory.rs index 6a19d3870..d7b19adc1 100644 --- a/rust/sedona-geometry/src/ewkb_factory.rs +++ b/rust/sedona-geometry/src/ewkb_factory.rs @@ -30,21 +30,21 @@ const EWKB_M_BIT: u32 = 0x40000000; const EWKB_SRID_BIT: u32 = 0x20000000; pub fn write_ewkb_geometry( + buf: &mut impl Write, geom: &Wkb, srid: Option, - buf: &mut impl Write, ) -> Result<(), SedonaGeometryError> { match geom.as_type() { - geo_traits::GeometryType::Point(p) => write_ewkb_point(p, srid, buf), - geo_traits::GeometryType::LineString(ls) => write_ewkb_line_string(ls, srid, buf), - geo_traits::GeometryType::Polygon(poly) => write_ewkb_polygon(poly, srid, buf), - geo_traits::GeometryType::MultiPoint(mp) => write_ewkb_multi_point(mp, srid, buf), + geo_traits::GeometryType::Point(p) => write_ewkb_point(buf, p, srid), + geo_traits::GeometryType::LineString(ls) => write_ewkb_line_string(buf, ls, srid), + geo_traits::GeometryType::Polygon(poly) => write_ewkb_polygon(buf, poly, srid), + geo_traits::GeometryType::MultiPoint(mp) => write_ewkb_multi_point(buf, mp, srid), geo_traits::GeometryType::MultiLineString(mls) => { - write_ewkb_multi_line_string(mls, srid, buf) + write_ewkb_multi_line_string(buf, mls, srid) } - geo_traits::GeometryType::MultiPolygon(mpoly) => write_ewkb_multi_polygon(mpoly, srid, buf), + geo_traits::GeometryType::MultiPolygon(mpoly) => write_ewkb_multi_polygon(buf, mpoly, srid), geo_traits::GeometryType::GeometryCollection(gc) => { - write_ewkb_geometry_collection(gc, srid, buf) + write_ewkb_geometry_collection(buf, gc, srid) } _ => Err(SedonaGeometryError::Invalid( "Unsupported EWKB geometry type".to_string(), @@ -53,9 +53,9 @@ pub fn write_ewkb_geometry( } fn write_ewkb_point( + buf: &mut impl Write, geom: &wkb::reader::Point, srid: Option, - buf: &mut impl Write, ) -> Result<(), SedonaGeometryError> { write_geometry_type_and_srid(1, geom.dimension(), srid, buf)?; match geom.byte_order() { @@ -74,9 +74,9 @@ fn write_ewkb_point( } fn write_ewkb_line_string( + buf: &mut impl Write, geom: &wkb::reader::LineString, srid: Option, - buf: &mut impl Write, ) -> Result<(), SedonaGeometryError> { write_geometry_type_and_srid(2, geom.dimension(), srid, buf)?; let num_coords = geom.num_coords() as u32; @@ -94,8 +94,8 @@ fn write_ewkb_line_string( } fn write_linearring( - geom: &wkb::reader::LinearRing, buf: &mut impl Write, + geom: &wkb::reader::LinearRing, ) -> Result<(), SedonaGeometryError> { let num_coords = geom.num_coords() as u32; buf.write_all(&num_coords.to_le_bytes())?; @@ -112,84 +112,84 @@ fn write_linearring( } fn write_ewkb_polygon( + buf: &mut impl Write, geom: &wkb::reader::Polygon, srid: Option, - buf: &mut impl Write, ) -> Result<(), SedonaGeometryError> { write_geometry_type_and_srid(3, geom.dimension(), srid, buf)?; let num_rings = geom.num_interiors() as u32 + geom.exterior().is_some() as u32; buf.write_all(&num_rings.to_le_bytes())?; if let Some(exterior) = geom.exterior() { - write_linearring(exterior, buf)?; + write_linearring(buf, exterior)?; } for interior in geom.interiors() { - write_linearring(interior, buf)?; + write_linearring(buf, interior)?; } Ok(()) } fn write_ewkb_multi_point( + buf: &mut impl Write, geom: &wkb::reader::MultiPoint, srid: Option, - buf: &mut impl Write, ) -> Result<(), SedonaGeometryError> { write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; let num_children = geom.num_points() as u32; buf.write_all(&num_children.to_le_bytes())?; for child in geom.points() { - write_ewkb_point(&child, None, buf)?; + write_ewkb_point(buf, &child, None)?; } Ok(()) } fn write_ewkb_multi_line_string( + buf: &mut impl Write, geom: &wkb::reader::MultiLineString, srid: Option, - buf: &mut impl Write, ) -> Result<(), SedonaGeometryError> { write_geometry_type_and_srid(5, geom.dimension(), srid, buf)?; let num_children = geom.num_line_strings() as u32; buf.write_all(&num_children.to_le_bytes())?; for child in geom.line_strings() { - write_ewkb_line_string(child, None, buf)?; + write_ewkb_line_string(buf, child, None)?; } Ok(()) } fn write_ewkb_multi_polygon( + buf: &mut impl Write, geom: &wkb::reader::MultiPolygon, srid: Option, - buf: &mut impl Write, ) -> Result<(), SedonaGeometryError> { write_geometry_type_and_srid(6, geom.dimension(), srid, buf)?; let num_children = geom.num_polygons() as u32; buf.write_all(&num_children.to_le_bytes())?; for child in geom.polygons() { - write_ewkb_polygon(child, None, buf)?; + write_ewkb_polygon(buf, child, None)?; } Ok(()) } fn write_ewkb_geometry_collection( + buf: &mut impl Write, geom: &wkb::reader::GeometryCollection, srid: Option, - buf: &mut impl Write, ) -> Result<(), SedonaGeometryError> { write_geometry_type_and_srid(7, geom.dimension(), srid, buf)?; let num_children = geom.num_geometries() as u32; buf.write_all(&num_children.to_le_bytes())?; for child in geom.geometries() { - write_ewkb_geometry(child, None, buf)?; + write_ewkb_geometry(buf, child, None)?; } Ok(()) @@ -244,10 +244,10 @@ mod test { let wkb_geom = wkb::reader::read_wkb(&iso_wkb).unwrap(); let mut ewkb_no_srid = Vec::new(); - write_ewkb_geometry(&wkb_geom, None, &mut ewkb_no_srid).unwrap(); + write_ewkb_geometry(&mut ewkb_no_srid, &wkb_geom, None).unwrap(); let mut ewkb_with_srid = Vec::new(); - write_ewkb_geometry(&wkb_geom, Some(4326), &mut ewkb_with_srid).unwrap(); + write_ewkb_geometry(&mut ewkb_with_srid, &wkb_geom, Some(4326)).unwrap(); // Check that the ewkbs have the correct number of bytes assert_eq!( From da872d68742e121e885c96ce6e7efe5823bd5f3a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 22 Jan 2026 00:16:10 -0600 Subject: [PATCH 11/16] function tests --- rust/sedona-functions/src/st_asewkb.rs | 70 ++++++++++++++++++++------ 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs index ca65c1fcb..9c78239ca 100644 --- a/rust/sedona-functions/src/st_asewkb.rs +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -163,9 +163,10 @@ impl SedonaScalarKernel for STAsEWKBItemCrs { }); executor.execute_wkb_void(|maybe_wkb| { + let maybe_srid = srid_iter.next().unwrap()?; match maybe_wkb { Some(wkb) => { - write_ewkb_geometry(&mut builder, &wkb, srid_iter.next().unwrap()?) + write_ewkb_geometry(&mut builder, &wkb, maybe_srid) .map_err(|e| exec_datafusion_err!("EWKB writer error {e}"))?; builder.append_value([]); } @@ -181,15 +182,21 @@ impl SedonaScalarKernel for STAsEWKBItemCrs { #[cfg(test)] mod tests { - use arrow_array::{ArrayRef, BinaryArray, BinaryViewArray}; + use arrow_array::{ArrayRef, BinaryArray}; use datafusion_common::scalar::ScalarValue; use datafusion_expr::ScalarUDF; use rstest::rstest; - use sedona_schema::datatypes::{ - WKB_GEOGRAPHY, WKB_GEOGRAPHY_ITEM_CRS, WKB_GEOMETRY, WKB_GEOMETRY_ITEM_CRS, - WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOMETRY, + use sedona_schema::{ + crs::lnglat, + datatypes::{ + Edges, WKB_GEOGRAPHY, WKB_GEOGRAPHY_ITEM_CRS, WKB_GEOMETRY, WKB_GEOMETRY_ITEM_CRS, + WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOMETRY, + }, + }; + use sedona_testing::{ + create::{create_array_item_crs, create_scalar_item_crs}, + testers::ScalarUdfTester, }; - use sedona_testing::testers::ScalarUdfTester; use super::*; @@ -198,6 +205,11 @@ mod tests { 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, ]; + const POINT12_LNGLAT: [u8; 25] = [ + 0x01, 0x01, 0x00, 0x00, 0x20, 0xe6, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xf0, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, + ]; + #[test] fn udf_metadata() { let udf: ScalarUDF = st_asewkb_udf().into(); @@ -206,10 +218,12 @@ mod tests { } #[rstest] - fn udf_geometry_input( + fn udf_no_srid( #[values( WKB_GEOMETRY, WKB_GEOGRAPHY, + WKB_VIEW_GEOMETRY, + WKB_VIEW_GEOGRAPHY, WKB_GEOMETRY_ITEM_CRS.clone(), WKB_GEOGRAPHY_ITEM_CRS.clone(), )] @@ -238,23 +252,29 @@ mod tests { } #[rstest] - fn udf_geometry_view_input( - #[values(WKB_VIEW_GEOMETRY, WKB_VIEW_GEOGRAPHY)] sedona_type: SedonaType, + fn udf_srid_from_type( + #[values( + SedonaType::Wkb(Edges::Planar, lnglat()), + SedonaType::Wkb(Edges::Spherical, lnglat()) + )] + sedona_type: SedonaType, ) { let udf = st_asewkb_udf(); let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); assert_eq!( tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(), - ScalarValue::BinaryView(Some(POINT12.to_vec())) + ScalarValue::Binary(Some(POINT12_LNGLAT.to_vec())) ); assert_eq!( tester.invoke_wkb_scalar(None).unwrap(), - ScalarValue::BinaryView(None) + ScalarValue::Binary(None) ); - let expected_array: BinaryViewArray = [Some(POINT12), None, Some(POINT12)].iter().collect(); + let expected_array: BinaryArray = [Some(POINT12_LNGLAT), None, Some(POINT12_LNGLAT)] + .iter() + .collect(); assert_eq!( &tester .invoke_wkb_array(vec![Some("POINT (1 2)"), None, Some("POINT (1 2)")]) @@ -264,8 +284,28 @@ mod tests { } #[test] - fn aliases() { - let udf: ScalarUDF = st_asewkb_udf().into(); - assert!(udf.aliases().contains(&"st_aswkb".to_string())); + fn udf_srid_from_item_crs() { + let udf = st_asewkb_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY_ITEM_CRS.clone()]); + + let scalar_with_srid = + create_scalar_item_crs(Some("POINT (1 2)"), Some("EPSG:4326"), &WKB_GEOMETRY); + assert_eq!( + tester.invoke_scalar(scalar_with_srid).unwrap(), + ScalarValue::Binary(Some(POINT12_LNGLAT.to_vec())) + ); + + let array_with_srid = create_array_item_crs( + &[Some("POINT (1 2)"), None, Some("POINT (1 2)")], + [Some("EPSG:4326"), None, Some("EPSG:4326")], + &WKB_GEOMETRY, + ); + let expected_array: BinaryArray = [Some(POINT12_LNGLAT), None, Some(POINT12_LNGLAT)] + .iter() + .collect(); + assert_eq!( + &tester.invoke_array(array_with_srid).unwrap(), + &(Arc::new(expected_array) as ArrayRef) + ); } } From 90696450e1e4a69082b195d7c944541aff7ab346 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 22 Jan 2026 00:18:00 -0600 Subject: [PATCH 12/16] pin pandas --- python/sedonadb/pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/sedonadb/pyproject.toml b/python/sedonadb/pyproject.toml index 8e10481c2..cfc8e2ca6 100644 --- a/python/sedonadb/pyproject.toml +++ b/python/sedonadb/pyproject.toml @@ -38,7 +38,9 @@ test = [ "duckdb", "geoarrow-pyarrow", "geopandas", - "pandas", + # Short term workaround for Pandas 3.0 until this is fixed + # https://github.com/apache/sedona-db/issues/536 + "pandas<3", "polars", "pytest", ] From e0a569d15124e123f53aeb317cf71d21cfb55f97 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 22 Jan 2026 00:19:58 -0600 Subject: [PATCH 13/16] Update rust/sedona-functions/src/st_asewkb.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- rust/sedona-functions/src/st_asewkb.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs index 9c78239ca..c89a43a82 100644 --- a/rust/sedona-functions/src/st_asewkb.rs +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -35,7 +35,7 @@ use crate::executor::WkbExecutor; /// ST_AsEWKB() scalar UDF implementation /// -/// An implementation of WKB writing using GeoRust's wkt crate. +/// An implementation of EWKB writing using Sedona's geometry EWKB/WKB facilities. pub fn st_asewkb_udf() -> SedonaScalarUDF { SedonaScalarUDF::new( "st_asewkb", From 44de9995d4b3d70d6fab8a98337141c5bd2b1677 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 22 Jan 2026 15:44:37 -0600 Subject: [PATCH 14/16] remove test dep change --- python/sedonadb/pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/sedonadb/pyproject.toml b/python/sedonadb/pyproject.toml index cfc8e2ca6..8e10481c2 100644 --- a/python/sedonadb/pyproject.toml +++ b/python/sedonadb/pyproject.toml @@ -38,9 +38,7 @@ test = [ "duckdb", "geoarrow-pyarrow", "geopandas", - # Short term workaround for Pandas 3.0 until this is fixed - # https://github.com/apache/sedona-db/issues/536 - "pandas<3", + "pandas", "polars", "pytest", ] From 36c7846def47d393137710af4fa4c83a60af8c69 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 22 Jan 2026 15:44:45 -0600 Subject: [PATCH 15/16] add nested geometry collection test --- rust/sedona-geometry/src/ewkb_factory.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rust/sedona-geometry/src/ewkb_factory.rs b/rust/sedona-geometry/src/ewkb_factory.rs index d7b19adc1..9a54fa736 100644 --- a/rust/sedona-geometry/src/ewkb_factory.rs +++ b/rust/sedona-geometry/src/ewkb_factory.rs @@ -280,7 +280,7 @@ mod test { } } - const ROUNDTRIP_CASES: [&str; 56] = [ + const ROUNDTRIP_CASES: [&str; 60] = [ // XY dimensions "POINT(1 2)", "LINESTRING(1 2,3 4,5 6)", @@ -289,6 +289,7 @@ mod test { "MULTILINESTRING((1 2,3 4),(5 6,7 8))", "MULTIPOLYGON(((0 1,2 0,2 3,0 3,0 1)))", "GEOMETRYCOLLECTION(POINT(1 2),LINESTRING(3 4,5 6))", + "GEOMETRYCOLLECTION(GEOMETRYCOLLECTION(POINT(1 2)))", // XYZ dimensions "POINT Z(1 2 3)", "LINESTRING Z(1 2 3,4 5 6)", @@ -297,6 +298,7 @@ mod test { "MULTILINESTRING Z((1 2 3,4 5 6),(7 8 9,10 11 12))", "MULTIPOLYGON Z(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))", "GEOMETRYCOLLECTION Z(POINT Z(1 2 3))", + "GEOMETRYCOLLECTION Z(GEOMETRYCOLLECTION Z(POINT Z(1 2 3)))", // XYM dimensions "POINT M(1 2 3)", "LINESTRING M(1 2 3,4 5 6)", @@ -305,6 +307,7 @@ mod test { "MULTILINESTRING M((1 2 3,4 5 6),(7 8 9,10 11 12))", "MULTIPOLYGON M(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))", "GEOMETRYCOLLECTION M(POINT M(1 2 3))", + "GEOMETRYCOLLECTION M(GEOMETRYCOLLECTION M(POINT M(1 2 3)))", // XYZM dimensions "POINT ZM(1 2 3 4)", "LINESTRING ZM(1 2 3 4,5 6 7 8)", @@ -313,6 +316,7 @@ mod test { "MULTILINESTRING ZM((1 2 3 4,5 6 7 8),(9 10 11 12,13 14 15 16))", "MULTIPOLYGON ZM(((0 1 2 3,4 0 2 3,4 5 2 3,0 5 2 3,0 1 2 3)))", "GEOMETRYCOLLECTION ZM(POINT ZM(1 2 3 4))", + "GEOMETRYCOLLECTION ZM(GEOMETRYCOLLECTION ZM(POINT ZM(1 2 3 4)))", // Empty geometries "POINT EMPTY", "LINESTRING EMPTY", From 1c6501fb69bcf973eaed1126e56e3c9a439b989f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 22 Jan 2026 16:37:56 -0600 Subject: [PATCH 16/16] test questionable crs values --- rust/sedona-common/src/error.rs | 17 ++++-- rust/sedona-functions/src/st_asewkb.rs | 75 ++++++++++++++++++++++---- rust/sedona-schema/src/crs.rs | 11 ++-- 3 files changed, 83 insertions(+), 20 deletions(-) diff --git a/rust/sedona-common/src/error.rs b/rust/sedona-common/src/error.rs index 101db2c45..5b7c04b06 100644 --- a/rust/sedona-common/src/error.rs +++ b/rust/sedona-common/src/error.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -/// Macro to create Sedona Internal Error that avoids the misleading error message from -/// DataFusionError::Internal. +/// Macro to create Sedona Internal Error from places such as `map_err()` that +/// require a DataFusionError instead of an Err #[macro_export] -macro_rules! sedona_internal_err { +macro_rules! sedona_internal_datafusion_err { ($($args:expr),*) => {{ let msg = std::format!( "SedonaDB internal error: {}{}.\nThis issue was likely caused by a bug in SedonaDB's code. \ @@ -28,7 +28,16 @@ macro_rules! sedona_internal_err { datafusion_common::DataFusionError::get_back_trace(), ); // We avoid using Internal to avoid the message suggesting it's internal to DataFusion - Err(datafusion_common::DataFusionError::External(msg.into())) + datafusion_common::DataFusionError::External(msg.into()) + }}; +} + +/// Macro to create Sedona Internal Error that avoids the misleading error message from +/// DataFusionError::Internal. +#[macro_export] +macro_rules! sedona_internal_err { + ($($args:expr),*) => {{ + Err($crate::sedona_internal_datafusion_err!($($args),*)) }}; } diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs index c89a43a82..76cab1623 100644 --- a/rust/sedona-functions/src/st_asewkb.rs +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -87,8 +87,9 @@ impl SedonaScalarKernel for STAsEWKB { let maybe_srid = match &arg_types[0] { SedonaType::Wkb(_, crs) | SedonaType::WkbView(_, crs) => match crs { Some(crs) => match crs.srid()? { - Some(srid) if srid != 0 => Some(srid), - _ => None, + Some(0) => None, + Some(srid) => Some(srid), + _ => return exec_err!("CRS {crs} cannot be represented by a single SRID"), }, None => None, }, @@ -153,13 +154,20 @@ impl SedonaScalarKernel for STAsEWKBItemCrs { .into_iter() .map(|maybe_crs_str| match maybe_crs_str { None => Ok(None), - Some(crs_str) => match deserialize_crs(crs_str)? { - None => Ok(None), - Some(crs) => match crs.srid()? { - Some(srid) => Ok(Some(srid)), - None => exec_err!("CRS {crs} cannot be represented by a single SRID"), - }, - }, + Some(crs_str) => { + match deserialize_crs(crs_str) + .map_err(|e| exec_datafusion_err!("{}", e.message()))? + { + None => Ok(None), + Some(crs) => match crs.srid()? { + Some(0) => Ok(None), + Some(srid) => Ok(Some(srid)), + _ => { + exec_err!("CRS {crs} cannot be represented by a single SRID") + } + }, + } + } }); executor.execute_wkb_void(|maybe_wkb| { @@ -308,4 +316,53 @@ mod tests { &(Arc::new(expected_array) as ArrayRef) ); } + + #[test] + fn udf_invalid_type_crs() { + let udf = st_asewkb_udf(); + + let crs_where_srid_returns_none = deserialize_crs("EPSG:9999999999").unwrap(); + let sedona_type = SedonaType::Wkb(Edges::Planar, crs_where_srid_returns_none); + + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + let err = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap_err(); + assert_eq!( + err.message(), + "CRS epsg:9999999999 cannot be represented by a single SRID" + ); + } + + #[test] + fn udf_invalid_item_crs() { + let udf = st_asewkb_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY_ITEM_CRS.clone()]); + + // Very large SRID + let scalar_with_srid_outside_u32 = create_scalar_item_crs( + Some("POINT (1 2)"), + Some("EPSG:999999999999"), + &WKB_GEOMETRY, + ); + let err = tester + .invoke_scalar(scalar_with_srid_outside_u32) + .unwrap_err(); + assert_eq!( + err.message(), + "CRS epsg:999999999999 cannot be represented by a single SRID" + ); + + // CRS that fails to parse in deserialize_crs() + let scalar_with_unparsable_crs = create_scalar_item_crs( + Some("POINT (1 2)"), + Some("This is invalid JSON and also not auth:code"), + &WKB_GEOMETRY, + ); + let err = tester + .invoke_scalar(scalar_with_unparsable_crs) + .unwrap_err(); + assert_eq!( + err.message(), + "Error deserializing PROJJSON Crs: expected value at line 1 column 1" + ); + } } diff --git a/rust/sedona-schema/src/crs.rs b/rust/sedona-schema/src/crs.rs index a4649b15f..3813df11c 100644 --- a/rust/sedona-schema/src/crs.rs +++ b/rust/sedona-schema/src/crs.rs @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use datafusion_common::{DataFusionError, Result}; +use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result}; use lru::LruCache; use std::cell::RefCell; use std::fmt::{Debug, Display}; @@ -318,9 +318,8 @@ impl FromStr for ProjJSON { type Err = DataFusionError; fn from_str(s: &str) -> Result { - let value: Value = serde_json::from_str(s).map_err(|err| { - DataFusionError::Internal(format!("Error deserializing PROJJSON Crs: {err}")) - })?; + let value: Value = serde_json::from_str(s) + .map_err(|err| plan_datafusion_err!("Error deserializing PROJJSON Crs: {err}"))?; Self::try_new(value) } @@ -329,9 +328,7 @@ impl FromStr for ProjJSON { impl ProjJSON { pub fn try_new(value: Value) -> Result { if !value.is_object() { - return Err(DataFusionError::Internal(format!( - "Can't create PROJJSON from non-object: {value}" - ))); + return plan_err!("Can't create PROJJSON from non-object: {value}"); } Ok(Self { value })