diff --git a/python/sedonadb/python/sedonadb/testing.py b/python/sedonadb/python/sedonadb/testing.py index ec8b870a3..eb2710601 100644 --- a/python/sedonadb/python/sedonadb/testing.py +++ b/python/sedonadb/python/sedonadb/testing.py @@ -635,11 +635,15 @@ def __init__(self, uri=None): cur.execute("SET max_parallel_workers_per_gather TO 0") -def geom_or_null(arg): +def geom_or_null(arg, srid=None): """Format SQL expression for a geometry object or NULL""" if arg is None: return "NULL" - return f"ST_GeomFromText('{arg}')" + + if srid is None: + return f"ST_GeomFromText('{arg}')" + else: + return f"ST_GeomFromEWKT('SRID={srid};{arg}')" def geog_or_null(arg): diff --git a/python/sedonadb/tests/functions/test_wkb.py b/python/sedonadb/tests/functions/test_wkb.py new file mode 100644 index 000000000..424d9a36b --- /dev/null +++ b/python/sedonadb/tests/functions/test_wkb.py @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +import shapely +from sedonadb.testing import PostGIS, SedonaDB, geom_or_null + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize("srid", [None, 4326]) +@pytest.mark.parametrize( + "geom", + [ + # XY dimensions + "POINT (1 2)", + "LINESTRING (1 2, 3 4, 5 6)", + "POLYGON ((0 1, 2 0, 2 3, 0 3, 0 1))", + "MULTIPOINT ((1 2), (3 4))", + "MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", + "MULTIPOLYGON (((0 1, 2 0, 2 3, 0 3, 0 1)))", + "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))", + # XYZ dimensions + "POINT Z (1 2 3)", + "LINESTRING Z (1 2 3, 4 5 6)", + "POLYGON Z ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))", + "MULTIPOINT Z ((1 2 3), (4 5 6))", + "MULTILINESTRING Z ((1 2 3, 4 5 6), (7 8 9, 10 11 12))", + "MULTIPOLYGON Z (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))", + "GEOMETRYCOLLECTION Z (POINT Z (1 2 3))", + # XYM dimensions + "POINT M (1 2 3)", + "LINESTRING M (1 2 3, 4 5 6)", + "POLYGON M ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))", + "MULTIPOINT M ((1 2 3), (4 5 6))", + "MULTILINESTRING M ((1 2 3, 4 5 6), (7 8 9, 10 11 12))", + "MULTIPOLYGON M (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))", + "GEOMETRYCOLLECTION M (POINT M (1 2 3))", + # XYZM dimensions + "POINT ZM (1 2 3 4)", + "LINESTRING ZM (1 2 3 4, 5 6 7 8)", + "POLYGON ZM ((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3))", + "MULTIPOINT ZM ((1 2 3 4), (5 6 7 8))", + "MULTILINESTRING ZM ((1 2 3 4, 5 6 7 8), (9 10 11 12, 13 14 15 16))", + "MULTIPOLYGON ZM (((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3)))", + "GEOMETRYCOLLECTION ZM (POINT ZM (1 2 3 4))", + # Empty geometries + "POINT EMPTY", + "LINESTRING EMPTY", + "POLYGON EMPTY", + "MULTIPOINT EMPTY", + "MULTILINESTRING EMPTY", + "MULTIPOLYGON EMPTY", + "GEOMETRYCOLLECTION EMPTY", + # NULL + None, + ], +) +def test_st_asewkb(eng, srid, geom): + eng = eng.create_or_skip() + + if geom is not None: + shapely_geom = shapely.from_wkt(geom) + if srid is not None: + shapely_geom = shapely.set_srid(shapely_geom, srid) + write_srid = True + else: + write_srid = False + + expected = shapely.to_wkb( + shapely_geom, + output_dimension=4, + byte_order=1, + flavor="extended", + include_srid=write_srid, + ) + else: + expected = None + + eng.assert_query_result(f"SELECT ST_AsEWKB({geom_or_null(geom, srid)})", expected) diff --git a/rust/sedona-common/src/error.rs b/rust/sedona-common/src/error.rs index 101db2c45..5b7c04b06 100644 --- a/rust/sedona-common/src/error.rs +++ b/rust/sedona-common/src/error.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -/// Macro to create Sedona Internal Error that avoids the misleading error message from -/// DataFusionError::Internal. +/// Macro to create Sedona Internal Error from places such as `map_err()` that +/// require a DataFusionError instead of an Err #[macro_export] -macro_rules! sedona_internal_err { +macro_rules! sedona_internal_datafusion_err { ($($args:expr),*) => {{ let msg = std::format!( "SedonaDB internal error: {}{}.\nThis issue was likely caused by a bug in SedonaDB's code. \ @@ -28,7 +28,16 @@ macro_rules! sedona_internal_err { datafusion_common::DataFusionError::get_back_trace(), ); // We avoid using Internal to avoid the message suggesting it's internal to DataFusion - Err(datafusion_common::DataFusionError::External(msg.into())) + datafusion_common::DataFusionError::External(msg.into()) + }}; +} + +/// Macro to create Sedona Internal Error that avoids the misleading error message from +/// DataFusionError::Internal. +#[macro_export] +macro_rules! sedona_internal_err { + ($($args:expr),*) => {{ + Err($crate::sedona_internal_datafusion_err!($($args),*)) }}; } diff --git a/rust/sedona-functions/src/executor.rs b/rust/sedona-functions/src/executor.rs index f1d679d28..8809ac0ef 100644 --- a/rust/sedona-functions/src/executor.rs +++ b/rust/sedona-functions/src/executor.rs @@ -383,6 +383,20 @@ impl ScalarGeo for ScalarValue { | ScalarValue::BinaryView(maybe_item) | ScalarValue::LargeBinary(maybe_item) => Ok(maybe_item.as_deref()), ScalarValue::Null => Ok(None), + ScalarValue::Struct(s) + if s.fields().len() == 2 + && s.fields()[0].name() == "item" + && s.fields()[1].name() == "crs" => + { + let item_type = SedonaType::from_storage_field(&s.fields()[0])?; + let mut out = None; + s.column(0).iter_as_wkb_bytes(&item_type, 1, |v| { + out = v; + Ok(()) + })?; + + Ok(out) + } _ => sedona_internal_err!("Can't iterate over {:?} ScalarValue as &[u8]", self), } } diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 6e38e4262..6e8f884bd 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -28,6 +28,7 @@ mod st_affine_helpers; pub mod st_analyze_agg; mod st_area; mod st_asbinary; +mod st_asewkb; mod st_asgeojson; mod st_astext; mod st_azimuth; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index 8161923a5..14405409d 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -66,6 +66,7 @@ pub fn default_function_set() -> FunctionSet { crate::st_affine::st_affine_udf, crate::st_area::st_area_udf, crate::st_asbinary::st_asbinary_udf, + crate::st_asewkb::st_asewkb_udf, crate::st_asgeojson::st_asgeojson_udf, crate::st_astext::st_astext_udf, crate::st_azimuth::st_azimuth_udf, diff --git a/rust/sedona-functions/src/st_asewkb.rs b/rust/sedona-functions/src/st_asewkb.rs new file mode 100644 index 000000000..76cab1623 --- /dev/null +++ b/rust/sedona-functions/src/st_asewkb.rs @@ -0,0 +1,368 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::{sync::Arc, vec}; + +use arrow_array::builder::BinaryBuilder; +use arrow_schema::DataType; +use datafusion_common::{ + cast::{as_string_view_array, as_struct_array}, + error::Result, + exec_datafusion_err, exec_err, ScalarValue, +}; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use sedona_common::sedona_internal_err; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::{ewkb_factory::write_ewkb_geometry, wkb_factory::WKB_MIN_PROBABLE_BYTES}; +use sedona_schema::{crs::deserialize_crs, datatypes::SedonaType, matchers::ArgMatcher}; + +use crate::executor::WkbExecutor; + +/// ST_AsEWKB() scalar UDF implementation +/// +/// An implementation of EWKB writing using Sedona's geometry EWKB/WKB facilities. +pub fn st_asewkb_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "st_asewkb", + vec![Arc::new(STAsEWKBItemCrs {}), Arc::new(STAsEWKB {})], + Volatility::Immutable, + Some(st_asewkb_doc()), + ) +} + +fn st_asewkb_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + r#"Return the Extended Well-Known Binary (EWKB) representation of a geometry or geography. + +Compared to ST_AsBinary(), this function embeds an integer SRID derived from the type or derived +from the item-level CRS for item CRS types. This is particularly useful for integration with +PostGIS"#, + "ST_AsEWKB (A: Geometry)", + ) + .with_argument("geom", "geometry: Input geometry or geography") + .with_sql_example("SELECT ST_AsEWKB(ST_Point(1.0, 2.0, 4326))") + .build() +} + +#[derive(Debug)] +struct STAsEWKB {} + +impl SedonaScalarKernel for STAsEWKB { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry_or_geography()], + SedonaType::Arrow(DataType::Binary), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = WkbExecutor::new(arg_types, args); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + + let maybe_srid = match &arg_types[0] { + SedonaType::Wkb(_, crs) | SedonaType::WkbView(_, crs) => match crs { + Some(crs) => match crs.srid()? { + Some(0) => None, + Some(srid) => Some(srid), + _ => return exec_err!("CRS {crs} cannot be represented by a single SRID"), + }, + None => None, + }, + SedonaType::Arrow(DataType::Null) => None, + _ => return sedona_internal_err!("Unexpected input to invoke_batch in ST_AsEWKB"), + }; + + executor.execute_wkb_void(|maybe_wkb| { + match maybe_wkb { + Some(wkb) => { + write_ewkb_geometry(&mut builder, &wkb, maybe_srid) + .map_err(|e| exec_datafusion_err!("EWKB writer error {e}"))?; + builder.append_value([]); + } + None => builder.append_null(), + } + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[derive(Debug)] +struct STAsEWKBItemCrs {} + +impl SedonaScalarKernel for STAsEWKBItemCrs { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_item_crs()], + SedonaType::Arrow(DataType::Binary), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = WkbExecutor::new(arg_types, args); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + + let crs_array_ref = match &args[0] { + ColumnarValue::Array(array) => { + let struct_array = as_struct_array(array)?; + struct_array.column(1).clone() + } + ColumnarValue::Scalar(ScalarValue::Struct(struct_array)) => { + struct_array.column(1).clone() + } + _ => return sedona_internal_err!("Unexpected item_crs type"), + }; + + let crs_array = as_string_view_array(&crs_array_ref)?; + let mut srid_iter = crs_array + .into_iter() + .map(|maybe_crs_str| match maybe_crs_str { + None => Ok(None), + Some(crs_str) => { + match deserialize_crs(crs_str) + .map_err(|e| exec_datafusion_err!("{}", e.message()))? + { + None => Ok(None), + Some(crs) => match crs.srid()? { + Some(0) => Ok(None), + Some(srid) => Ok(Some(srid)), + _ => { + exec_err!("CRS {crs} cannot be represented by a single SRID") + } + }, + } + } + }); + + executor.execute_wkb_void(|maybe_wkb| { + let maybe_srid = srid_iter.next().unwrap()?; + match maybe_wkb { + Some(wkb) => { + write_ewkb_geometry(&mut builder, &wkb, maybe_srid) + .map_err(|e| exec_datafusion_err!("EWKB writer error {e}"))?; + builder.append_value([]); + } + None => builder.append_null(), + } + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[cfg(test)] +mod tests { + use arrow_array::{ArrayRef, BinaryArray}; + use datafusion_common::scalar::ScalarValue; + use datafusion_expr::ScalarUDF; + use rstest::rstest; + use sedona_schema::{ + crs::lnglat, + datatypes::{ + Edges, WKB_GEOGRAPHY, WKB_GEOGRAPHY_ITEM_CRS, WKB_GEOMETRY, WKB_GEOMETRY_ITEM_CRS, + WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOMETRY, + }, + }; + use sedona_testing::{ + create::{create_array_item_crs, create_scalar_item_crs}, + testers::ScalarUdfTester, + }; + + use super::*; + + const POINT12: [u8; 21] = [ + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, + ]; + + const POINT12_LNGLAT: [u8; 25] = [ + 0x01, 0x01, 0x00, 0x00, 0x20, 0xe6, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xf0, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, + ]; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = st_asewkb_udf().into(); + assert_eq!(udf.name(), "st_asewkb"); + assert!(udf.documentation().is_some()) + } + + #[rstest] + fn udf_no_srid( + #[values( + WKB_GEOMETRY, + WKB_GEOGRAPHY, + WKB_VIEW_GEOMETRY, + WKB_VIEW_GEOGRAPHY, + WKB_GEOMETRY_ITEM_CRS.clone(), + WKB_GEOGRAPHY_ITEM_CRS.clone(), + )] + sedona_type: SedonaType, + ) { + let udf = st_asewkb_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + + assert_eq!( + tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(), + ScalarValue::Binary(Some(POINT12.to_vec())) + ); + + assert_eq!( + tester.invoke_wkb_scalar(None).unwrap(), + ScalarValue::Binary(None) + ); + + let expected_array: BinaryArray = [Some(POINT12), None, Some(POINT12)].iter().collect(); + assert_eq!( + &tester + .invoke_wkb_array(vec![Some("POINT (1 2)"), None, Some("POINT (1 2)")]) + .unwrap(), + &(Arc::new(expected_array) as ArrayRef) + ); + } + + #[rstest] + fn udf_srid_from_type( + #[values( + SedonaType::Wkb(Edges::Planar, lnglat()), + SedonaType::Wkb(Edges::Spherical, lnglat()) + )] + sedona_type: SedonaType, + ) { + let udf = st_asewkb_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + + assert_eq!( + tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(), + ScalarValue::Binary(Some(POINT12_LNGLAT.to_vec())) + ); + + assert_eq!( + tester.invoke_wkb_scalar(None).unwrap(), + ScalarValue::Binary(None) + ); + + let expected_array: BinaryArray = [Some(POINT12_LNGLAT), None, Some(POINT12_LNGLAT)] + .iter() + .collect(); + assert_eq!( + &tester + .invoke_wkb_array(vec![Some("POINT (1 2)"), None, Some("POINT (1 2)")]) + .unwrap(), + &(Arc::new(expected_array) as ArrayRef) + ); + } + + #[test] + fn udf_srid_from_item_crs() { + let udf = st_asewkb_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY_ITEM_CRS.clone()]); + + let scalar_with_srid = + create_scalar_item_crs(Some("POINT (1 2)"), Some("EPSG:4326"), &WKB_GEOMETRY); + assert_eq!( + tester.invoke_scalar(scalar_with_srid).unwrap(), + ScalarValue::Binary(Some(POINT12_LNGLAT.to_vec())) + ); + + let array_with_srid = create_array_item_crs( + &[Some("POINT (1 2)"), None, Some("POINT (1 2)")], + [Some("EPSG:4326"), None, Some("EPSG:4326")], + &WKB_GEOMETRY, + ); + let expected_array: BinaryArray = [Some(POINT12_LNGLAT), None, Some(POINT12_LNGLAT)] + .iter() + .collect(); + assert_eq!( + &tester.invoke_array(array_with_srid).unwrap(), + &(Arc::new(expected_array) as ArrayRef) + ); + } + + #[test] + fn udf_invalid_type_crs() { + let udf = st_asewkb_udf(); + + let crs_where_srid_returns_none = deserialize_crs("EPSG:9999999999").unwrap(); + let sedona_type = SedonaType::Wkb(Edges::Planar, crs_where_srid_returns_none); + + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + let err = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap_err(); + assert_eq!( + err.message(), + "CRS epsg:9999999999 cannot be represented by a single SRID" + ); + } + + #[test] + fn udf_invalid_item_crs() { + let udf = st_asewkb_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY_ITEM_CRS.clone()]); + + // Very large SRID + let scalar_with_srid_outside_u32 = create_scalar_item_crs( + Some("POINT (1 2)"), + Some("EPSG:999999999999"), + &WKB_GEOMETRY, + ); + let err = tester + .invoke_scalar(scalar_with_srid_outside_u32) + .unwrap_err(); + assert_eq!( + err.message(), + "CRS epsg:999999999999 cannot be represented by a single SRID" + ); + + // CRS that fails to parse in deserialize_crs() + let scalar_with_unparsable_crs = create_scalar_item_crs( + Some("POINT (1 2)"), + Some("This is invalid JSON and also not auth:code"), + &WKB_GEOMETRY, + ); + let err = tester + .invoke_scalar(scalar_with_unparsable_crs) + .unwrap_err(); + assert_eq!( + err.message(), + "Error deserializing PROJJSON Crs: expected value at line 1 column 1" + ); + } +} diff --git a/rust/sedona-geometry/src/ewkb_factory.rs b/rust/sedona-geometry/src/ewkb_factory.rs new file mode 100644 index 000000000..9a54fa736 --- /dev/null +++ b/rust/sedona-geometry/src/ewkb_factory.rs @@ -0,0 +1,353 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::io::Write; + +use geo_traits::{ + GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, MultiPointTrait, + MultiPolygonTrait, PointTrait, PolygonTrait, +}; +use wkb::reader::Wkb; + +use crate::{error::SedonaGeometryError, wkb_factory::write_wkb_coord_trait}; + +const EWKB_Z_BIT: u32 = 0x80000000; +const EWKB_M_BIT: u32 = 0x40000000; +const EWKB_SRID_BIT: u32 = 0x20000000; + +pub fn write_ewkb_geometry( + buf: &mut impl Write, + geom: &Wkb, + srid: Option, +) -> Result<(), SedonaGeometryError> { + match geom.as_type() { + geo_traits::GeometryType::Point(p) => write_ewkb_point(buf, p, srid), + geo_traits::GeometryType::LineString(ls) => write_ewkb_line_string(buf, ls, srid), + geo_traits::GeometryType::Polygon(poly) => write_ewkb_polygon(buf, poly, srid), + geo_traits::GeometryType::MultiPoint(mp) => write_ewkb_multi_point(buf, mp, srid), + geo_traits::GeometryType::MultiLineString(mls) => { + write_ewkb_multi_line_string(buf, mls, srid) + } + geo_traits::GeometryType::MultiPolygon(mpoly) => write_ewkb_multi_polygon(buf, mpoly, srid), + geo_traits::GeometryType::GeometryCollection(gc) => { + write_ewkb_geometry_collection(buf, gc, srid) + } + _ => Err(SedonaGeometryError::Invalid( + "Unsupported EWKB geometry type".to_string(), + )), + } +} + +fn write_ewkb_point( + buf: &mut impl Write, + geom: &wkb::reader::Point, + srid: Option, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(1, geom.dimension(), srid, buf)?; + match geom.byte_order() { + wkb::Endianness::BigEndian => match geom.coord() { + Some(c) => write_wkb_coord_trait(buf, &c)?, + None => { + for _ in 0..geom.dim().size() { + buf.write_all(&f64::NAN.to_le_bytes())?; + } + } + }, + wkb::Endianness::LittleEndian => buf.write_all(geom.coord_slice())?, + } + + Ok(()) +} + +fn write_ewkb_line_string( + buf: &mut impl Write, + geom: &wkb::reader::LineString, + srid: Option, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(2, geom.dimension(), srid, buf)?; + let num_coords = geom.num_coords() as u32; + buf.write_all(&num_coords.to_le_bytes())?; + match geom.byte_order() { + wkb::Endianness::BigEndian => { + for c in geom.coords() { + write_wkb_coord_trait(buf, &c)?; + } + } + wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, + } + + Ok(()) +} + +fn write_linearring( + buf: &mut impl Write, + geom: &wkb::reader::LinearRing, +) -> Result<(), SedonaGeometryError> { + let num_coords = geom.num_coords() as u32; + buf.write_all(&num_coords.to_le_bytes())?; + match geom.byte_order() { + wkb::Endianness::BigEndian => { + for c in geom.coords() { + write_wkb_coord_trait(buf, &c)?; + } + } + wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, + } + + Ok(()) +} + +fn write_ewkb_polygon( + buf: &mut impl Write, + geom: &wkb::reader::Polygon, + srid: Option, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(3, geom.dimension(), srid, buf)?; + let num_rings = geom.num_interiors() as u32 + geom.exterior().is_some() as u32; + buf.write_all(&num_rings.to_le_bytes())?; + + if let Some(exterior) = geom.exterior() { + write_linearring(buf, exterior)?; + } + + for interior in geom.interiors() { + write_linearring(buf, interior)?; + } + + Ok(()) +} + +fn write_ewkb_multi_point( + buf: &mut impl Write, + geom: &wkb::reader::MultiPoint, + srid: Option, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; + let num_children = geom.num_points() as u32; + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.points() { + write_ewkb_point(buf, &child, None)?; + } + + Ok(()) +} + +fn write_ewkb_multi_line_string( + buf: &mut impl Write, + geom: &wkb::reader::MultiLineString, + srid: Option, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(5, geom.dimension(), srid, buf)?; + let num_children = geom.num_line_strings() as u32; + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.line_strings() { + write_ewkb_line_string(buf, child, None)?; + } + + Ok(()) +} + +fn write_ewkb_multi_polygon( + buf: &mut impl Write, + geom: &wkb::reader::MultiPolygon, + srid: Option, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(6, geom.dimension(), srid, buf)?; + let num_children = geom.num_polygons() as u32; + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.polygons() { + write_ewkb_polygon(buf, child, None)?; + } + + Ok(()) +} + +fn write_ewkb_geometry_collection( + buf: &mut impl Write, + geom: &wkb::reader::GeometryCollection, + srid: Option, +) -> Result<(), SedonaGeometryError> { + write_geometry_type_and_srid(7, geom.dimension(), srid, buf)?; + let num_children = geom.num_geometries() as u32; + buf.write_all(&num_children.to_le_bytes())?; + + for child in geom.geometries() { + write_ewkb_geometry(buf, child, None)?; + } + + Ok(()) +} + +fn write_geometry_type_and_srid( + mut base_type: u32, + dimensions: wkb::reader::Dimension, + srid: Option, + buf: &mut impl Write, +) -> Result<(), SedonaGeometryError> { + buf.write_all(&[0x01])?; + + match dimensions { + wkb::reader::Dimension::Xy => {} + wkb::reader::Dimension::Xyz => base_type |= EWKB_Z_BIT, + wkb::reader::Dimension::Xym => base_type |= EWKB_M_BIT, + wkb::reader::Dimension::Xyzm => { + base_type |= EWKB_Z_BIT; + base_type |= EWKB_M_BIT; + } + } + + if let Some(srid) = srid { + base_type |= EWKB_SRID_BIT; + buf.write_all(&base_type.to_le_bytes())?; + buf.write_all(&srid.to_le_bytes())?; + } else { + buf.write_all(&base_type.to_le_bytes())?; + } + + Ok(()) +} + +#[cfg(test)] +mod test { + + use rstest::rstest; + use wkb::{writer::WriteOptions, Endianness}; + + use super::*; + + #[rstest] + fn test_roundtrip( + #[values(Endianness::LittleEndian, Endianness::BigEndian)] endianness: Endianness, + ) { + for wkt_str in ROUNDTRIP_CASES { + let wkt: wkt::Wkt = wkt_str.parse().unwrap(); + + let mut iso_wkb = Vec::new(); + wkb::writer::write_geometry(&mut iso_wkb, &wkt, &WriteOptions { endianness }).unwrap(); + let wkb_geom = wkb::reader::read_wkb(&iso_wkb).unwrap(); + + let mut ewkb_no_srid = Vec::new(); + write_ewkb_geometry(&mut ewkb_no_srid, &wkb_geom, None).unwrap(); + + let mut ewkb_with_srid = Vec::new(); + write_ewkb_geometry(&mut ewkb_with_srid, &wkb_geom, Some(4326)).unwrap(); + + // Check that the ewkbs have the correct number of bytes + assert_eq!( + ewkb_no_srid.len(), + iso_wkb.len(), + "incorrect number of bytes for case {wkt_str} without srid" + ); + assert_eq!( + ewkb_with_srid.len(), + ewkb_no_srid.len() + size_of::(), + "incorrect number of bytes for case {wkt_str} with srid" + ); + + // Check the rendered WKT of the no srid EWKB + let wkb_geom_roundtrip_no_srid = wkb::reader::read_wkb(&ewkb_no_srid).unwrap(); + let mut wkt_roundtrip_no_srid = String::new(); + wkt::to_wkt::write_geometry(&mut wkt_roundtrip_no_srid, &wkb_geom_roundtrip_no_srid) + .unwrap(); + assert_eq!(wkt_roundtrip_no_srid, wkt_str); + + // Check the rendered WKT of the srid EWKB + let wkb_geom_roundtrip_with_srid = wkb::reader::read_wkb(&ewkb_with_srid).unwrap(); + let mut wkt_roundtrip_with_srid = String::new(); + wkt::to_wkt::write_geometry( + &mut wkt_roundtrip_with_srid, + &wkb_geom_roundtrip_with_srid, + ) + .unwrap(); + assert_eq!(wkt_roundtrip_with_srid, wkt_str); + } + } + + const ROUNDTRIP_CASES: [&str; 60] = [ + // XY dimensions + "POINT(1 2)", + "LINESTRING(1 2,3 4,5 6)", + "POLYGON((0 1,2 0,2 3,0 3,0 1))", + "MULTIPOINT((1 2),(3 4))", + "MULTILINESTRING((1 2,3 4),(5 6,7 8))", + "MULTIPOLYGON(((0 1,2 0,2 3,0 3,0 1)))", + "GEOMETRYCOLLECTION(POINT(1 2),LINESTRING(3 4,5 6))", + "GEOMETRYCOLLECTION(GEOMETRYCOLLECTION(POINT(1 2)))", + // XYZ dimensions + "POINT Z(1 2 3)", + "LINESTRING Z(1 2 3,4 5 6)", + "POLYGON Z((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2))", + "MULTIPOINT Z((1 2 3),(4 5 6))", + "MULTILINESTRING Z((1 2 3,4 5 6),(7 8 9,10 11 12))", + "MULTIPOLYGON Z(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))", + "GEOMETRYCOLLECTION Z(POINT Z(1 2 3))", + "GEOMETRYCOLLECTION Z(GEOMETRYCOLLECTION Z(POINT Z(1 2 3)))", + // XYM dimensions + "POINT M(1 2 3)", + "LINESTRING M(1 2 3,4 5 6)", + "POLYGON M((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2))", + "MULTIPOINT M((1 2 3),(4 5 6))", + "MULTILINESTRING M((1 2 3,4 5 6),(7 8 9,10 11 12))", + "MULTIPOLYGON M(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))", + "GEOMETRYCOLLECTION M(POINT M(1 2 3))", + "GEOMETRYCOLLECTION M(GEOMETRYCOLLECTION M(POINT M(1 2 3)))", + // XYZM dimensions + "POINT ZM(1 2 3 4)", + "LINESTRING ZM(1 2 3 4,5 6 7 8)", + "POLYGON ZM((0 1 2 3,4 0 2 3,4 5 2 3,0 5 2 3,0 1 2 3))", + "MULTIPOINT ZM((1 2 3 4),(5 6 7 8))", + "MULTILINESTRING ZM((1 2 3 4,5 6 7 8),(9 10 11 12,13 14 15 16))", + "MULTIPOLYGON ZM(((0 1 2 3,4 0 2 3,4 5 2 3,0 5 2 3,0 1 2 3)))", + "GEOMETRYCOLLECTION ZM(POINT ZM(1 2 3 4))", + "GEOMETRYCOLLECTION ZM(GEOMETRYCOLLECTION ZM(POINT ZM(1 2 3 4)))", + // Empty geometries + "POINT EMPTY", + "LINESTRING EMPTY", + "POLYGON EMPTY", + "MULTIPOINT EMPTY", + "MULTILINESTRING EMPTY", + "MULTIPOLYGON EMPTY", + "GEOMETRYCOLLECTION EMPTY", + // Empty geometries Z + "POINT Z EMPTY", + "LINESTRING Z EMPTY", + "POLYGON Z EMPTY", + "MULTIPOINT Z EMPTY", + "MULTILINESTRING Z EMPTY", + "MULTIPOLYGON Z EMPTY", + "GEOMETRYCOLLECTION Z EMPTY", + // Empty geometries M + "POINT M EMPTY", + "LINESTRING M EMPTY", + "POLYGON M EMPTY", + "MULTIPOINT M EMPTY", + "MULTILINESTRING M EMPTY", + "MULTIPOLYGON M EMPTY", + "GEOMETRYCOLLECTION M EMPTY", + // Empty geometries ZM + "POINT ZM EMPTY", + "LINESTRING ZM EMPTY", + "POLYGON ZM EMPTY", + "MULTIPOINT ZM EMPTY", + "MULTILINESTRING ZM EMPTY", + "MULTIPOLYGON ZM EMPTY", + "GEOMETRYCOLLECTION ZM EMPTY", + ]; +} diff --git a/rust/sedona-geometry/src/lib.rs b/rust/sedona-geometry/src/lib.rs index f189ec7b4..5de9f65e2 100644 --- a/rust/sedona-geometry/src/lib.rs +++ b/rust/sedona-geometry/src/lib.rs @@ -18,6 +18,7 @@ pub mod analyze; pub mod bounding_box; pub mod bounds; pub mod error; +pub mod ewkb_factory; pub mod interval; pub mod is_empty; pub mod point_count; diff --git a/rust/sedona-schema/src/crs.rs b/rust/sedona-schema/src/crs.rs index a4649b15f..3813df11c 100644 --- a/rust/sedona-schema/src/crs.rs +++ b/rust/sedona-schema/src/crs.rs @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use datafusion_common::{DataFusionError, Result}; +use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result}; use lru::LruCache; use std::cell::RefCell; use std::fmt::{Debug, Display}; @@ -318,9 +318,8 @@ impl FromStr for ProjJSON { type Err = DataFusionError; fn from_str(s: &str) -> Result { - let value: Value = serde_json::from_str(s).map_err(|err| { - DataFusionError::Internal(format!("Error deserializing PROJJSON Crs: {err}")) - })?; + let value: Value = serde_json::from_str(s) + .map_err(|err| plan_datafusion_err!("Error deserializing PROJJSON Crs: {err}"))?; Self::try_new(value) } @@ -329,9 +328,7 @@ impl FromStr for ProjJSON { impl ProjJSON { pub fn try_new(value: Value) -> Result { if !value.is_object() { - return Err(DataFusionError::Internal(format!( - "Can't create PROJJSON from non-object: {value}" - ))); + return plan_err!("Can't create PROJJSON from non-object: {value}"); } Ok(Self { value })