From 1c59285ee0f935b0b70bbbdadc848d10b567350d Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Sun, 26 Oct 2025 23:44:09 +0900 Subject: [PATCH 01/10] feat(sql): Implement ST_StartPoint() and ST_EndPoint() --- rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/register.rs | 2 + rust/sedona-functions/src/st_start_point.rs | 249 ++++++++++++++++++++ 3 files changed, 252 insertions(+) create mode 100644 rust/sedona-functions/src/st_start_point.rs diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index cf7608df4..42ee3dc06 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -49,6 +49,7 @@ mod st_point; mod st_pointzm; mod st_setsrid; mod st_srid; +mod st_start_point; mod st_transform; pub mod st_union_aggr; mod st_xyzm; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index 0e0d87130..30a9007aa 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -92,6 +92,8 @@ pub fn default_function_set() -> FunctionSet { crate::st_setsrid::st_set_srid_udf, crate::st_srid::st_crs_udf, crate::st_srid::st_srid_udf, + crate::st_start_point::st_end_point_udf, + crate::st_start_point::st_start_point_udf, crate::st_xyzm::st_m_udf, crate::st_xyzm::st_x_udf, crate::st_xyzm::st_y_udf, diff --git a/rust/sedona-functions/src/st_start_point.rs b/rust/sedona-functions/src/st_start_point.rs new file mode 100644 index 000000000..8d35afe9d --- /dev/null +++ b/rust/sedona-functions/src/st_start_point.rs @@ -0,0 +1,249 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use arrow_array::builder::BinaryBuilder; +use datafusion_common::error::Result; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; +use sedona_schema::{ + datatypes::{SedonaType, WKB_GEOMETRY}, + matchers::ArgMatcher, +}; +use std::sync::Arc; + +use crate::executor::WkbExecutor; + +/// ST_StartPoint() scalar UDF +/// +/// Native implementation to get the start point of a geometry +pub fn st_start_point_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "st_start_point", + vec![Arc::new(STStartOrEndPoint::new(true))], + Volatility::Immutable, + Some(st_start_point_doc()), + ) +} + +fn st_start_point_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the start point of a LINESTRING geometry. Returns NULL if the geometry is not a LINESTRING.", + "ST_StartPoint (geom: Geometry)", + ) + .with_argument("geom", "geometry: Input geometry") + .with_sql_example("SELECT ST_StartPoint(ST_GeomFromWKT('LINESTRING(0 1, 2 3, 4 5)'))") + .build() +} + +/// ST_EndPoint() scalar UDF +/// +/// Native implementation to get the end point of a geometry +pub fn st_end_point_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "st_end_point", + vec![Arc::new(STStartOrEndPoint::new(false))], + Volatility::Immutable, + Some(st_end_point_doc()), + ) +} + +fn st_end_point_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the end point of a LINESTRING geometry. Returns NULL if the geometry is not a LINESTRING.", + "ST_EndPoint (geom: Geometry)", + ) + .with_argument("geom", "geometry: Input geometry") + .with_sql_example("SELECT ST_EndPoint(ST_GeomFromWKT('LINESTRING(0 1, 2 3, 4 5)'))") + .build() +} + +#[derive(Debug)] +struct STStartOrEndPoint { + from_start: bool, +} + +impl STStartOrEndPoint { + fn new(from_start: bool) -> Self { + STStartOrEndPoint { from_start } + } +} + +impl SedonaScalarKernel for STStartOrEndPoint { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = WkbExecutor::new(arg_types, args); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + + // Temporary buffer for WKB + let mut item = [0u8; 37]; // 37 = 1 for byte order, 4 for type, 32 for coordinates (XYZM) + item[0] = 0x01; // byte order + + executor.execute_wkb_void(|maybe_wkb| { + match maybe_wkb { + Some(wkb) => { + let buf = wkb.buf(); + let n_bytes = match (buf[1], buf[2]) { + // XY + // 0002 (0x00000002) = LINESTRING + // 0001 (0x00000001) = POINT + (0x02, 0x00) => { + item[1] = 0x01; + 16 + } + // XYZ + // 1002 (0x000003ea) = LINESTRING Z + // 1001 (0x000003e9) = POINT Z + (0xea, 0x03) => { + item[1] = 0xe9; + item[2] = 0x03; + 24 + } + // XYM + // 2002 (0x000007d2) = LINESTRING Z + // 2001 (0x000007d1) = POINT Z + (0xd2, 0x07) => { + item[1] = 0xd1; + item[2] = 0x07; + 24 + } + // XYZM + // 3002 (0x00000bba) = LINESTRING ZM + // 3001 (0x00000bb9) = POINT ZM + (0xba, 0x0b) => { + item[1] = 0xb9; + item[2] = 0x0b; + 32 + } + _ => { + builder.append_null(); + return Ok(()); + } + }; + let dst_offset = 5; + let src_offset = if self.from_start { + 9 + } else { + buf.len() - n_bytes + }; + item[dst_offset..(dst_offset + n_bytes)] + .copy_from_slice(&buf[src_offset..(src_offset + n_bytes)]); + builder.append_value(&item[0..(dst_offset + n_bytes)]); + } + None => builder.append_null(), + } + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[cfg(test)] +mod tests { + use datafusion_expr::ScalarUDF; + use rstest::rstest; + use sedona_schema::datatypes::WKB_VIEW_GEOMETRY; + use sedona_testing::{ + compare::assert_array_equal, create::create_array, testers::ScalarUdfTester, + }; + + use super::*; + + #[test] + fn udf_metadata() { + let st_start_point_udf: ScalarUDF = st_start_point_udf().into(); + assert_eq!(st_start_point_udf.name(), "st_start_point"); + assert!(st_start_point_udf.documentation().is_some()); + + let st_end_point_udf: ScalarUDF = st_end_point_udf().into(); + assert_eq!(st_end_point_udf.name(), "st_end_point"); + assert!(st_end_point_udf.documentation().is_some()); + } + + #[rstest] + fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { + let tester_start_point = + ScalarUdfTester::new(st_start_point_udf().into(), vec![sedona_type.clone()]); + let tester_end_point = + ScalarUdfTester::new(st_end_point_udf().into(), vec![sedona_type.clone()]); + + let input = create_array( + &[ + Some("LINESTRING (1 2, 3 4, 5 6)"), + Some("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)"), + Some("LINESTRING M (1 2 3, 3 4 5, 5 6 7)"), + Some("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)"), + Some("POINT (1 2)"), + Some("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"), + Some("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))"), + None, + ], + &sedona_type, + ); + + let expected_start_point = create_array( + &[ + Some("POINT (1 2)"), + Some("POINT Z (1 2 3)"), + Some("POINT M (1 2 3)"), + Some("POINT ZM (1 2 3 4)"), + None, + None, + None, + None, + ], + &WKB_GEOMETRY, + ); + + let result_start_point = tester_start_point.invoke_array(input.clone()).unwrap(); + assert_array_equal(&result_start_point, &expected_start_point); + + let expected_end_point = create_array( + &[ + Some("POINT (5 6)"), + Some("POINT Z (5 6 7)"), + Some("POINT M (5 6 7)"), + Some("POINT ZM (5 6 7 8)"), + None, + None, + None, + None, + ], + &WKB_GEOMETRY, + ); + + let result_end_point = tester_end_point.invoke_array(input).unwrap(); + assert_array_equal(&result_end_point, &expected_end_point); + } +} From 1e50ef04a2fcfff0b86c959cc3507ecf9f4855a5 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 28 Oct 2025 00:17:58 +0900 Subject: [PATCH 02/10] Use geo-traits --- rust/sedona-functions/src/st_start_point.rs | 107 ++++++++++---------- 1 file changed, 53 insertions(+), 54 deletions(-) diff --git a/rust/sedona-functions/src/st_start_point.rs b/rust/sedona-functions/src/st_start_point.rs index 8d35afe9d..82e797b96 100644 --- a/rust/sedona-functions/src/st_start_point.rs +++ b/rust/sedona-functions/src/st_start_point.rs @@ -19,13 +19,17 @@ use datafusion_common::error::Result; use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; +use geo_traits::{CoordTrait, GeometryTrait, LineStringTrait}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; -use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; +use sedona_geometry::{ + error::SedonaGeometryError, + wkb_factory::{write_wkb_coord, write_wkb_point_header, WKB_MIN_PROBABLE_BYTES}, +}; use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, matchers::ArgMatcher, }; -use std::sync::Arc; +use std::{io::Write, sync::Arc}; use crate::executor::WkbExecutor; @@ -104,64 +108,28 @@ impl SedonaScalarKernel for STStartOrEndPoint { WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), ); - // Temporary buffer for WKB - let mut item = [0u8; 37]; // 37 = 1 for byte order, 4 for type, 32 for coordinates (XYZM) - item[0] = 0x01; // byte order - executor.execute_wkb_void(|maybe_wkb| { - match maybe_wkb { - Some(wkb) => { - let buf = wkb.buf(); - let n_bytes = match (buf[1], buf[2]) { - // XY - // 0002 (0x00000002) = LINESTRING - // 0001 (0x00000001) = POINT - (0x02, 0x00) => { - item[1] = 0x01; - 16 - } - // XYZ - // 1002 (0x000003ea) = LINESTRING Z - // 1001 (0x000003e9) = POINT Z - (0xea, 0x03) => { - item[1] = 0xe9; - item[2] = 0x03; - 24 - } - // XYM - // 2002 (0x000007d2) = LINESTRING Z - // 2001 (0x000007d1) = POINT Z - (0xd2, 0x07) => { - item[1] = 0xd1; - item[2] = 0x07; - 24 - } - // XYZM - // 3002 (0x00000bba) = LINESTRING ZM - // 3001 (0x00000bb9) = POINT ZM - (0xba, 0x0b) => { - item[1] = 0xb9; - item[2] = 0x0b; - 32 - } - _ => { - builder.append_null(); - return Ok(()); - } - }; - let dst_offset = 5; - let src_offset = if self.from_start { - 9 + if let Some(wkb) = maybe_wkb { + if let geo_traits::GeometryType::LineString(line_string) = wkb.as_type() { + let maybe_coord = if self.from_start { + line_string.coord(0) } else { - buf.len() - n_bytes + line_string.coord(line_string.num_coords() - 1) }; - item[dst_offset..(dst_offset + n_bytes)] - .copy_from_slice(&buf[src_offset..(src_offset + n_bytes)]); - builder.append_value(&item[0..(dst_offset + n_bytes)]); + + if let Some(coord) = maybe_coord { + write_wkb_start_point(&mut builder, coord).map_err(|_| { + datafusion_common::DataFusionError::Internal( + "Failed to write WKB point header".to_string(), + ) + })?; + builder.append_value([]); + return Ok(()); + } } - None => builder.append_null(), } + builder.append_null(); Ok(()) })?; @@ -169,6 +137,37 @@ impl SedonaScalarKernel for STStartOrEndPoint { } } +fn write_wkb_start_point( + buf: &mut impl Write, + coords: impl CoordTrait, +) -> Result<(), SedonaGeometryError> { + let dim = coords.dim(); + write_wkb_point_header(buf, dim)?; + + match dim.size() { + 2 => { + let coords_tuple = coords.x_y(); + write_wkb_coord(buf, coords_tuple) + } + 3 => { + let coords_tuple = (coords.x(), coords.y(), coords.nth_or_panic(2)); + write_wkb_coord(buf, coords_tuple) + } + 4 => { + let coords_tuple = ( + coords.x(), + coords.y(), + coords.nth_or_panic(2), + coords.nth_or_panic(3), + ); + write_wkb_coord(buf, coords_tuple) + } + _ => Err(SedonaGeometryError::Invalid( + "Unsupported number of dimensions".to_string(), + )), + } +} + #[cfg(test)] mod tests { use datafusion_expr::ScalarUDF; From 2024c21301aacc651224709a86f9e63724cff7de Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 28 Oct 2025 00:57:13 +0900 Subject: [PATCH 03/10] Add Python tests --- .../tests/functions/test_functions.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 041a0a8e4..4c52da0dc 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -1015,6 +1015,45 @@ def test_st_pointm(eng, x, y, m, expected): expected, ) +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geometry", "expected"), + [ + ("LINESTRING (1 2, 3 4, 5 6)", "POINT (1 2)" ), + ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", "POINT Z (1 2 3)" ), + ("LINESTRING M (1 2 3, 3 4 5, 5 6 7)", "POINT M (1 2 3)" ), + ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", "POINT ZM (1 2 3 4)"), + ("POINT (1 2)", None ), + ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", None ), + ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", None ), ), + ], +) +def test_st_start_point(eng, geometry, expected): + eng = eng.create_or_skip() + eng.assert_query_result( + f"SELECT ST_StartPoint({val_or_null(geometry)})", + expected, + ) + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geometry", "expected"), + [ + ("LINESTRING (1 2, 3 4, 5 6)", "POINT (5 6)" ), + ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", "POINT Z (5 6 7)" ), + ("LINESTRING M (1 2 3, 3 4 5, 5 6 7)", "POINT M (5 6 7)" ), + ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", "POINT ZM (5 6 7 8)"), + ("POINT (1 2)", None ), + ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", None ), + ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", None ), + ], +) +def test_st_end_point(eng, geometry, expected): + eng = eng.create_or_skip() + eng.assert_query_result( + f"SELECT ST_EndPoint({val_or_null(geometry)})", + expected, + ) @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( From 9e246ac26eaddce62d74da8ad7d32b6c3073c022 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 28 Oct 2025 00:58:26 +0900 Subject: [PATCH 04/10] Use geom_or_null --- python/sedonadb/tests/functions/test_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 4c52da0dc..bf2a7beaf 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -1031,7 +1031,7 @@ def test_st_pointm(eng, x, y, m, expected): def test_st_start_point(eng, geometry, expected): eng = eng.create_or_skip() eng.assert_query_result( - f"SELECT ST_StartPoint({val_or_null(geometry)})", + f"SELECT ST_StartPoint({geom_or_null(geometry)})", expected, ) @@ -1051,7 +1051,7 @@ def test_st_start_point(eng, geometry, expected): def test_st_end_point(eng, geometry, expected): eng = eng.create_or_skip() eng.assert_query_result( - f"SELECT ST_EndPoint({val_or_null(geometry)})", + f"SELECT ST_EndPoint({geom_or_null(geometry)})", expected, ) From 00b4191df14f55daf93e8347dc82405f88845993 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 28 Oct 2025 01:01:31 +0900 Subject: [PATCH 05/10] Add benchmarks --- .../sedona-functions/benches/native-functions.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/rust/sedona-functions/benches/native-functions.rs b/rust/sedona-functions/benches/native-functions.rs index f7d4ed247..76904f799 100644 --- a/rust/sedona-functions/benches/native-functions.rs +++ b/rust/sedona-functions/benches/native-functions.rs @@ -126,6 +126,22 @@ fn criterion_benchmark(c: &mut Criterion) { ), ); + benchmark::scalar( + c, + &f, + "native", + "st_start_point", + BenchmarkArgs::Array(LineString(10)), + ); + + benchmark::scalar( + c, + &f, + "native", + "st_end_point", + BenchmarkArgs::Array(LineString(10)), + ); + benchmark::scalar(c, &f, "native", "st_x", Point); benchmark::scalar(c, &f, "native", "st_y", Point); benchmark::scalar(c, &f, "native", "st_z", Point); From a6cc10134073e1bf5cf94af6799bed561b7d56ae Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 28 Oct 2025 19:20:14 +0900 Subject: [PATCH 06/10] Apply suggestions from code review Co-authored-by: Dewey Dunnington --- python/sedonadb/tests/functions/test_functions.py | 2 +- rust/sedona-functions/src/st_start_point.rs | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index bf2a7beaf..43147e55e 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -1025,7 +1025,7 @@ def test_st_pointm(eng, x, y, m, expected): ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", "POINT ZM (1 2 3 4)"), ("POINT (1 2)", None ), ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", None ), - ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", None ), ), + ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", None ), ], ) def test_st_start_point(eng, geometry, expected): diff --git a/rust/sedona-functions/src/st_start_point.rs b/rust/sedona-functions/src/st_start_point.rs index 82e797b96..3848b2e0c 100644 --- a/rust/sedona-functions/src/st_start_point.rs +++ b/rust/sedona-functions/src/st_start_point.rs @@ -119,9 +119,8 @@ impl SedonaScalarKernel for STStartOrEndPoint { if let Some(coord) = maybe_coord { write_wkb_start_point(&mut builder, coord).map_err(|_| { - datafusion_common::DataFusionError::Internal( - "Failed to write WKB point header".to_string(), - ) + sedona_internal_err!("Failed to write WKB point header") + })?; builder.append_value([]); return Ok(()); From 96f9232cfa64c29ed722eaf5c0dde4d164ec9a04 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 28 Oct 2025 19:57:14 +0900 Subject: [PATCH 07/10] Fix function name --- .../sedona-functions/benches/native-functions.rs | 4 ++-- rust/sedona-functions/src/st_start_point.rs | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/rust/sedona-functions/benches/native-functions.rs b/rust/sedona-functions/benches/native-functions.rs index 76904f799..af89a4683 100644 --- a/rust/sedona-functions/benches/native-functions.rs +++ b/rust/sedona-functions/benches/native-functions.rs @@ -130,7 +130,7 @@ fn criterion_benchmark(c: &mut Criterion) { c, &f, "native", - "st_start_point", + "st_startpoint", BenchmarkArgs::Array(LineString(10)), ); @@ -138,7 +138,7 @@ fn criterion_benchmark(c: &mut Criterion) { c, &f, "native", - "st_end_point", + "st_endpoint", BenchmarkArgs::Array(LineString(10)), ); diff --git a/rust/sedona-functions/src/st_start_point.rs b/rust/sedona-functions/src/st_start_point.rs index 3848b2e0c..13ff7ea3b 100644 --- a/rust/sedona-functions/src/st_start_point.rs +++ b/rust/sedona-functions/src/st_start_point.rs @@ -20,6 +20,7 @@ use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; use geo_traits::{CoordTrait, GeometryTrait, LineStringTrait}; +use sedona_common::sedona_internal_err; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_geometry::{ error::SedonaGeometryError, @@ -38,7 +39,7 @@ use crate::executor::WkbExecutor; /// Native implementation to get the start point of a geometry pub fn st_start_point_udf() -> SedonaScalarUDF { SedonaScalarUDF::new( - "st_start_point", + "st_startpoint", vec![Arc::new(STStartOrEndPoint::new(true))], Volatility::Immutable, Some(st_start_point_doc()), @@ -61,7 +62,7 @@ fn st_start_point_doc() -> Documentation { /// Native implementation to get the end point of a geometry pub fn st_end_point_udf() -> SedonaScalarUDF { SedonaScalarUDF::new( - "st_end_point", + "st_endpoint", vec![Arc::new(STStartOrEndPoint::new(false))], Volatility::Immutable, Some(st_end_point_doc()), @@ -118,10 +119,9 @@ impl SedonaScalarKernel for STStartOrEndPoint { }; if let Some(coord) = maybe_coord { - write_wkb_start_point(&mut builder, coord).map_err(|_| { - sedona_internal_err!("Failed to write WKB point header") - - })?; + if write_wkb_start_point(&mut builder, coord).is_err() { + return sedona_internal_err!("Failed to write WKB point header"); + }; builder.append_value([]); return Ok(()); } @@ -181,11 +181,11 @@ mod tests { #[test] fn udf_metadata() { let st_start_point_udf: ScalarUDF = st_start_point_udf().into(); - assert_eq!(st_start_point_udf.name(), "st_start_point"); + assert_eq!(st_start_point_udf.name(), "st_startpoint"); assert!(st_start_point_udf.documentation().is_some()); let st_end_point_udf: ScalarUDF = st_end_point_udf().into(); - assert_eq!(st_end_point_udf.name(), "st_end_point"); + assert_eq!(st_end_point_udf.name(), "st_endpoint"); assert!(st_end_point_udf.documentation().is_some()); } From 0368d9b444336c163f17bbf920008b3d84901bc4 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 28 Oct 2025 20:15:15 +0900 Subject: [PATCH 08/10] Add write_wkb_coord_trait --- rust/sedona-functions/src/st_start_point.rs | 33 +++--------- rust/sedona-geometry/src/wkb_factory.rs | 58 ++++++++++++++++++++- 2 files changed, 63 insertions(+), 28 deletions(-) diff --git a/rust/sedona-functions/src/st_start_point.rs b/rust/sedona-functions/src/st_start_point.rs index 13ff7ea3b..8e0aff1c5 100644 --- a/rust/sedona-functions/src/st_start_point.rs +++ b/rust/sedona-functions/src/st_start_point.rs @@ -24,7 +24,9 @@ use sedona_common::sedona_internal_err; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_geometry::{ error::SedonaGeometryError, - wkb_factory::{write_wkb_coord, write_wkb_point_header, WKB_MIN_PROBABLE_BYTES}, + wkb_factory::{ + write_wkb_coord, write_wkb_coord_trait, write_wkb_point_header, WKB_MIN_PROBABLE_BYTES, + }, }; use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, @@ -138,33 +140,10 @@ impl SedonaScalarKernel for STStartOrEndPoint { fn write_wkb_start_point( buf: &mut impl Write, - coords: impl CoordTrait, + coord: impl CoordTrait, ) -> Result<(), SedonaGeometryError> { - let dim = coords.dim(); - write_wkb_point_header(buf, dim)?; - - match dim.size() { - 2 => { - let coords_tuple = coords.x_y(); - write_wkb_coord(buf, coords_tuple) - } - 3 => { - let coords_tuple = (coords.x(), coords.y(), coords.nth_or_panic(2)); - write_wkb_coord(buf, coords_tuple) - } - 4 => { - let coords_tuple = ( - coords.x(), - coords.y(), - coords.nth_or_panic(2), - coords.nth_or_panic(3), - ); - write_wkb_coord(buf, coords_tuple) - } - _ => Err(SedonaGeometryError::Invalid( - "Unsupported number of dimensions".to_string(), - )), - } + write_wkb_point_header(buf, coord.dim())?; + write_wkb_coord_trait(buf, &coord) } #[cfg(test)] diff --git a/rust/sedona-geometry/src/wkb_factory.rs b/rust/sedona-geometry/src/wkb_factory.rs index 000788f0a..9db1d29d3 100644 --- a/rust/sedona-geometry/src/wkb_factory.rs +++ b/rust/sedona-geometry/src/wkb_factory.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. use crate::error::SedonaGeometryError; -use geo_traits::Dimensions; +use geo_traits::{CoordTrait, Dimensions}; use std::io::Write; pub const WKB_MIN_PROBABLE_BYTES: usize = 21; @@ -392,6 +392,37 @@ where Ok(()) } +/// Write a single coordinate of CoordTrait to WKB +/// This function always writes little endian coordinates. +pub fn write_wkb_coord_trait(buf: &mut impl Write, coord: &C) -> Result<(), SedonaGeometryError> +where + C: CoordTrait, +{ + match coord.dim().size() { + 2 => { + let coord_tuple = coord.x_y(); + write_wkb_coord(buf, coord_tuple) + } + 3 => { + let coord_tuple: (::T, _, _) = + (coord.x(), coord.y(), coord.nth_or_panic(2)); + write_wkb_coord(buf, coord_tuple) + } + 4 => { + let coord_tuple = ( + coord.x(), + coord.y(), + coord.nth_or_panic(2), + coord.nth_or_panic(3), + ); + write_wkb_coord(buf, coord_tuple) + } + _ => Err(SedonaGeometryError::Invalid( + "Unsupported number of dimensions".to_string(), + )), + } +} + /// Write multiple coordinates to WKB /// /// This function takes an iterator of coordinates and writes them to the provided buffer. @@ -537,6 +568,31 @@ mod test { check_bytes(&wkb, "POINT ZM(12 13 14 15)"); } + #[test] + fn test_write_wkb_coord_trait() { + let cases = [ + (None, None, "POINT(0 1)"), + (Some(2.0), None, "POINT Z(0 1 2)"), + (None, Some(3.0), "POINT M(0 1 3)"), + (Some(2.0), Some(3.0), "POINT ZM(0 1 2 3)"), + ]; + let mut wkb = vec![]; + + for (z, m, expected) in cases { + let coord = wkt::types::Coord { + x: 0.0, + y: 1.0, + z, + m, + }; + + wkb.clear(); + write_wkb_point_header(&mut wkb, coord.dim()).unwrap(); + write_wkb_coord_trait(&mut wkb, &coord).unwrap(); + check_bytes(&wkb, expected); + } + } + #[test] fn test_wkb_linestring() { let wkt: Wkt = Wkt::from_str("LINESTRING EMPTY").unwrap(); From 192e1850b5ce0274490190316aca3e27f74bd3ef Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 28 Oct 2025 21:28:09 +0900 Subject: [PATCH 09/10] Handle other geometries --- .../tests/functions/test_functions.py | 34 ++++--- rust/sedona-functions/src/st_start_point.rs | 90 ++++++++++++++----- 2 files changed, 92 insertions(+), 32 deletions(-) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 43147e55e..da009ff9e 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -1015,17 +1015,25 @@ def test_st_pointm(eng, x, y, m, expected): expected, ) + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( ("geometry", "expected"), [ - ("LINESTRING (1 2, 3 4, 5 6)", "POINT (1 2)" ), - ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", "POINT Z (1 2 3)" ), - ("LINESTRING M (1 2 3, 3 4 5, 5 6 7)", "POINT M (1 2 3)" ), + ("LINESTRING (1 2, 3 4, 5 6)", "POINT (1 2)"), + ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", "POINT Z (1 2 3)"), + ("LINESTRING M (1 2 3, 3 4 5, 5 6 7)", "POINT M (1 2 3)"), ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", "POINT ZM (1 2 3 4)"), - ("POINT (1 2)", None ), - ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", None ), - ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", None ), + ("POINT (1 2)", "POINT (1 2)"), + ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", "POINT (0 0)"), + ("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0)", "POINT (0 0)"), + ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", "POINT (1 2)"), + ("MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)))", "POINT (0 0)"), + ("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))", "POINT (1 2)"), + ( + "GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))))", + "POINT (1 2)", + ), ], ) def test_st_start_point(eng, geometry, expected): @@ -1035,17 +1043,18 @@ def test_st_start_point(eng, geometry, expected): expected, ) + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( ("geometry", "expected"), [ - ("LINESTRING (1 2, 3 4, 5 6)", "POINT (5 6)" ), - ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", "POINT Z (5 6 7)" ), - ("LINESTRING M (1 2 3, 3 4 5, 5 6 7)", "POINT M (5 6 7)" ), + ("LINESTRING (1 2, 3 4, 5 6)", "POINT (5 6)"), + ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", "POINT Z (5 6 7)"), + ("LINESTRING M (1 2 3, 3 4 5, 5 6 7)", "POINT M (5 6 7)"), ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", "POINT ZM (5 6 7 8)"), - ("POINT (1 2)", None ), - ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", None ), - ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", None ), + ("POINT (1 2)", None), + ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", None), + ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", None), ], ) def test_st_end_point(eng, geometry, expected): @@ -1055,6 +1064,7 @@ def test_st_end_point(eng, geometry, expected): expected, ) + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( ("x", "y", "z", "m", "expected"), diff --git a/rust/sedona-functions/src/st_start_point.rs b/rust/sedona-functions/src/st_start_point.rs index 8e0aff1c5..a8726859b 100644 --- a/rust/sedona-functions/src/st_start_point.rs +++ b/rust/sedona-functions/src/st_start_point.rs @@ -19,14 +19,15 @@ use datafusion_common::error::Result; use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; -use geo_traits::{CoordTrait, GeometryTrait, LineStringTrait}; +use geo_traits::{ + CoordTrait, GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, + MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait, +}; use sedona_common::sedona_internal_err; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_geometry::{ error::SedonaGeometryError, - wkb_factory::{ - write_wkb_coord, write_wkb_coord_trait, write_wkb_point_header, WKB_MIN_PROBABLE_BYTES, - }, + wkb_factory::{write_wkb_coord_trait, write_wkb_point_header, WKB_MIN_PROBABLE_BYTES}, }; use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, @@ -113,20 +114,12 @@ impl SedonaScalarKernel for STStartOrEndPoint { executor.execute_wkb_void(|maybe_wkb| { if let Some(wkb) = maybe_wkb { - if let geo_traits::GeometryType::LineString(line_string) = wkb.as_type() { - let maybe_coord = if self.from_start { - line_string.coord(0) - } else { - line_string.coord(line_string.num_coords() - 1) + if let Some(coord) = extract_first_geometry(&wkb, self.from_start) { + if write_wkb_start_point(&mut builder, coord).is_err() { + return sedona_internal_err!("Failed to write WKB point header"); }; - - if let Some(coord) = maybe_coord { - if write_wkb_start_point(&mut builder, coord).is_err() { - return sedona_internal_err!("Failed to write WKB point header"); - }; - builder.append_value([]); - return Ok(()); - } + builder.append_value([]); + return Ok(()); } } @@ -146,6 +139,54 @@ fn write_wkb_start_point( write_wkb_coord_trait(buf, &coord) } +// - ST_StartPoint returns result for all types of geometries +// - ST_EndPoint returns result only for LINESTRING +fn extract_first_geometry<'a>( + wkb: &'a wkb::reader::Wkb<'a>, + from_start: bool, +) -> Option> { + match (wkb.as_type(), from_start) { + (geo_traits::GeometryType::Point(point), true) => point.coord(), + (geo_traits::GeometryType::LineString(line_string), true) => line_string.coord(0), + (geo_traits::GeometryType::LineString(line_string), false) => { + line_string.coord(line_string.num_coords() - 1) + } + (geo_traits::GeometryType::Polygon(polygon), true) => match polygon.exterior() { + Some(ring) => ring.coord(0), + None => None, + }, + (geo_traits::GeometryType::MultiPoint(multi_point), true) => match multi_point.point(0) { + Some(point) => point.coord(), + None => None, + }, + (geo_traits::GeometryType::MultiLineString(multi_line_string), true) => { + match multi_line_string.line_string(0) { + Some(line_string) => line_string.coord(0), + None => None, + } + } + (geo_traits::GeometryType::MultiPolygon(multi_polygon), true) => { + match multi_polygon.polygon(0) { + Some(polygon) => match polygon.exterior() { + Some(ring) => ring.coord(0), + None => None, + }, + None => None, + } + } + (geo_traits::GeometryType::GeometryCollection(geometry_collection), true) => { + match geometry_collection.geometry(0) { + Some(geometry) => extract_first_geometry(geometry, from_start), + None => None, + } + } + (geo_traits::GeometryType::Rect(_), true) => None, + (geo_traits::GeometryType::Triangle(_), true) => None, + (geo_traits::GeometryType::Line(_), true) => None, + _ => None, + } +} + #[cfg(test)] mod tests { use datafusion_expr::ScalarUDF; @@ -183,7 +224,10 @@ mod tests { Some("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)"), Some("POINT (1 2)"), Some("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"), + Some("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0)"), Some("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))"), + Some("MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)))"), + Some("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))"), None, ], &sedona_type, @@ -195,9 +239,12 @@ mod tests { Some("POINT Z (1 2 3)"), Some("POINT M (1 2 3)"), Some("POINT ZM (1 2 3 4)"), - None, - None, - None, + Some("POINT (1 2)"), + Some("POINT (0 0)"), + Some("POINT (0 0)"), + Some("POINT (1 2)"), + Some("POINT (0 0)"), + Some("POINT (1 2)"), None, ], &WKB_GEOMETRY, @@ -216,6 +263,9 @@ mod tests { None, None, None, + None, + None, + None, ], &WKB_GEOMETRY, ); From e67cd35c86b8be87277cac6e4f5ec0360ea0a1eb Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Tue, 28 Oct 2025 21:57:51 +0900 Subject: [PATCH 10/10] Add Python benchmark --- benchmarks/test_functions.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py index d8ec00830..cf0efd6f1 100644 --- a/benchmarks/test_functions.py +++ b/benchmarks/test_functions.py @@ -203,3 +203,39 @@ def queries(): eng.execute_and_collect(f"SELECT ST_Perimeter(geom1) from {table}") benchmark(queries) + + @pytest.mark.parametrize( + "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread] + ) + @pytest.mark.parametrize( + "table", + [ + "collections_simple", + "segments_large", + ], + ) + def test_st_start_point(self, benchmark, eng, table): + eng = self._get_eng(eng) + + def queries(): + eng.execute_and_collect(f"SELECT ST_StartPoint(geom1) from {table}") + + benchmark(queries) + + @pytest.mark.parametrize( + "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread] + ) + @pytest.mark.parametrize( + "table", + [ + "collections_simple", + "segments_large", + ], + ) + def test_st_end_point(self, benchmark, eng, table): + eng = self._get_eng(eng) + + def queries(): + eng.execute_and_collect(f"SELECT ST_EndPoint(geom1) from {table}") + + benchmark(queries)