Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions python/sedonadb/python/sedonadb/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import os
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, List, Tuple
from typing import TYPE_CHECKING, List, Tuple, Any

import geoarrow.pyarrow as ga
import pyarrow as pa
Expand Down Expand Up @@ -125,6 +125,10 @@ def create_or_skip(cls, *args, **kwargs) -> "DBEngine":
f"Failed to create engine tester {cls.name()}: {e}\n{cls.install_hint()}"
)

def val_or_null(self, arg: Any) -> str:
    """Format a value (or None) as a SQL expression for this engine

    The default implementation forwards to the module-level
    ``val_or_null()`` helper. Engine classes may override this method
    when a value (e.g., bytes) needs an engine-specific literal syntax.
    """
    formatted = val_or_null(arg)
    return formatted

def assert_query_result(self, query: str, expected, **kwargs) -> "DBEngine":
"""Assert a SQL query result matches an expected target

Expand Down Expand Up @@ -334,6 +338,12 @@ def create_or_skip(cls, *args, **kwargs):
# Don't allow this to fail with a skip
return cls(*args, **kwargs)

def val_or_null(self, arg):
    """Format a value (or None) as a SQL expression

    Bytes are rendered as a hexadecimal literal of the form ``X'...'``;
    every other value is handled by the parent implementation.
    """
    # Anything that is not bytes has no special handling here
    if not isinstance(arg, bytes):
        return super().val_or_null(arg)

    hex_digits = arg.hex()
    return f"X'{hex_digits}'"

def create_table_parquet(self, name, paths) -> "SedonaDB":
self.con.read_parquet(paths).to_memtable().to_view(name, overwrite=True)
return self
Expand Down Expand Up @@ -454,6 +464,12 @@ def install_hint(cls):
"- Run `docker compose up postgis` to start a test PostGIS runtime"
)

def val_or_null(self, arg):
    """Format a value (or None) as a SQL expression

    Bytes are rendered as a quoted hex string prefixed with a backslash-x
    and cast to ``bytea``; every other value is handled by the parent
    implementation.
    """
    # Anything that is not bytes has no special handling here
    if not isinstance(arg, bytes):
        return super().val_or_null(arg)

    hex_digits = arg.hex()
    return f"'\\x{hex_digits}'::bytea"

def create_table_parquet(self, name, paths) -> "PostGIS":
import json

Expand Down Expand Up @@ -654,10 +670,20 @@ def geog_or_null(arg):


def val_or_null(arg):
"""Format SQL expression for a value or NULL"""
"""Format SQL expression for a value or NULL

Use an engine-specific method when formatting bytes as there is no
engine-agnostic way to represent bytes as a SQL literal.

This is not secure (i.e., does not prevent SQL injection of any kind)
and should only be used for testing.
"""
if arg is None:
return "NULL"
return arg
elif isinstance(arg, bytes):
raise NotImplementedError("Use eng.val_or_null() to format bytes to SQL")
else:
return arg


def _geometry_columns(schema):
Expand Down
31 changes: 25 additions & 6 deletions python/sedonadb/tests/functions/test_wkb.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,39 @@


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize("srid", [None, 4326])
@pytest.mark.parametrize("srid", [0, 4326])
@pytest.mark.parametrize(
"geom",
[
# XY dimensions
"POINT (1 2)",
"LINESTRING (1 2, 3 4, 5 6)",
"POLYGON ((0 1, 2 0, 2 3, 0 3, 0 1))",
"MULTIPOINT ((1 2), (3 4))",
"MULTIPOINT (1 2, 3 4)",
"MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))",
"MULTIPOLYGON (((0 1, 2 0, 2 3, 0 3, 0 1)))",
"GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))",
# XYZ dimensions
"POINT Z (1 2 3)",
"LINESTRING Z (1 2 3, 4 5 6)",
"POLYGON Z ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))",
"MULTIPOINT Z ((1 2 3), (4 5 6))",
"MULTIPOINT Z (1 2 3, 4 5 6)",
"MULTILINESTRING Z ((1 2 3, 4 5 6), (7 8 9, 10 11 12))",
"MULTIPOLYGON Z (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))",
"GEOMETRYCOLLECTION Z (POINT Z (1 2 3))",
# XYM dimensions
"POINT M (1 2 3)",
"LINESTRING M (1 2 3, 4 5 6)",
"POLYGON M ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))",
"MULTIPOINT M ((1 2 3), (4 5 6))",
"MULTIPOINT M (1 2 3, 4 5 6)",
"MULTILINESTRING M ((1 2 3, 4 5 6), (7 8 9, 10 11 12))",
"MULTIPOLYGON M (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))",
"GEOMETRYCOLLECTION M (POINT M (1 2 3))",
# XYZM dimensions
"POINT ZM (1 2 3 4)",
"LINESTRING ZM (1 2 3 4, 5 6 7 8)",
"POLYGON ZM ((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3))",
"MULTIPOINT ZM ((1 2 3 4), (5 6 7 8))",
"MULTIPOINT ZM (1 2 3 4, 5 6 7 8)",
"MULTILINESTRING ZM ((1 2 3 4, 5 6 7 8), (9 10 11 12, 13 14 15 16))",
"MULTIPOLYGON ZM (((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3)))",
"GEOMETRYCOLLECTION ZM (POINT ZM (1 2 3 4))",
Expand All @@ -70,11 +70,14 @@
],
)
def test_st_asewkb(eng, srid, geom):
if shapely.geos_version < (3, 12, 0):
pytest.skip("GEOS version 3.12+ required for EWKB tests")

eng = eng.create_or_skip()

if geom is not None:
shapely_geom = shapely.from_wkt(geom)
if srid is not None:
if srid:
shapely_geom = shapely.set_srid(shapely_geom, srid)
write_srid = True
else:
Expand All @@ -90,4 +93,20 @@ def test_st_asewkb(eng, srid, geom):
else:
expected = None

# Check rendering of WKB against shapely
eng.assert_query_result(f"SELECT ST_AsEWKB({geom_or_null(geom, srid)})", expected)

# Check read of EWKB against read SRID
if expected is None:
srid = None
eng.assert_query_result(
f"SELECT ST_SRID(ST_GeomFromEWKB({eng.val_or_null(expected)}))", srid
)

# Check read of EWKB against read geometry content
# Workaround bug in geoarrow-c
if geom == "POINT EMPTY":
geom = "POINT (nan nan)"
eng.assert_query_result(
f"SELECT ST_SetSRID(ST_GeomFromEWKB({eng.val_or_null(expected)}), 0)", geom
)
11 changes: 7 additions & 4 deletions rust/sedona-expr/src/item_crs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,11 @@ pub fn make_item_crs(
}

/// Given an input type, separate it into an item and crs type (if the input
/// is an item_crs type). Otherwise, just return the item type as is.
fn parse_item_crs_arg_type(sedona_type: &SedonaType) -> Result<(SedonaType, Option<SedonaType>)> {
/// is an item_crs type). Otherwise, just return the item type as is and return a
/// CRS type of None.
pub fn parse_item_crs_arg_type(
sedona_type: &SedonaType,
) -> Result<(SedonaType, Option<SedonaType>)> {
if let SedonaType::Arrow(DataType::Struct(fields)) = sedona_type {
let field_names = fields.iter().map(|f| f.name()).collect::<Vec<_>>();
if field_names != ["item", "crs"] {
Expand All @@ -554,7 +557,7 @@ fn parse_item_crs_arg_type(sedona_type: &SedonaType) -> Result<(SedonaType, Opti
/// is an item_crs type). Otherwise, just return the item type as is. This
/// version strips the CRS, which we need to do here before passing it to the
/// underlying kernel (which expects all input CRSes to match).
fn parse_item_crs_arg_type_strip_crs(
pub fn parse_item_crs_arg_type_strip_crs(
sedona_type: &SedonaType,
) -> Result<(SedonaType, Option<SedonaType>)> {
match sedona_type {
Expand All @@ -573,7 +576,7 @@ fn parse_item_crs_arg_type_strip_crs(

/// Separate an argument into the item and its crs (if applicable). This
/// operates on the result of parse_item_crs_arg_type().
fn parse_item_crs_arg(
pub fn parse_item_crs_arg(
item_type: &SedonaType,
crs_type: &Option<SedonaType>,
arg: &ColumnarValue,
Expand Down
1 change: 1 addition & 0 deletions rust/sedona-functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub mod st_envelope_agg;
pub mod st_flipcoordinates;
mod st_geometryn;
mod st_geometrytype;
mod st_geomfromewkb;
mod st_geomfromwkb;
mod st_geomfromwkt;
mod st_haszm;
Expand Down
5 changes: 3 additions & 2 deletions rust/sedona-functions/src/register.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ pub fn default_function_set() -> FunctionSet {
crate::predicates::st_knn_udf,
crate::predicates::st_touches_udf,
crate::predicates::st_within_udf,
crate::st_line_merge::st_line_merge_udf,
crate::referencing::st_line_interpolate_point_udf,
crate::referencing::st_line_locate_point_udf,
crate::sd_format::sd_format_udf,
Expand All @@ -80,19 +79,21 @@ pub fn default_function_set() -> FunctionSet {
crate::st_flipcoordinates::st_flipcoordinates_udf,
crate::st_geometryn::st_geometryn_udf,
crate::st_geometrytype::st_geometry_type_udf,
crate::st_geomfromewkb::st_geomfromewkb_udf,
crate::st_geomfromwkb::st_geogfromwkb_udf,
crate::st_geomfromwkb::st_geomfromwkb_udf,
crate::st_geomfromwkb::st_geomfromwkbunchecked_udf,
crate::st_geomfromwkt::st_geogfromwkt_udf,
crate::st_geomfromwkt::st_geomfromwkt_udf,
crate::st_geomfromwkt::st_geomfromewkt_udf,
crate::st_geomfromwkt::st_geomfromwkt_udf,
crate::st_haszm::st_hasm_udf,
crate::st_haszm::st_hasz_udf,
crate::st_interiorringn::st_interiorringn_udf,
crate::st_isclosed::st_isclosed_udf,
crate::st_iscollection::st_iscollection_udf,
crate::st_isempty::st_isempty_udf,
crate::st_length::st_length_udf,
crate::st_line_merge::st_line_merge_udf,
crate::st_makeline::st_makeline_udf,
crate::st_numgeometries::st_numgeometries_udf,
crate::st_perimeter::st_perimeter_udf,
Expand Down
Loading