From 26618c78d461010a4e20da6480d85d1e8b7d9e3d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 27 Dec 2025 22:05:57 -0600 Subject: [PATCH 01/26] start on expr stuff --- r/sedonadb/R/expression.R | 42 +++++++++++++++++++ r/sedonadb/R/type.R | 0 r/sedonadb/src/rust/src/expression.rs | 59 +++++++++++++++++++++++++++ r/sedonadb/src/rust/src/lib.rs | 1 + 4 files changed, 102 insertions(+) create mode 100644 r/sedonadb/R/expression.R create mode 100644 r/sedonadb/R/type.R create mode 100644 r/sedonadb/src/rust/src/expression.rs diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R new file mode 100644 index 000000000..79efc4cfa --- /dev/null +++ b/r/sedonadb/R/expression.R @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' Create a SedonaDB Logical Expression +#' +#' @param x An object +#' @param ... Passed to/from methods +#' @param type An optional data type to request for the output +#' +#' @returns An object of class SedonaDBExpr +#' @export +as_sedonadb_expr <- function(x, ..., type = NULL) { + UseMethod("as_sedonadb_expr") +} + +as_sedonadb_expr.SedonaDBExpr <- function(x, ..., type = NULL) { + if (!is.null(type)) { + x$cast(nanoarrow::as_nanoarrow_schema(x)) + } else { + x + } +} + + + + + + diff --git a/r/sedonadb/R/type.R b/r/sedonadb/R/type.R new file mode 100644 index 000000000..e69de29bb diff --git a/r/sedonadb/src/rust/src/expression.rs b/r/sedonadb/src/rust/src/expression.rs new file mode 100644 index 000000000..4a2cffb7f --- /dev/null +++ b/r/sedonadb/src/rust/src/expression.rs @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use datafusion_expr::{expr::ScalarFunction, Expr}; +use savvy::{savvy, savvy_err}; +use sedona::context::SedonaContext; + +#[savvy] +pub struct SedonaDBExpr { + pub inner: Expr, +} + +#[savvy] +pub struct SedonaDBExprFactory { + pub ctx: Arc, +} + +#[savvy] +impl SedonaDBExprFactory { + fn scalar_function(&self, name: &str, args: savvy::Sexp) -> savvy::Result { + if let Some(scalar_udf) = self.ctx.ctx.state().scalar_functions().get(name) { + let args = Self::exprs(args)?; + let inner = ScalarFunction::new_udf(scalar_udf.clone(), args); + Ok(SedonaDBExpr { + inner: Expr::ScalarFunction(inner), + }) + } else { + Err(savvy_err!("Scalar UDF '{name}' not found")) + } + } +} + +impl SedonaDBExprFactory { + fn exprs(exprs_sexp: savvy::Sexp) -> savvy::Result> { + savvy::ListSexp::try_from(exprs_sexp)? + .iter() + .map(|(_, item)| -> savvy::Result { + let expr_wrapper = SedonaDBExpr::try_from(item)?; + Ok(expr_wrapper.inner.clone()) + }) + .collect() + } +} diff --git a/r/sedonadb/src/rust/src/lib.rs b/r/sedonadb/src/rust/src/lib.rs index 07c6f311a..40c4e9e07 100644 --- a/r/sedonadb/src/rust/src/lib.rs +++ b/r/sedonadb/src/rust/src/lib.rs @@ -26,6 +26,7 @@ use sedona_proj::register::{configure_global_proj_engine, ProjCrsEngineBuilder}; mod context; mod dataframe; +mod expression; mod error; mod ffi; mod runtime; From b71f1355f9e1399c383d97a78a4bf1b8dabfd7a9 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 27 Dec 2025 22:27:34 -0600 Subject: [PATCH 02/26] add updater script --- r/sedonadb/tools/savvy-update.sh | 94 ++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100755 r/sedonadb/tools/savvy-update.sh diff --git a/r/sedonadb/tools/savvy-update.sh b/r/sedonadb/tools/savvy-update.sh new file mode 100755 index 000000000..ddba62590 --- /dev/null +++ b/r/sedonadb/tools/savvy-update.sh @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +set -eu + +main() { + local -r source_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + local -r source_rpkg_dir="$(cd "${source_dir}/../" && pwd)" + + # Run the updater + savvy-cli update "${source_rpkg_dir}" + + # Post-process files + local -r api_h="${source_rpkg_dir}/src/rust/api.h" + local -r init_c="${source_rpkg_dir}/src/init.c" + local -r wrappers_r="${source_rpkg_dir}/R/000-wrappers.R" + + mv "${api_h}" "${api_h}.tmp" + mv "${init_c}" "${init_c}.tmp" + mv "${wrappers_r}" "${wrappers_r}.tmp" + + # Add license header to api.h + echo "${LICENSE_C}" > "${api_h}" + cat "${api_h}.tmp" >> "${api_h}" + + # Add license header, put includes on their own lines, and fix a typo in init.c + echo "${LICENSE_C}" > "${init_c}" + sed 's/#include/\n#include/g' "${init_c}.tmp" | \ + sed '1s/^\n//' | \ + sed 's/initialzation/initialization/g' >> "${init_c}" + + # Add license header to 000-wrappers.R + echo "${LICENSE_R}" > "${wrappers_r}" + cat "${wrappers_r}.tmp" >> "${wrappers_r}" + + # Run clang-format on the generated C files + clang-format -i "${api_h}" + clang-format -i "${init_c}" + + # Remove .tmp files + rm "${api_h}.tmp" "${init_c}.tmp" "${wrappers_r}.tmp" +} + +LICENSE_R='# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +' +LICENSE_C='// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +' + +main From 8de858d0fe00be017fcd1bbbabfd07c9d37c2beb Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 27 Dec 2025 23:33:21 -0600 Subject: [PATCH 03/26] literals and printing --- r/sedonadb/NAMESPACE | 9 ++++ r/sedonadb/R/000-wrappers.R | 68 +++++++++++++++++++++++++++ r/sedonadb/R/expression.R | 52 ++++++++++++++++++-- r/sedonadb/man/as_sedonadb_expr.Rd | 21 +++++++++ r/sedonadb/src/init.c | 26 ++++++++++ r/sedonadb/src/rust/api.h | 10 ++++ r/sedonadb/src/rust/src/expression.rs | 31 +++++++++++- r/sedonadb/src/rust/src/ffi.rs | 23 ++++++++- 8 files changed, 232 insertions(+), 8 deletions(-) create mode 100644 r/sedonadb/man/as_sedonadb_expr.Rd diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE index f7eb1ee73..2a9f60735 100644 --- a/r/sedonadb/NAMESPACE +++ b/r/sedonadb/NAMESPACE @@ -10,14 +10,23 @@ S3method(as_sedonadb_dataframe,nanoarrow_array) S3method(as_sedonadb_dataframe,nanoarrow_array_stream) S3method(as_sedonadb_dataframe,sedonadb_dataframe) S3method(as_sedonadb_dataframe,sf) +S3method(as_sedonadb_expr,SedonaDBExpr) +S3method(as_sedonadb_expr,character) +S3method(as_sedonadb_expr,double) +S3method(as_sedonadb_expr,integer) +S3method(as_sedonadb_expr,nanoarrow_array) S3method(dim,sedonadb_dataframe) S3method(dimnames,sedonadb_dataframe) S3method(head,sedonadb_dataframe) S3method(infer_nanoarrow_schema,sedonadb_dataframe) S3method(print,"sedonadb::InternalContext__bundle") S3method(print,"sedonadb::InternalDataFrame__bundle") +S3method(print,"sedonadb::SedonaDBExprFactory__bundle") +S3method(print,"sedonadb::SedonaDBExpr__bundle") +S3method(print,SedonaDBExpr) S3method(print,sedonadb_dataframe) export(as_sedonadb_dataframe) +export(as_sedonadb_expr) export(sd_collect) export(sd_compute) export(sd_configure_proj) diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R index df1f61fd9..0482ac8d4 100644 --- a/r/sedonadb/R/000-wrappers.R +++ b/r/sedonadb/R/000-wrappers.R @@ -265,3 +265,71 @@ class(`InternalDataFrame`) <- c("sedonadb::InternalDataFrame__bundle", "savvy_se `print.sedonadb::InternalDataFrame__bundle` <- function(x, ...) { cat('sedonadb::InternalDataFrame\n') } + +### wrapper functions for SedonaDBExpr + +`SedonaDBExpr_debug_string` <- function(self) { + function() { + .Call(savvy_SedonaDBExpr_debug_string__impl, `self`) + } +} + +`.savvy_wrap_SedonaDBExpr` <- function(ptr) { + e <- new.env(parent = emptyenv()) + e$.ptr <- ptr + e$`debug_string` <- `SedonaDBExpr_debug_string`(ptr) + + class(e) <- c("sedonadb::SedonaDBExpr", "SedonaDBExpr", "savvy_sedonadb__sealed") + e +} + + + +`SedonaDBExpr` <- new.env(parent = emptyenv()) + +### associated functions for SedonaDBExpr + + + +class(`SedonaDBExpr`) <- c("sedonadb::SedonaDBExpr__bundle", "savvy_sedonadb__sealed") + +#' @export +`print.sedonadb::SedonaDBExpr__bundle` <- function(x, ...) { + cat('sedonadb::SedonaDBExpr\n') +} + +### wrapper functions for SedonaDBExprFactory + +`SedonaDBExprFactory_scalar_function` <- function(self) { + function(`name`, `args`) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_scalar_function__impl, `self`, `name`, `args`)) + } +} + +`.savvy_wrap_SedonaDBExprFactory` <- function(ptr) { + e <- new.env(parent = emptyenv()) + e$.ptr <- ptr + e$`scalar_function` <- `SedonaDBExprFactory_scalar_function`(ptr) + + class(e) <- c("sedonadb::SedonaDBExprFactory", "SedonaDBExprFactory", "savvy_sedonadb__sealed") + e +} + + + +`SedonaDBExprFactory` <- new.env(parent = emptyenv()) + +### associated functions for SedonaDBExprFactory + +`SedonaDBExprFactory`$`literal` <- function(`array_xptr`, `schema_xptr`) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_literal__impl, `array_xptr`, `schema_xptr`)) +} + + +class(`SedonaDBExprFactory`) <- c("sedonadb::SedonaDBExprFactory__bundle", "savvy_sedonadb__sealed") + +#' @export +`print.sedonadb::SedonaDBExprFactory__bundle` <- function(x, ...) { + cat('sedonadb::SedonaDBExprFactory\n') +} + diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 79efc4cfa..27ed3ffbf 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -27,16 +27,58 @@ as_sedonadb_expr <- function(x, ..., type = NULL) { UseMethod("as_sedonadb_expr") } +#' @export as_sedonadb_expr.SedonaDBExpr <- function(x, ..., type = NULL) { - if (!is.null(type)) { - x$cast(nanoarrow::as_nanoarrow_schema(x)) - } else { - x - } + handle_type_request(x, type) +} + +#' @export +as_sedonadb_expr.character <- function(x, ..., type = NULL) { + as_sedonadb_expr_from_nanoarrow(x, ..., type = type) +} + +#' @export +as_sedonadb_expr.integer <- function(x, ..., type = NULL) { + as_sedonadb_expr_from_nanoarrow(x, ..., type = type) +} + +#' @export +as_sedonadb_expr.double <- function(x, ..., type = NULL) { + as_sedonadb_expr_from_nanoarrow(x, ..., type = type) } +as_sedonadb_expr_from_nanoarrow <- function(x, ..., type = NULL) { + if (length(x) != 1 || is.object(x)) { + stop("Can't convert non-scalar chr to sedonadb_expr") + } + array <- nanoarrow::as_nanoarrow_array(x) + as_sedonadb_expr(array, type = type) +} +#' @export +as_sedonadb_expr.nanoarrow_array <- function(x, ..., type = NULL) { + schema <- nanoarrow::infer_nanoarrow_schema(x) + array_export <- nanoarrow::nanoarrow_allocate_array() + nanoarrow::nanoarrow_pointer_export(x, array_export) + expr <- SedonaDBExprFactory$literal(array_export, schema) + handle_type_request(expr, type) +} + +handle_type_request <- function(x, type) { + if (!is.null(type)) { + x$cast(nanoarrow::as_nanoarrow_schema(x)) + } else { + x + } +} +#' @export +print.SedonaDBExpr <- function(x, ...) { + cat("\n") + cat(x$debug_string()) + cat("\n") + invisible(x) +} diff --git a/r/sedonadb/man/as_sedonadb_expr.Rd b/r/sedonadb/man/as_sedonadb_expr.Rd new file mode 100644 index 000000000..f12b1a376 --- /dev/null +++ b/r/sedonadb/man/as_sedonadb_expr.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expression.R +\name{as_sedonadb_expr} +\alias{as_sedonadb_expr} +\title{Create a SedonaDB Logical Expression} +\usage{ +as_sedonadb_expr(x, ..., type = NULL) +} +\arguments{ +\item{x}{An object} + +\item{...}{Passed to/from methods} + +\item{type}{An optional data type to request for the output} +} +\value{ +An object of class SedonaDBExpr +} +\description{ +Create a SedonaDB Logical Expression +} diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c index 8405b2cc4..6c9c9e159 100644 --- a/r/sedonadb/src/init.c +++ b/r/sedonadb/src/init.c @@ -209,6 +209,26 @@ SEXP savvy_InternalDataFrame_to_view__impl(SEXP self__, SEXP c_arg__ctx, return handle_result(res); } +SEXP savvy_SedonaDBExpr_debug_string__impl(SEXP self__) { + SEXP res = savvy_SedonaDBExpr_debug_string__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_literal__impl(SEXP c_arg__array_xptr, + SEXP c_arg__schema_xptr) { + SEXP res = savvy_SedonaDBExprFactory_literal__ffi(c_arg__array_xptr, + c_arg__schema_xptr); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_scalar_function__impl(SEXP self__, + SEXP c_arg__name, + SEXP c_arg__args) { + SEXP res = savvy_SedonaDBExprFactory_scalar_function__ffi(self__, c_arg__name, + c_arg__args); + return handle_result(res); +} + static const R_CallMethodDef CallEntries[] = { {"savvy_configure_proj_shared__impl", (DL_FUNC)&savvy_configure_proj_shared__impl, 3}, @@ -258,6 +278,12 @@ static const R_CallMethodDef CallEntries[] = { (DL_FUNC)&savvy_InternalDataFrame_to_provider__impl, 1}, {"savvy_InternalDataFrame_to_view__impl", (DL_FUNC)&savvy_InternalDataFrame_to_view__impl, 4}, + {"savvy_SedonaDBExpr_debug_string__impl", + (DL_FUNC)&savvy_SedonaDBExpr_debug_string__impl, 1}, + {"savvy_SedonaDBExprFactory_literal__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_literal__impl, 2}, + {"savvy_SedonaDBExprFactory_scalar_function__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_scalar_function__impl, 3}, {NULL, NULL, 0}}; void R_init_sedonadb(DllInfo *dll) { diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h index 201039e14..ddc3a5449 100644 --- a/r/sedonadb/src/rust/api.h +++ b/r/sedonadb/src/rust/api.h @@ -60,3 +60,13 @@ SEXP savvy_InternalDataFrame_to_provider__ffi(SEXP self__); SEXP savvy_InternalDataFrame_to_view__ffi(SEXP self__, SEXP c_arg__ctx, SEXP c_arg__table_ref, SEXP c_arg__overwrite); + +// methods and associated functions for SedonaDBExpr +SEXP savvy_SedonaDBExpr_debug_string__ffi(SEXP self__); + +// methods and associated functions for SedonaDBExprFactory +SEXP savvy_SedonaDBExprFactory_literal__ffi(SEXP c_arg__array_xptr, + SEXP c_arg__schema_xptr); +SEXP savvy_SedonaDBExprFactory_scalar_function__ffi(SEXP self__, + SEXP c_arg__name, + SEXP c_arg__args); diff --git a/r/sedonadb/src/rust/src/expression.rs b/r/sedonadb/src/rust/src/expression.rs index 4a2cffb7f..f8c2a9806 100644 --- a/r/sedonadb/src/rust/src/expression.rs +++ b/r/sedonadb/src/rust/src/expression.rs @@ -17,15 +17,28 @@ use std::sync::Arc; -use datafusion_expr::{expr::ScalarFunction, Expr}; +use datafusion_common::ScalarValue; +use datafusion_expr::{ + expr::{FieldMetadata, ScalarFunction}, + Expr, +}; use savvy::{savvy, savvy_err}; use sedona::context::SedonaContext; +use crate::ffi::import_array; + #[savvy] pub struct SedonaDBExpr { pub inner: Expr, } +#[savvy] +impl SedonaDBExpr { + fn debug_string(&self) -> savvy::Result { + format!("{:?}", self.inner).try_into() + } +} + #[savvy] pub struct SedonaDBExprFactory { pub ctx: Arc, @@ -33,6 +46,22 @@ pub struct SedonaDBExprFactory { #[savvy] impl SedonaDBExprFactory { + fn literal( + array_xptr: savvy::Sexp, + schema_xptr: savvy::Sexp, + ) -> savvy::Result { + let (field, array_ref) = import_array(array_xptr, schema_xptr)?; + let metadata = if field.metadata().is_empty() { + None + } else { + Some(FieldMetadata::new_from_field(&field)) + }; + + let scalar_value = ScalarValue::try_from_array(&array_ref, 0)?; + let inner = Expr::Literal(scalar_value, metadata); + Ok(SedonaDBExpr { inner }) + } + fn scalar_function(&self, name: &str, args: savvy::Sexp) -> savvy::Result { if let Some(scalar_udf) = self.ctx.ctx.state().scalar_functions().get(name) { let args = Self::exprs(args)?; diff --git a/r/sedonadb/src/rust/src/ffi.rs b/r/sedonadb/src/rust/src/ffi.rs index 4275e2648..5ffb40cf6 100644 --- a/r/sedonadb/src/rust/src/ffi.rs +++ b/r/sedonadb/src/rust/src/ffi.rs @@ -18,10 +18,11 @@ use std::sync::Arc; use arrow_array::{ - ffi::FFI_ArrowSchema, + ffi::{from_ffi_and_data_type, FFI_ArrowArray, FFI_ArrowSchema}, ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}, + make_array, ArrayRef, }; -use arrow_schema::Schema; +use arrow_schema::{Field, Schema}; use datafusion::catalog::TableProvider; use datafusion_expr::ScalarUDF; use datafusion_ffi::{ @@ -36,6 +37,24 @@ pub fn import_schema(mut xptr: savvy::Sexp) -> savvy::Result { Ok(schema) } +pub fn import_field(mut xptr: savvy::Sexp) -> savvy::Result { + let ffi_schema: &FFI_ArrowSchema = import_xptr(&mut xptr, "nanoarrow_schema")?; + let schema = Field::try_from(ffi_schema)?; + Ok(schema) +} + +pub fn import_array( + mut xptr: savvy::Sexp, + schema_xptr: savvy::Sexp, +) -> savvy::Result<(Field, ArrayRef)> { + let field = import_field(schema_xptr)?; + let ffi_array_ref: &mut FFI_ArrowArray = import_xptr(&mut xptr, "nanoarrow_array")?; + let ffi_array = unsafe { FFI_ArrowArray::from_raw(ffi_array_ref as _) }; + let array_data = unsafe { from_ffi_and_data_type(ffi_array as _, field.data_type().clone())? }; + let array_ref = make_array(array_data); + Ok((field, array_ref)) +} + pub fn import_array_stream(mut xptr: savvy::Sexp) -> savvy::Result { let ffi_stream: &mut FFI_ArrowArrayStream = import_xptr(&mut xptr, "nanoarrow_array_stream")?; let reader = unsafe { ArrowArrayStreamReader::from_raw(ffi_stream as _)? }; From 1c104c03b025d79319e4e3acf581f57cb97c971f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 27 Dec 2025 23:48:05 -0600 Subject: [PATCH 04/26] test the basics --- r/sedonadb/NAMESPACE | 1 + r/sedonadb/R/expression.R | 7 ++- .../tests/testthat/_snaps/expression.md | 8 ++++ r/sedonadb/tests/testthat/test-expression.R | 44 +++++++++++++++++++ 4 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 r/sedonadb/tests/testthat/_snaps/expression.md create mode 100644 r/sedonadb/tests/testthat/test-expression.R diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE index 2a9f60735..536229869 100644 --- a/r/sedonadb/NAMESPACE +++ b/r/sedonadb/NAMESPACE @@ -15,6 +15,7 @@ S3method(as_sedonadb_expr,character) S3method(as_sedonadb_expr,double) S3method(as_sedonadb_expr,integer) S3method(as_sedonadb_expr,nanoarrow_array) +S3method(as_sedonadb_expr,raw) S3method(dim,sedonadb_dataframe) S3method(dimnames,sedonadb_dataframe) S3method(head,sedonadb_dataframe) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 27ed3ffbf..22bc3cb36 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -47,9 +47,14 @@ as_sedonadb_expr.double <- function(x, ..., type = NULL) { as_sedonadb_expr_from_nanoarrow(x, ..., type = type) } +#' @export +as_sedonadb_expr.raw <- function(x, ..., type = NULL) { + as_sedonadb_expr_from_nanoarrow(list(x), ..., type = type) +} + as_sedonadb_expr_from_nanoarrow <- function(x, ..., type = NULL) { if (length(x) != 1 || is.object(x)) { - stop("Can't convert non-scalar chr to sedonadb_expr") + stop("Can't convert non-scalar to sedonadb_expr") } array <- nanoarrow::as_nanoarrow_array(x) diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md new file mode 100644 index 000000000..486457fff --- /dev/null +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -0,0 +1,8 @@ +# expressions can be printed + + Code + print(as_sedonadb_expr("foofy")) + Output + + Literal(Utf8("foofy"), None) + diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R new file mode 100644 index 000000000..c10518b65 --- /dev/null +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +test_that("basic literals can be converted to expressions", { + expect_identical( + as_sedonadb_expr("foofy")$debug_string(), + 'Literal(Utf8("foofy"), None)' + ) + + expect_identical( + as_sedonadb_expr(1L)$debug_string(), + 'Literal(Int32(1), None)' + ) + + expect_identical( + as_sedonadb_expr(1.0)$debug_string(), + 'Literal(Float64(1), None)' + ) + + expect_identical( + as_sedonadb_expr(as.raw(c(1:3)))$debug_string(), + 'Literal(Binary("1,2,3"), None)' + ) +}) + +test_that("expressions can be printed", { + expect_snapshot( + print(as_sedonadb_expr("foofy")) + ) +}) From a62da93f56eac450f8de077b6ef0467f6ce5f6ef Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 30 Dec 2025 01:01:31 -0600 Subject: [PATCH 05/26] one working expr translation --- .pre-commit-config.yaml | 2 +- r/sedonadb/R/000-wrappers.R | 10 +- r/sedonadb/R/expression.R | 127 +++++++++++++++++- r/sedonadb/configure | 2 +- r/sedonadb/src/init.c | 16 +++ r/sedonadb/src/rust/api.h | 3 + r/sedonadb/src/rust/src/expression.rs | 24 +++- r/sedonadb/src/rust/src/lib.rs | 2 +- .../tests/testthat/_snaps/expression.md | 1 - r/sedonadb/tests/testthat/test-expression.R | 7 + 10 files changed, 180 insertions(+), 14 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d8a932894..654b84e28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: - id: codespell # types_or: [markdown, c, c++, rust, python] additional_dependencies: [tomli] - exclude: "^c/(sedona-geoarrow-c/src/geoarrow|sedona-geoarrow-c/src/nanoarrow|sedona-libgpuspatial/libgpuspatial|sedona-tg/src/tg)/.*|^docs/image/sedonadb-architecture\\.svg$" + exclude: "^c/(sedona-geoarrow-c/src/geoarrow|sedona-geoarrow-c/src/nanoarrow|sedona-libgpuspatial/libgpuspatial|sedona-tg/src/tg)/.*|^docs/image/sedonadb-architecture\\.svg$|^r/sedonadb/tools/savvy-update.sh$" args: ["--ignore-words-list=thirdparty"] - repo: https://github.com/astral-sh/ruff-pre-commit diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R index 0482ac8d4..d026aa245 100644 --- a/r/sedonadb/R/000-wrappers.R +++ b/r/sedonadb/R/000-wrappers.R @@ -321,10 +321,19 @@ class(`SedonaDBExpr`) <- c("sedonadb::SedonaDBExpr__bundle", "savvy_sedonadb__se ### associated functions for SedonaDBExprFactory +`SedonaDBExprFactory`$`column` <- function(`name`, `qualifier` = NULL) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_column__impl, `name`, `qualifier`)) +} + `SedonaDBExprFactory`$`literal` <- function(`array_xptr`, `schema_xptr`) { .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_literal__impl, `array_xptr`, `schema_xptr`)) } +`SedonaDBExprFactory`$`new` <- function(`ctx`) { + `ctx` <- .savvy_extract_ptr(`ctx`, "sedonadb::InternalContext") + .savvy_wrap_SedonaDBExprFactory(.Call(savvy_SedonaDBExprFactory_new__impl, `ctx`)) +} + class(`SedonaDBExprFactory`) <- c("sedonadb::SedonaDBExprFactory__bundle", "savvy_sedonadb__sealed") @@ -332,4 +341,3 @@ class(`SedonaDBExprFactory`) <- c("sedonadb::SedonaDBExprFactory__bundle", "savv `print.sedonadb::SedonaDBExprFactory__bundle` <- function(x, ...) { cat('sedonadb::SedonaDBExprFactory\n') } - diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 22bc3cb36..f13fe3fe7 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -20,10 +20,12 @@ #' @param x An object #' @param ... Passed to/from methods #' @param type An optional data type to request for the output +#' @param factory An expression factory object that should be passed to any +#' other calls to `as_sedonadb_expr()`. #' #' @returns An object of class SedonaDBExpr #' @export -as_sedonadb_expr <- function(x, ..., type = NULL) { +as_sedonadb_expr <- function(x, ..., type = NULL, factory = NULL) { UseMethod("as_sedonadb_expr") } @@ -33,7 +35,7 @@ as_sedonadb_expr.SedonaDBExpr <- function(x, ..., type = NULL) { } #' @export -as_sedonadb_expr.character <- function(x, ..., type = NULL) { +as_sedonadb_expr.character <- function(x, ..., type = NULL, factory = NULL) { as_sedonadb_expr_from_nanoarrow(x, ..., type = type) } @@ -87,3 +89,124 @@ print.SedonaDBExpr <- function(x, ...) { cat("\n") invisible(x) } + + +sd_eval_expr <- function(expr, expr_type = NULL, expr_ctx = sd_expr_ctx()) { + if (rlang::is_call(expr)) { + # If there is no expression anywhere in this call, just evaluate it in R + # and move on. + if (!r_expr_contains_sedonadb_expr(expr, expr_ctx)) { + return(sd_eval_default(expr, expr_type, expr_ctx)) + } + + # Handle `pkg::fun` or `fun` + call_name <- rlang::call_name(expr) + if (!is.null(call_name) && !is.null(expr_ctx$fns[[call_name]])) { + return(sd_eval_translation(call_name, expr, expr_type, expr_ctx)) + } else { + # Otherwise we have an inlined function and we just have to evaluate + return(sd_eval_default(expr, expr_type, expr_ctx)) + } + } + + sd_eval_default(expr, expr_type, expr_ctx) +} + +sd_eval_translation <- function(fn_key, expr, expr_type, expr_ctx) { + # Replace the function with the translation in such a way that + # any error resulting from the call doesn't have an absolute garbage error + # stack trace + new_fn_expr <- rlang::call2("$", expr_ctx$fns, rlang::sym(fn_key)) + + # Evaluate arguments individually. We may need to allow translations to + # override this step to have more control over the expression evaluation. + evaluated_args <- lapply(expr[-1], sd_eval_expr, expr_ctx = expr_ctx) + + # Recreate the call, injecting the factory as the first argument + new_call <- rlang::call2(new_fn_expr, expr_ctx$factory, !!!evaluated_args) + + # ...and evaluate it. We may need to catch an error because we've injected + # the arguments as atomics instead of the original expression typed by the user. + sd_eval_default(new_call, expr_type, expr_ctx) +} + +sd_eval_default <- function(expr, expr_type, expr_ctx) { + r_result <- rlang::eval_tidy(expr, data = expr_ctx$data, env = expr_ctx$env) + as_sedonadb_expr(r_result, expr_type = expr_type) +} + +r_expr_contains_sedonadb_expr <- function(expr, expr_ctx) { + if (rlang::is_call(expr, c("$", "[[")) && rlang::is_symbol(expr[[1]], ".data")) { + # An attempt to access the .data pronoun will either error or return an + # SedonaDB expression + TRUE + } else if (rlang::is_symbol(expr, expr_ctx$data_names)) { + TRUE + } else if (rlang::is_call(expr)) { + for (i in seq_along(expr)) { + if (r_expr_contains_sedonadb_expr(expr[[i]], expr_ctx)) { + return(TRUE) + } + } + + FALSE + } else if(rlang::is_atomic(expr)) { + inherits(x, "sedonadb_expr") + } else { + FALSE + } +} + +#' Expression evaluation context +#' +#' A context to use for evaluating a set of related R expressions into +#' SedonaDB expressions. +#' +#' @param schema A schema-like object coerced using +#' [nanoarrow::as_nanoarrow_schema()]. +#' +#' @return An object of class sedonadb_expr_ctx +#' @export +#' +#' @examples +#' sd_expr_ctx() +#' +sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { + if (is.null(schema)) { + schema <- nanoarrow::na_struct() + } + + schema <- nanoarrow::as_nanoarrow_schema(schema) + data_names <- names(schema$children) + data <- lapply(data_names, SedonaDBExprFactory$column) + names(data) <- data_names + + structure( + list( + factory = SedonaDBExprFactory$new(ctx()), + schema = schema, + data = rlang::as_data_mask(data), + data_names = data_names, + env = env, + fns = default_fns + ), + class = "sedonadb_expr_ctx" + ) +} + + +sd_register_translation <- function(qualified_name, fn) { + stopifnot(is.function(fn)) + + pieces <- strsplit(qualified_name, "::")[[1]] + unqualified_name <- pieces[[2]] + + default_fns[[qualified_name]] <- default_fns[[unqualified_name]] <- fn +} + +default_fns <- new.env(parent = emptyenv()) + +sd_register_translation("base::abs", function(.factory, x) { + # Not sure why I need $.ptr here + .factory$scalar_function("abs", list(x$.ptr)) +}) diff --git a/r/sedonadb/configure b/r/sedonadb/configure index a4f74a619..4bd8fde96 100755 --- a/r/sedonadb/configure +++ b/r/sedonadb/configure @@ -22,7 +22,7 @@ pkg-config geos 2>/dev/null if [ $? -eq 0 ]; then - PKGCONFIG_LIBS=`pkg-config --libs geos` + PKGCONFIG_LIBS=`pkg-config --libs geos --static` fi if [ "$LIB_DIR" ]; then diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c index 6c9c9e159..44583102f 100644 --- a/r/sedonadb/src/init.c +++ b/r/sedonadb/src/init.c @@ -214,6 +214,13 @@ SEXP savvy_SedonaDBExpr_debug_string__impl(SEXP self__) { return handle_result(res); } +SEXP savvy_SedonaDBExprFactory_column__impl(SEXP c_arg__name, + SEXP c_arg__qualifier) { + SEXP res = + savvy_SedonaDBExprFactory_column__ffi(c_arg__name, c_arg__qualifier); + return handle_result(res); +} + SEXP savvy_SedonaDBExprFactory_literal__impl(SEXP c_arg__array_xptr, SEXP c_arg__schema_xptr) { SEXP res = savvy_SedonaDBExprFactory_literal__ffi(c_arg__array_xptr, @@ -221,6 +228,11 @@ SEXP savvy_SedonaDBExprFactory_literal__impl(SEXP c_arg__array_xptr, return handle_result(res); } +SEXP savvy_SedonaDBExprFactory_new__impl(SEXP c_arg__ctx) { + SEXP res = savvy_SedonaDBExprFactory_new__ffi(c_arg__ctx); + return handle_result(res); +} + SEXP savvy_SedonaDBExprFactory_scalar_function__impl(SEXP self__, SEXP c_arg__name, SEXP c_arg__args) { @@ -280,8 +292,12 @@ static const R_CallMethodDef CallEntries[] = { (DL_FUNC)&savvy_InternalDataFrame_to_view__impl, 4}, {"savvy_SedonaDBExpr_debug_string__impl", (DL_FUNC)&savvy_SedonaDBExpr_debug_string__impl, 1}, + {"savvy_SedonaDBExprFactory_column__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_column__impl, 2}, {"savvy_SedonaDBExprFactory_literal__impl", (DL_FUNC)&savvy_SedonaDBExprFactory_literal__impl, 2}, + {"savvy_SedonaDBExprFactory_new__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_new__impl, 1}, {"savvy_SedonaDBExprFactory_scalar_function__impl", (DL_FUNC)&savvy_SedonaDBExprFactory_scalar_function__impl, 3}, {NULL, NULL, 0}}; diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h index ddc3a5449..ef302acba 100644 --- a/r/sedonadb/src/rust/api.h +++ b/r/sedonadb/src/rust/api.h @@ -65,8 +65,11 @@ SEXP savvy_InternalDataFrame_to_view__ffi(SEXP self__, SEXP c_arg__ctx, SEXP savvy_SedonaDBExpr_debug_string__ffi(SEXP self__); // methods and associated functions for SedonaDBExprFactory +SEXP savvy_SedonaDBExprFactory_column__ffi(SEXP c_arg__name, + SEXP c_arg__qualifier); SEXP savvy_SedonaDBExprFactory_literal__ffi(SEXP c_arg__array_xptr, SEXP c_arg__schema_xptr); +SEXP savvy_SedonaDBExprFactory_new__ffi(SEXP c_arg__ctx); SEXP savvy_SedonaDBExprFactory_scalar_function__ffi(SEXP self__, SEXP c_arg__name, SEXP c_arg__args); diff --git a/r/sedonadb/src/rust/src/expression.rs b/r/sedonadb/src/rust/src/expression.rs index f8c2a9806..777c344ca 100644 --- a/r/sedonadb/src/rust/src/expression.rs +++ b/r/sedonadb/src/rust/src/expression.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use datafusion_common::ScalarValue; +use datafusion_common::{Column, ScalarValue}; use datafusion_expr::{ expr::{FieldMetadata, ScalarFunction}, Expr, @@ -25,7 +25,7 @@ use datafusion_expr::{ use savvy::{savvy, savvy_err}; use sedona::context::SedonaContext; -use crate::ffi::import_array; +use crate::{context::InternalContext, ffi::import_array}; #[savvy] pub struct SedonaDBExpr { @@ -46,10 +46,13 @@ pub struct SedonaDBExprFactory { #[savvy] impl SedonaDBExprFactory { - fn literal( - array_xptr: savvy::Sexp, - schema_xptr: savvy::Sexp, - ) -> savvy::Result { + fn new(ctx: &InternalContext) -> Self { + Self { + ctx: ctx.inner.clone(), + } + } + + fn literal(array_xptr: savvy::Sexp, schema_xptr: savvy::Sexp) -> savvy::Result { let (field, array_ref) = import_array(array_xptr, schema_xptr)?; let metadata = if field.metadata().is_empty() { None @@ -62,6 +65,11 @@ impl SedonaDBExprFactory { Ok(SedonaDBExpr { inner }) } + fn column(name: &str, qualifier: Option<&str>) -> savvy::Result { + let inner = Expr::Column(Column::new(qualifier, name)); + Ok(SedonaDBExpr { inner }) + } + fn scalar_function(&self, name: &str, args: savvy::Sexp) -> savvy::Result { if let Some(scalar_udf) = self.ctx.ctx.state().scalar_functions().get(name) { let args = Self::exprs(args)?; @@ -80,7 +88,9 @@ impl SedonaDBExprFactory { savvy::ListSexp::try_from(exprs_sexp)? .iter() .map(|(_, item)| -> savvy::Result { - let expr_wrapper = SedonaDBExpr::try_from(item)?; + // This seems to require $.ptr from the list() input (can't just + // use list of R SedonaDBExpr objects) + let expr_wrapper: &SedonaDBExpr = item.try_into()?; Ok(expr_wrapper.inner.clone()) }) .collect() diff --git a/r/sedonadb/src/rust/src/lib.rs b/r/sedonadb/src/rust/src/lib.rs index 40c4e9e07..842519087 100644 --- a/r/sedonadb/src/rust/src/lib.rs +++ b/r/sedonadb/src/rust/src/lib.rs @@ -26,8 +26,8 @@ use sedona_proj::register::{configure_global_proj_engine, ProjCrsEngineBuilder}; mod context; mod dataframe; -mod expression; mod error; +mod expression; mod ffi; mod runtime; diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md index 486457fff..60d54bd52 100644 --- a/r/sedonadb/tests/testthat/_snaps/expression.md +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -5,4 +5,3 @@ Output Literal(Utf8("foofy"), None) - diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R index c10518b65..7309746f9 100644 --- a/r/sedonadb/tests/testthat/test-expression.R +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -37,6 +37,13 @@ test_that("basic literals can be converted to expressions", { ) }) +test_that("non-scalars can't be automatically converted to literals", { + expect_error( + as_sedonadb_expr(1:5)$debug_string(), + "Can't convert non-scalar to sedonadb_expr" + ) +}) + test_that("expressions can be printed", { expect_snapshot( print(as_sedonadb_expr("foofy")) From 26a5dad35446a441c20752c1b25c44cb049e7621 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 30 Dec 2025 23:42:04 -0600 Subject: [PATCH 06/26] test --- r/sedonadb/R/expression.R | 118 ++++++++++-------- .../tests/testthat/_snaps/expression.md | 49 ++++++++ r/sedonadb/tests/testthat/test-expression.R | 30 +++++ 3 files changed, 148 insertions(+), 49 deletions(-) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index f13fe3fe7..1dbc938b7 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -90,29 +90,69 @@ print.SedonaDBExpr <- function(x, ...) { invisible(x) } +#' Expression evaluation context +#' +#' A context to use for evaluating a set of related R expressions into +#' SedonaDB expressions. +#' +#' @param schema A schema-like object coerced using +#' [nanoarrow::as_nanoarrow_schema()]. +#' +#' @return An object of class sedonadb_expr_ctx +#' @noRd +sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { + if (is.null(schema)) { + schema <- nanoarrow::na_struct() + } + + schema <- nanoarrow::as_nanoarrow_schema(schema) + data_names <- as.character(names(schema$children)) + data <- lapply(data_names, SedonaDBExprFactory$column) + names(data) <- data_names -sd_eval_expr <- function(expr, expr_type = NULL, expr_ctx = sd_expr_ctx()) { + structure( + list( + factory = SedonaDBExprFactory$new(ctx()), + schema = schema, + data = rlang::as_data_mask(data), + data_names = data_names, + env = env, + fns = default_fns + ), + class = "sedonadb_expr_ctx" + ) +} + + +#' Evaluate an R expression into a SedonaDB expression +#' +#' @param expr An R expression (e.g., the result of `quote()`). +#' @param expr_ctx An [sd_expr_ctx()] +#' +#' @returns A `SedonaDBExpr` +#' @noRd +sd_eval_expr <- function(expr, expr_ctx = sd_expr_ctx()) { if (rlang::is_call(expr)) { # If there is no expression anywhere in this call, just evaluate it in R # and move on. if (!r_expr_contains_sedonadb_expr(expr, expr_ctx)) { - return(sd_eval_default(expr, expr_type, expr_ctx)) + return(sd_eval_default(expr, expr_ctx)) } # Handle `pkg::fun` or `fun` call_name <- rlang::call_name(expr) if (!is.null(call_name) && !is.null(expr_ctx$fns[[call_name]])) { - return(sd_eval_translation(call_name, expr, expr_type, expr_ctx)) + return(sd_eval_translation(call_name, expr, expr_ctx)) } else { # Otherwise we have an inlined function and we just have to evaluate - return(sd_eval_default(expr, expr_type, expr_ctx)) + return(sd_eval_default(expr, expr_ctx)) } } - sd_eval_default(expr, expr_type, expr_ctx) + sd_eval_default(expr, expr_ctx) } -sd_eval_translation <- function(fn_key, expr, expr_type, expr_ctx) { +sd_eval_translation <- function(fn_key, expr, expr_ctx) { # Replace the function with the translation in such a way that # any error resulting from the call doesn't have an absolute garbage error # stack trace @@ -125,14 +165,20 @@ sd_eval_translation <- function(fn_key, expr, expr_type, expr_ctx) { # Recreate the call, injecting the factory as the first argument new_call <- rlang::call2(new_fn_expr, expr_ctx$factory, !!!evaluated_args) - # ...and evaluate it. We may need to catch an error because we've injected - # the arguments as atomics instead of the original expression typed by the user. - sd_eval_default(new_call, expr_type, expr_ctx) + # ...and evaluate it + sd_eval_default(new_call, expr_ctx) } -sd_eval_default <- function(expr, expr_type, expr_ctx) { - r_result <- rlang::eval_tidy(expr, data = expr_ctx$data, env = expr_ctx$env) - as_sedonadb_expr(r_result, expr_type = expr_type) +sd_eval_default <- function(expr, expr_ctx) { + rlang::try_fetch({ + r_result <- rlang::eval_tidy(expr, data = expr_ctx$data, env = expr_ctx$env) + as_sedonadb_expr(r_result) + }, error = function(e) { + rlang::abort( + "SedonaDB R evaluation error", + parent = e + ) + }) } r_expr_contains_sedonadb_expr <- function(expr, expr_ctx) { @@ -151,50 +197,23 @@ r_expr_contains_sedonadb_expr <- function(expr, expr_ctx) { FALSE } else if(rlang::is_atomic(expr)) { - inherits(x, "sedonadb_expr") + inherits(expr, "sedonadb_expr") } else { FALSE } } -#' Expression evaluation context +#' Register an R function translation into a SedonaDB expression #' -#' A context to use for evaluating a set of related R expressions into -#' SedonaDB expressions. -#' -#' @param schema A schema-like object coerced using -#' [nanoarrow::as_nanoarrow_schema()]. -#' -#' @return An object of class sedonadb_expr_ctx -#' @export +#' @param qualified_name The name of the function in the form `pkg::fun` or +#' `fun` if the package name is not relevant. This allows translations to +#' support calls to `fun()` or `pkg::fun()` that appear in an R expression. +#' @param fn A function. The first argument should always be `.factory`, which +#' is the instance of `SedonaDBExprFactory` that may be used to construct +#' the required expressions. #' -#' @examples -#' sd_expr_ctx() -#' -sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { - if (is.null(schema)) { - schema <- nanoarrow::na_struct() - } - - schema <- nanoarrow::as_nanoarrow_schema(schema) - data_names <- names(schema$children) - data <- lapply(data_names, SedonaDBExprFactory$column) - names(data) <- data_names - - structure( - list( - factory = SedonaDBExprFactory$new(ctx()), - schema = schema, - data = rlang::as_data_mask(data), - data_names = data_names, - env = env, - fns = default_fns - ), - class = "sedonadb_expr_ctx" - ) -} - - +#' @returns fn, invisibly +#' @noRd sd_register_translation <- function(qualified_name, fn) { stopifnot(is.function(fn)) @@ -202,6 +221,7 @@ sd_register_translation <- function(qualified_name, fn) { unqualified_name <- pieces[[2]] default_fns[[qualified_name]] <- default_fns[[unqualified_name]] <- fn + invisible(fn) } default_fns <- new.env(parent = emptyenv()) diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md index 60d54bd52..ad32af2b5 100644 --- a/r/sedonadb/tests/testthat/_snaps/expression.md +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -5,3 +5,52 @@ Output Literal(Utf8("foofy"), None) + +# literal expressions can be translated + + Code + sd_eval_expr(quote(1L)) + Output + + Literal(Int32(1), None) + +# column expressions can be translated + + Code + sd_eval_expr(quote(col0), expr_ctx) + Output + + Column(Column { relation: None, name: "col0" }) + +--- + + Code + sd_eval_expr(quote(.data$col0), expr_ctx) + Output + + Column(Column { relation: None, name: "col0" }) + +--- + + Code + sd_eval_expr(quote(.data[[col_zero]]), expr_ctx) + Output + + Column(Column { relation: None, name: "col0" }) + +# function calls containing no SedonaDB expressions can be translated + + Code + sd_eval_expr(quote(abs(-1L))) + Output + + Literal(Int32(1), None) + +# function calls containing SedonaDB expressions can be translated + + Code + sd_eval_expr(quote(abs(col0)), expr_ctx) + Output + + ScalarFunction(ScalarFunction { func: ScalarUDF { inner: AbsFunc { signature: Signature { type_signature: Numeric(1), volatility: Immutable } } }, args: [Column(Column { relation: None, name: "col0" })] }) + diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R index 7309746f9..992d0d0ef 100644 --- a/r/sedonadb/tests/testthat/test-expression.R +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -49,3 +49,33 @@ test_that("expressions can be printed", { print(as_sedonadb_expr("foofy")) ) }) + +test_that("literal expressions can be translated", { + expect_snapshot(sd_eval_expr(quote(1L))) +}) + +test_that("column expressions can be translated", { + schema <- nanoarrow::na_struct(list(col0 = nanoarrow::na_int32())) + expr_ctx <- sd_expr_ctx(schema) + + expect_snapshot(sd_eval_expr(quote(col0), expr_ctx)) + expect_snapshot(sd_eval_expr(quote(.data$col0), expr_ctx)) + col_zero <- "col0" + expect_snapshot(sd_eval_expr(quote(.data[[col_zero]]), expr_ctx)) + + expect_error( + sd_eval_expr(quote(col1), expr_ctx), + "object 'col1' not found" + ) +}) + +test_that("function calls containing no SedonaDB expressions can be translated", { + # Ensure these are evaluated in R (i.e., the resulting expression is a literal) + expect_snapshot(sd_eval_expr(quote(abs(-1L)))) +}) + +test_that("function calls containing SedonaDB expressions can be translated", { + schema <- nanoarrow::na_struct(list(col0 = nanoarrow::na_int32())) + expr_ctx <- sd_expr_ctx(schema) + expect_snapshot(sd_eval_expr(quote(abs(col0)), expr_ctx)) +}) From 0729541a23337bcb0e39b8ae2c40b51b1545ef00 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 31 Dec 2025 20:39:31 -0600 Subject: [PATCH 07/26] remove config update --- r/sedonadb/configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/sedonadb/configure b/r/sedonadb/configure index 4bd8fde96..a4f74a619 100755 --- a/r/sedonadb/configure +++ b/r/sedonadb/configure @@ -22,7 +22,7 @@ pkg-config geos 2>/dev/null if [ $? -eq 0 ]; then - PKGCONFIG_LIBS=`pkg-config --libs geos --static` + PKGCONFIG_LIBS=`pkg-config --libs geos` fi if [ "$LIB_DIR" ]; then From 8f505aea2f03053156cc70700cd1339637b83875 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 31 Dec 2025 20:42:27 -0600 Subject: [PATCH 08/26] add a comment --- r/sedonadb/tests/testthat/test-expression.R | 1 + 1 file changed, 1 insertion(+) diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R index 992d0d0ef..a7860dc55 100644 --- a/r/sedonadb/tests/testthat/test-expression.R +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -75,6 +75,7 @@ test_that("function calls containing no SedonaDB expressions can be translated", }) test_that("function calls containing SedonaDB expressions can be translated", { + # Ensure these are translated as a function call schema <- nanoarrow::na_struct(list(col0 = nanoarrow::na_int32())) expr_ctx <- sd_expr_ctx(schema) expect_snapshot(sd_eval_expr(quote(abs(col0)), expr_ctx)) From 0c772399281d9c4d0e681923b51795008750c7fd Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 31 Dec 2025 20:42:45 -0600 Subject: [PATCH 09/26] remove empty file --- r/sedonadb/R/type.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 r/sedonadb/R/type.R diff --git a/r/sedonadb/R/type.R b/r/sedonadb/R/type.R deleted file mode 100644 index e69de29bb..000000000 From d5ee038f92c47bec95ab98e0f68ff7e9c07485d6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 31 Dec 2025 20:46:37 -0600 Subject: [PATCH 10/26] exlude snapshot tests files --- .pre-commit-config.yaml | 2 ++ dev/release/rat_exclude_files.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 654b84e28..fba9714aa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,6 +25,8 @@ repos: - id: check-yaml - id: detect-private-key - id: end-of-file-fixer + # R snapshot test files may have arbitrary file endings based on test results + exclude: "_snaps" - id: fix-byte-order-marker - id: trailing-whitespace diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 6938c0fc7..bad4cb5c6 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -25,4 +25,5 @@ r/sedonadb/.Rbuildignore r/sedonadb/DESCRIPTION r/sedonadb/NAMESPACE r/sedonadb/src/sedonadb-win.def +r/sedonadb/tests/testthat/_snaps/* submodules/geoarrow-data/* From af4d621c9454cd650b5cae11133b543dbf125411 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 31 Dec 2025 20:56:34 -0600 Subject: [PATCH 11/26] as_sedonadb_expr -> as_sedonadb_literal --- r/sedonadb/NAMESPACE | 14 +++---- r/sedonadb/R/expression.R | 38 +++++++++---------- r/sedonadb/configure | 2 +- r/sedonadb/man/as_sedonadb_expr.Rd | 21 ---------- r/sedonadb/man/as_sedonadb_literal.Rd | 24 ++++++++++++ .../tests/testthat/_snaps/expression.md | 2 +- r/sedonadb/tests/testthat/test-expression.R | 12 +++--- 7 files changed, 58 insertions(+), 55 deletions(-) delete mode 100644 r/sedonadb/man/as_sedonadb_expr.Rd create mode 100644 r/sedonadb/man/as_sedonadb_literal.Rd diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE index 536229869..785ee58cb 100644 --- a/r/sedonadb/NAMESPACE +++ b/r/sedonadb/NAMESPACE @@ -10,12 +10,12 @@ S3method(as_sedonadb_dataframe,nanoarrow_array) S3method(as_sedonadb_dataframe,nanoarrow_array_stream) S3method(as_sedonadb_dataframe,sedonadb_dataframe) S3method(as_sedonadb_dataframe,sf) -S3method(as_sedonadb_expr,SedonaDBExpr) -S3method(as_sedonadb_expr,character) -S3method(as_sedonadb_expr,double) -S3method(as_sedonadb_expr,integer) -S3method(as_sedonadb_expr,nanoarrow_array) -S3method(as_sedonadb_expr,raw) +S3method(as_sedonadb_literal,SedonaDBExpr) +S3method(as_sedonadb_literal,character) +S3method(as_sedonadb_literal,double) +S3method(as_sedonadb_literal,integer) +S3method(as_sedonadb_literal,nanoarrow_array) +S3method(as_sedonadb_literal,raw) S3method(dim,sedonadb_dataframe) S3method(dimnames,sedonadb_dataframe) S3method(head,sedonadb_dataframe) @@ -27,7 +27,7 @@ S3method(print,"sedonadb::SedonaDBExpr__bundle") S3method(print,SedonaDBExpr) S3method(print,sedonadb_dataframe) export(as_sedonadb_dataframe) -export(as_sedonadb_expr) +export(as_sedonadb_literal) export(sd_collect) export(sd_compute) export(sd_configure_proj) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 1dbc938b7..d3707e5a0 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -15,56 +15,56 @@ # specific language governing permissions and limitations # under the License. -#' Create a SedonaDB Logical Expression +#' Create a SedonaDB Literal Expression #' -#' @param x An object +#' @param x An object to convert to a SedonaDB literal #' @param ... Passed to/from methods #' @param type An optional data type to request for the output #' @param factory An expression factory object that should be passed to any -#' other calls to `as_sedonadb_expr()`. +#' other calls to `as_sedonadb_literal()`. #' #' @returns An object of class SedonaDBExpr #' @export -as_sedonadb_expr <- function(x, ..., type = NULL, factory = NULL) { - UseMethod("as_sedonadb_expr") +as_sedonadb_literal <- function(x, ..., type = NULL, factory = NULL) { + UseMethod("as_sedonadb_literal") } #' @export -as_sedonadb_expr.SedonaDBExpr <- function(x, ..., type = NULL) { +as_sedonadb_literal.SedonaDBExpr <- function(x, ..., type = NULL) { handle_type_request(x, type) } #' @export -as_sedonadb_expr.character <- function(x, ..., type = NULL, factory = NULL) { - as_sedonadb_expr_from_nanoarrow(x, ..., type = type) +as_sedonadb_literal.character <- function(x, ..., type = NULL, factory = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) } #' @export -as_sedonadb_expr.integer <- function(x, ..., type = NULL) { - as_sedonadb_expr_from_nanoarrow(x, ..., type = type) +as_sedonadb_literal.integer <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) } #' @export -as_sedonadb_expr.double <- function(x, ..., type = NULL) { - as_sedonadb_expr_from_nanoarrow(x, ..., type = type) +as_sedonadb_literal.double <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) } #' @export -as_sedonadb_expr.raw <- function(x, ..., type = NULL) { - as_sedonadb_expr_from_nanoarrow(list(x), ..., type = type) +as_sedonadb_literal.raw <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(list(x), ..., type = type) } -as_sedonadb_expr_from_nanoarrow <- function(x, ..., type = NULL) { +as_sedonadb_literal_from_nanoarrow <- function(x, ..., type = NULL) { if (length(x) != 1 || is.object(x)) { stop("Can't convert non-scalar to sedonadb_expr") } array <- nanoarrow::as_nanoarrow_array(x) - as_sedonadb_expr(array, type = type) + as_sedonadb_literal(array, type = type) } #' @export -as_sedonadb_expr.nanoarrow_array <- function(x, ..., type = NULL) { +as_sedonadb_literal.nanoarrow_array <- function(x, ..., type = NULL) { schema <- nanoarrow::infer_nanoarrow_schema(x) array_export <- nanoarrow::nanoarrow_allocate_array() @@ -127,7 +127,7 @@ sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { #' Evaluate an R expression into a SedonaDB expression #' #' @param expr An R expression (e.g., the result of `quote()`). -#' @param expr_ctx An [sd_expr_ctx()] +#' @param expr_ctx An `sd_expr_ctx()` #' #' @returns A `SedonaDBExpr` #' @noRd @@ -172,7 +172,7 @@ sd_eval_translation <- function(fn_key, expr, expr_ctx) { sd_eval_default <- function(expr, expr_ctx) { rlang::try_fetch({ r_result <- rlang::eval_tidy(expr, data = expr_ctx$data, env = expr_ctx$env) - as_sedonadb_expr(r_result) + as_sedonadb_literal(r_result) }, error = function(e) { rlang::abort( "SedonaDB R evaluation error", diff --git a/r/sedonadb/configure b/r/sedonadb/configure index a4f74a619..4bd8fde96 100755 --- a/r/sedonadb/configure +++ b/r/sedonadb/configure @@ -22,7 +22,7 @@ pkg-config geos 2>/dev/null if [ $? -eq 0 ]; then - PKGCONFIG_LIBS=`pkg-config --libs geos` + PKGCONFIG_LIBS=`pkg-config --libs geos --static` fi if [ "$LIB_DIR" ]; then diff --git a/r/sedonadb/man/as_sedonadb_expr.Rd b/r/sedonadb/man/as_sedonadb_expr.Rd deleted file mode 100644 index f12b1a376..000000000 --- a/r/sedonadb/man/as_sedonadb_expr.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expression.R -\name{as_sedonadb_expr} -\alias{as_sedonadb_expr} -\title{Create a SedonaDB Logical Expression} -\usage{ -as_sedonadb_expr(x, ..., type = NULL) -} -\arguments{ -\item{x}{An object} - -\item{...}{Passed to/from methods} - -\item{type}{An optional data type to request for the output} -} -\value{ -An object of class SedonaDBExpr -} -\description{ -Create a SedonaDB Logical Expression -} diff --git a/r/sedonadb/man/as_sedonadb_literal.Rd b/r/sedonadb/man/as_sedonadb_literal.Rd new file mode 100644 index 000000000..be32362b6 --- /dev/null +++ b/r/sedonadb/man/as_sedonadb_literal.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expression.R +\name{as_sedonadb_literal} +\alias{as_sedonadb_literal} +\title{Create a SedonaDB Literal Expression} +\usage{ +as_sedonadb_literal(x, ..., type = NULL, factory = NULL) +} +\arguments{ +\item{x}{An object to convert to a SedonaDB literal} + +\item{...}{Passed to/from methods} + +\item{type}{An optional data type to request for the output} + +\item{factory}{An expression factory object that should be passed to any +other calls to \code{as_sedonadb_literal()}.} +} +\value{ +An object of class SedonaDBExpr +} +\description{ +Create a SedonaDB Literal Expression +} diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md index ad32af2b5..d7fd4ef35 100644 --- a/r/sedonadb/tests/testthat/_snaps/expression.md +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -1,7 +1,7 @@ # expressions can be printed Code - print(as_sedonadb_expr("foofy")) + print(as_sedonadb_literal("foofy")) Output Literal(Utf8("foofy"), None) diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R index a7860dc55..91fac00bf 100644 --- a/r/sedonadb/tests/testthat/test-expression.R +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -17,36 +17,36 @@ test_that("basic literals can be converted to expressions", { expect_identical( - as_sedonadb_expr("foofy")$debug_string(), + as_sedonadb_literal("foofy")$debug_string(), 'Literal(Utf8("foofy"), None)' ) expect_identical( - as_sedonadb_expr(1L)$debug_string(), + as_sedonadb_literal(1L)$debug_string(), 'Literal(Int32(1), None)' ) expect_identical( - as_sedonadb_expr(1.0)$debug_string(), + as_sedonadb_literal(1.0)$debug_string(), 'Literal(Float64(1), None)' ) expect_identical( - as_sedonadb_expr(as.raw(c(1:3)))$debug_string(), + as_sedonadb_literal(as.raw(c(1:3)))$debug_string(), 'Literal(Binary("1,2,3"), None)' ) }) test_that("non-scalars can't be automatically converted to literals", { expect_error( - as_sedonadb_expr(1:5)$debug_string(), + as_sedonadb_literal(1:5)$debug_string(), "Can't convert non-scalar to sedonadb_expr" ) }) test_that("expressions can be printed", { expect_snapshot( - print(as_sedonadb_expr("foofy")) + print(as_sedonadb_literal("foofy")) ) }) From 7783eb96d231db9cb75a86220bb4c887fe06e63b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 31 Dec 2025 23:41:24 -0600 Subject: [PATCH 12/26] a few more --- r/sedonadb/NAMESPACE | 8 +- r/sedonadb/R/000-wrappers.R | 33 ++- r/sedonadb/R/expression.R | 245 ++++++++---------- r/sedonadb/R/literal.R | 93 +++++++ r/sedonadb/man/as_sedonadb_literal.Rd | 16 +- r/sedonadb/man/sd_expr_column.Rd | 47 ++++ r/sedonadb/src/init.c | 29 ++- r/sedonadb/src/rust/api.h | 5 +- r/sedonadb/src/rust/src/expression.rs | 34 ++- .../tests/testthat/_snaps/expression.md | 20 +- r/sedonadb/tests/testthat/test-expression.R | 40 +-- r/sedonadb/tests/testthat/test-literal.R | 50 ++++ 12 files changed, 414 insertions(+), 206 deletions(-) create mode 100644 r/sedonadb/R/literal.R create mode 100644 r/sedonadb/man/sd_expr_column.Rd create mode 100644 r/sedonadb/tests/testthat/test-literal.R diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE index 785ee58cb..72abd3d27 100644 --- a/r/sedonadb/NAMESPACE +++ b/r/sedonadb/NAMESPACE @@ -10,7 +10,7 @@ S3method(as_sedonadb_dataframe,nanoarrow_array) S3method(as_sedonadb_dataframe,nanoarrow_array_stream) S3method(as_sedonadb_dataframe,sedonadb_dataframe) S3method(as_sedonadb_dataframe,sf) -S3method(as_sedonadb_literal,SedonaDBExpr) +S3method(as_sedonadb_literal,"NULL") S3method(as_sedonadb_literal,character) S3method(as_sedonadb_literal,double) S3method(as_sedonadb_literal,integer) @@ -26,13 +26,19 @@ S3method(print,"sedonadb::SedonaDBExprFactory__bundle") S3method(print,"sedonadb::SedonaDBExpr__bundle") S3method(print,SedonaDBExpr) S3method(print,sedonadb_dataframe) +export(as_sd_expr) export(as_sedonadb_dataframe) export(as_sedonadb_literal) +export(is_sd_expr) export(sd_collect) export(sd_compute) export(sd_configure_proj) export(sd_count) export(sd_drop_view) +export(sd_expr_column) +export(sd_expr_factory) +export(sd_expr_literal) +export(sd_expr_scalar_function) export(sd_preview) export(sd_read_parquet) export(sd_register_udf) diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R index d026aa245..1e09c0f5c 100644 --- a/r/sedonadb/R/000-wrappers.R +++ b/r/sedonadb/R/000-wrappers.R @@ -268,16 +268,37 @@ class(`InternalDataFrame`) <- c("sedonadb::InternalDataFrame__bundle", "savvy_se ### wrapper functions for SedonaDBExpr +`SedonaDBExpr_alias` <- function(self) { + function(`name`) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExpr_alias__impl, `self`, `name`)) + } +} + +`SedonaDBExpr_cast` <- function(self) { + function(`schema_xptr`) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExpr_cast__impl, `self`, `schema_xptr`)) + } +} + `SedonaDBExpr_debug_string` <- function(self) { function() { .Call(savvy_SedonaDBExpr_debug_string__impl, `self`) } } +`SedonaDBExpr_display` <- function(self) { + function() { + .Call(savvy_SedonaDBExpr_display__impl, `self`) + } +} + `.savvy_wrap_SedonaDBExpr` <- function(ptr) { e <- new.env(parent = emptyenv()) e$.ptr <- ptr + e$`alias` <- `SedonaDBExpr_alias`(ptr) + e$`cast` <- `SedonaDBExpr_cast`(ptr) e$`debug_string` <- `SedonaDBExpr_debug_string`(ptr) + e$`display` <- `SedonaDBExpr_display`(ptr) class(e) <- c("sedonadb::SedonaDBExpr", "SedonaDBExpr", "savvy_sedonadb__sealed") e @@ -300,6 +321,12 @@ class(`SedonaDBExpr`) <- c("sedonadb::SedonaDBExpr__bundle", "savvy_sedonadb__se ### wrapper functions for SedonaDBExprFactory +`SedonaDBExprFactory_column` <- function(self) { + function(`name`, `qualifier` = NULL) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_column__impl, `self`, `name`, `qualifier`)) + } +} + `SedonaDBExprFactory_scalar_function` <- function(self) { function(`name`, `args`) { .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_scalar_function__impl, `self`, `name`, `args`)) @@ -309,6 +336,7 @@ class(`SedonaDBExpr`) <- c("sedonadb::SedonaDBExpr__bundle", "savvy_sedonadb__se `.savvy_wrap_SedonaDBExprFactory` <- function(ptr) { e <- new.env(parent = emptyenv()) e$.ptr <- ptr + e$`column` <- `SedonaDBExprFactory_column`(ptr) e$`scalar_function` <- `SedonaDBExprFactory_scalar_function`(ptr) class(e) <- c("sedonadb::SedonaDBExprFactory", "SedonaDBExprFactory", "savvy_sedonadb__sealed") @@ -321,10 +349,6 @@ class(`SedonaDBExpr`) <- c("sedonadb::SedonaDBExpr__bundle", "savvy_sedonadb__se ### associated functions for SedonaDBExprFactory -`SedonaDBExprFactory`$`column` <- function(`name`, `qualifier` = NULL) { - .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_column__impl, `name`, `qualifier`)) -} - `SedonaDBExprFactory`$`literal` <- function(`array_xptr`, `schema_xptr`) { .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_literal__impl, `array_xptr`, `schema_xptr`)) } @@ -341,3 +365,4 @@ class(`SedonaDBExprFactory`) <- c("sedonadb::SedonaDBExprFactory__bundle", "savv `print.sedonadb::SedonaDBExprFactory__bundle` <- function(x, ...) { cat('sedonadb::SedonaDBExprFactory\n') } + diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index d3707e5a0..8c32a3863 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -1,129 +1,74 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#' Create a SedonaDB Literal Expression + +#' Create SedonaDB logical expressions #' -#' @param x An object to convert to a SedonaDB literal -#' @param ... Passed to/from methods -#' @param type An optional data type to request for the output -#' @param factory An expression factory object that should be passed to any -#' other calls to `as_sedonadb_literal()`. +#' @param column_name A column name +#' @param qualifier An optional qualifier (e.g., table reference) that may be +#' used to disambiguate a specific reference +#' @param ... Used to accommodate future expansion of the API. Currently all +#' dots must be empty. +#' @param factory A [sd_expr_factory()]. This factory wraps a SedonaDB context +#' and is used to resolve scalar functions and/or retrieve options. #' #' @returns An object of class SedonaDBExpr #' @export -as_sedonadb_literal <- function(x, ..., type = NULL, factory = NULL) { - UseMethod("as_sedonadb_literal") -} - -#' @export -as_sedonadb_literal.SedonaDBExpr <- function(x, ..., type = NULL) { - handle_type_request(x, type) -} - -#' @export -as_sedonadb_literal.character <- function(x, ..., type = NULL, factory = NULL) { - as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +#' +#' @examples +#' sd_expr_column("foofy") +#' sd_expr_literal(1L) +#' sd_expr_scalar_function("abs", list(1L)) +#' +sd_expr_column <- function(column_name, qualifier = NULL, ..., factory = sd_expr_factory()) { + rlang::check_dots_empty() + factory$column(column_name, qualifier) } +#' @rdname sd_expr_column #' @export -as_sedonadb_literal.integer <- function(x, ..., type = NULL) { - as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +sd_expr_literal <- function(x, ..., type = NULL, factory = sd_expr_factory()) { + rlang::check_dots_empty() + as_sedonadb_literal(x, type = type, factory = factory) } +#' @rdname sd_expr_column #' @export -as_sedonadb_literal.double <- function(x, ..., type = NULL) { - as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +sd_expr_scalar_function <- function(function_name, args, ..., factory = sd_expr_factory()) { + rlang::check_dots_empty() + args_as_expr <- lapply(args, as_sd_expr, factory = factory) + # Not sure why we need this exactly (something about savvy) + args_as_expr_ptr <- lapply(args_as_expr, "[[", ".ptr") + factory$scalar_function(function_name, args_as_expr_ptr) } +#' @rdname sd_expr_column #' @export -as_sedonadb_literal.raw <- function(x, ..., type = NULL) { - as_sedonadb_literal_from_nanoarrow(list(x), ..., type = type) -} - -as_sedonadb_literal_from_nanoarrow <- function(x, ..., type = NULL) { - if (length(x) != 1 || is.object(x)) { - stop("Can't convert non-scalar to sedonadb_expr") +as_sd_expr <- function(x, ..., factory = sd_expr_factory()) { + if (inherits(x, "SedonaDBExpr")) { + x + } else { + sd_expr_literal(x, factory = factory) } - - array <- nanoarrow::as_nanoarrow_array(x) - as_sedonadb_literal(array, type = type) } +#' @rdname sd_expr_column #' @export -as_sedonadb_literal.nanoarrow_array <- function(x, ..., type = NULL) { - schema <- nanoarrow::infer_nanoarrow_schema(x) - - array_export <- nanoarrow::nanoarrow_allocate_array() - nanoarrow::nanoarrow_pointer_export(x, array_export) - - expr <- SedonaDBExprFactory$literal(array_export, schema) - handle_type_request(expr, type) +is_sd_expr <- function(x) { + inherits(x, "SedonaDBExpr") } -handle_type_request <- function(x, type) { - if (!is.null(type)) { - x$cast(nanoarrow::as_nanoarrow_schema(x)) - } else { - x - } +#' @rdname sd_expr_column +#' @export +sd_expr_factory <- function() { + SedonaDBExprFactory$new(ctx()) } #' @export print.SedonaDBExpr <- function(x, ...) { cat("\n") - cat(x$debug_string()) + cat(x$display()) cat("\n") invisible(x) } -#' Expression evaluation context -#' -#' A context to use for evaluating a set of related R expressions into -#' SedonaDB expressions. -#' -#' @param schema A schema-like object coerced using -#' [nanoarrow::as_nanoarrow_schema()]. -#' -#' @return An object of class sedonadb_expr_ctx -#' @noRd -sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { - if (is.null(schema)) { - schema <- nanoarrow::na_struct() - } - - schema <- nanoarrow::as_nanoarrow_schema(schema) - data_names <- as.character(names(schema$children)) - data <- lapply(data_names, SedonaDBExprFactory$column) - names(data) <- data_names - - structure( - list( - factory = SedonaDBExprFactory$new(ctx()), - schema = schema, - data = rlang::as_data_mask(data), - data_names = data_names, - env = env, - fns = default_fns - ), - class = "sedonadb_expr_ctx" - ) -} - - #' Evaluate an R expression into a SedonaDB expression #' #' @param expr An R expression (e.g., the result of `quote()`). @@ -131,25 +76,36 @@ sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { #' #' @returns A `SedonaDBExpr` #' @noRd -sd_eval_expr <- function(expr, expr_ctx = sd_expr_ctx()) { - if (rlang::is_call(expr)) { - # If there is no expression anywhere in this call, just evaluate it in R - # and move on. - if (!r_expr_contains_sedonadb_expr(expr, expr_ctx)) { - return(sd_eval_default(expr, expr_ctx)) - } +sd_eval_expr <- function(expr, expr_ctx = sd_expr_ctx(env = env), env = parent.frame()) { + rlang::try_fetch({ + result <- sd_eval_expr_inner(expr, expr_ctx) + as_sd_expr(result, factory = factory) + }, error = function(e) { + rlang::abort( + sprintf("Error evaluating translated expression %s", rlang::expr_label(expr)), + parent = e + ) + }) +} - # Handle `pkg::fun` or `fun` +sd_eval_expr_inner <- function(expr, expr_ctx) { + if (rlang::is_call(expr)) { + # Extract `pkg::fun` or `fun` if this is a usual call (e.g., not + # something fancy like `fun()()`) call_name <- rlang::call_name(expr) + + # If this is not a fancy function call and we have a translation, call it. + # Individual translations can choose to defer to the R function if all the + # arguments are R objects and not SedonaDB expressions (or the user can + # use !! to force R evaluation). if (!is.null(call_name) && !is.null(expr_ctx$fns[[call_name]])) { - return(sd_eval_translation(call_name, expr, expr_ctx)) + sd_eval_translation(call_name, expr, expr_ctx) } else { - # Otherwise we have an inlined function and we just have to evaluate - return(sd_eval_default(expr, expr_ctx)) + sd_eval_default(expr, expr_ctx) } + } else { + sd_eval_default(expr, expr_ctx) } - - sd_eval_default(expr, expr_ctx) } sd_eval_translation <- function(fn_key, expr, expr_ctx) { @@ -170,37 +126,43 @@ sd_eval_translation <- function(fn_key, expr, expr_ctx) { } sd_eval_default <- function(expr, expr_ctx) { - rlang::try_fetch({ - r_result <- rlang::eval_tidy(expr, data = expr_ctx$data, env = expr_ctx$env) - as_sedonadb_literal(r_result) - }, error = function(e) { - rlang::abort( - "SedonaDB R evaluation error", - parent = e - ) - }) + rlang::eval_tidy(expr, data = expr_ctx$data, env = expr_ctx$env) } -r_expr_contains_sedonadb_expr <- function(expr, expr_ctx) { - if (rlang::is_call(expr, c("$", "[[")) && rlang::is_symbol(expr[[1]], ".data")) { - # An attempt to access the .data pronoun will either error or return an - # SedonaDB expression - TRUE - } else if (rlang::is_symbol(expr, expr_ctx$data_names)) { - TRUE - } else if (rlang::is_call(expr)) { - for (i in seq_along(expr)) { - if (r_expr_contains_sedonadb_expr(expr[[i]], expr_ctx)) { - return(TRUE) - } - } - - FALSE - } else if(rlang::is_atomic(expr)) { - inherits(expr, "sedonadb_expr") - } else { - FALSE +#' Expression evaluation context +#' +#' A context to use for evaluating a set of related R expressions into +#' SedonaDB expressions. One expression context may be used to translate +#' multiple expressions (e.g., all arguments to `mutate()`). +#' +#' @param schema A schema-like object coerced using +#' [nanoarrow::as_nanoarrow_schema()]. This is used to create the data mask +#' for expressions. +#' @param env The expression environment. This is needed to evaluate expressions. +#' +#' @return An object of class sedonadb_expr_ctx +#' @noRd +sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { + if (is.null(schema)) { + schema <- nanoarrow::na_struct() } + + schema <- nanoarrow::as_nanoarrow_schema(schema) + data_names <- as.character(names(schema$children)) + data <- lapply(data_names, sd_expr_column) + names(data) <- data_names + + structure( + list( + factory = sd_expr_factory(), + schema = schema, + data = rlang::as_data_mask(data), + data_names = data_names, + env = env, + fns = default_fns + ), + class = "sedonadb_expr_ctx" + ) } #' Register an R function translation into a SedonaDB expression @@ -208,7 +170,7 @@ r_expr_contains_sedonadb_expr <- function(expr, expr_ctx) { #' @param qualified_name The name of the function in the form `pkg::fun` or #' `fun` if the package name is not relevant. This allows translations to #' support calls to `fun()` or `pkg::fun()` that appear in an R expression. -#' @param fn A function. The first argument should always be `.factory`, which +#' @param fn A function. The first argument must always be `.factory`, which #' is the instance of `SedonaDBExprFactory` that may be used to construct #' the required expressions. #' @@ -227,6 +189,5 @@ sd_register_translation <- function(qualified_name, fn) { default_fns <- new.env(parent = emptyenv()) sd_register_translation("base::abs", function(.factory, x) { - # Not sure why I need $.ptr here - .factory$scalar_function("abs", list(x$.ptr)) + sd_expr_scalar_function("abs", list(x), factory = .factory) }) diff --git a/r/sedonadb/R/literal.R b/r/sedonadb/R/literal.R new file mode 100644 index 000000000..bf97be0c6 --- /dev/null +++ b/r/sedonadb/R/literal.R @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' S3 Generic to create a SedonaDB literal expression +#' +#' This generic provides the opportunity for objects to register a mechanism +#' to be understood as literals in the context of a SedonaDB expression. +#' Users constructing expressions directly should use [sd_expr_literal()]. +#' +#' @param x An object to convert to a SedonaDB literal +#' @param ... Passed to/from methods +#' @param type An optional data type to request for the output +#' @param factory An `sd_expr_factory()` that should be passed to any +#' other calls to `as_sedonadb_literal()` if needed +#' +#' @returns An object of class SedonaDBExpr +#' @export +#' +#' @examples +#' as_sedonadb_literal("abcd") +#' +as_sedonadb_literal <- function(x, ..., type = NULL, factory = NULL) { + UseMethod("as_sedonadb_literal") +} + +#' @export +as_sedonadb_literal.NULL <- function(x, ..., type = NULL) { + na <- nanoarrow::nanoarrow_array_init(nanoarrow::na_na()) |> + nanoarrow::nanoarrow_array_modify(list(length = 1L, null_count = 1L)) + as_sedonadb_literal_from_nanoarrow(na, ..., type = type) +} + +#' @export +as_sedonadb_literal.character <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +} + +#' @export +as_sedonadb_literal.integer <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +} + +#' @export +as_sedonadb_literal.double <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +} + +#' @export +as_sedonadb_literal.raw <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(list(x), ..., type = type) +} + +as_sedonadb_literal_from_nanoarrow <- function(x, ..., type = NULL) { + array <- nanoarrow::as_nanoarrow_array(x) + if (array$length != 1L) { + stop("Can't convert non-scalar to sedonadb_expr") + } + + as_sedonadb_literal(array, type = type) +} + +#' @export +as_sedonadb_literal.nanoarrow_array <- function(x, ..., type = NULL) { + schema <- nanoarrow::infer_nanoarrow_schema(x) + + array_export <- nanoarrow::nanoarrow_allocate_array() + nanoarrow::nanoarrow_pointer_export(x, array_export) + + expr <- SedonaDBExprFactory$literal(array_export, schema) + handle_type_request(expr, type) +} + +handle_type_request <- function(x, type) { + if (!is.null(type)) { + x$cast(nanoarrow::as_nanoarrow_schema(x)) + } else { + x + } +} diff --git a/r/sedonadb/man/as_sedonadb_literal.Rd b/r/sedonadb/man/as_sedonadb_literal.Rd index be32362b6..448b0222f 100644 --- a/r/sedonadb/man/as_sedonadb_literal.Rd +++ b/r/sedonadb/man/as_sedonadb_literal.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expression.R +% Please edit documentation in R/literal.R \name{as_sedonadb_literal} \alias{as_sedonadb_literal} -\title{Create a SedonaDB Literal Expression} +\title{S3 Generic to create a SedonaDB literal expression} \usage{ as_sedonadb_literal(x, ..., type = NULL, factory = NULL) } @@ -13,12 +13,18 @@ as_sedonadb_literal(x, ..., type = NULL, factory = NULL) \item{type}{An optional data type to request for the output} -\item{factory}{An expression factory object that should be passed to any -other calls to \code{as_sedonadb_literal()}.} +\item{factory}{An \code{sd_expr_factory()} that should be passed to any +other calls to \code{as_sedonadb_literal()} if needed} } \value{ An object of class SedonaDBExpr } \description{ -Create a SedonaDB Literal Expression +This generic provides the opportunity for objects to register a mechanism +to be understood as literals in the context of a SedonaDB expression. +Users constructing expressions directly should use \code{\link[=sd_expr_literal]{sd_expr_literal()}}. +} +\examples{ +as_sedonadb_literal("abcd") + } diff --git a/r/sedonadb/man/sd_expr_column.Rd b/r/sedonadb/man/sd_expr_column.Rd new file mode 100644 index 000000000..a963ef712 --- /dev/null +++ b/r/sedonadb/man/sd_expr_column.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expression.R +\name{sd_expr_column} +\alias{sd_expr_column} +\alias{sd_expr_literal} +\alias{sd_expr_scalar_function} +\alias{as_sd_expr} +\alias{is_sd_expr} +\alias{sd_expr_factory} +\title{Create SedonaDB logical expressions} +\usage{ +sd_expr_column(column_name, qualifier = NULL, ..., factory = sd_expr_factory()) + +sd_expr_literal(x, ..., type = NULL, factory = sd_expr_factory()) + +sd_expr_scalar_function(function_name, args, ..., factory = sd_expr_factory()) + +as_sd_expr(x, ..., factory = sd_expr_factory()) + +is_sd_expr(x) + +sd_expr_factory() +} +\arguments{ +\item{column_name}{A column name} + +\item{qualifier}{An optional qualifier (e.g., table reference) that may be +used to disambiguate a specific reference} + +\item{...}{Used to accommodate future expansion of the API. Currently all +dots must be empty.} + +\item{factory}{A \code{\link[=sd_expr_factory]{sd_expr_factory()}}. This factory wraps a SedonaDB context +and is used to resolve scalar functions and/or retrieve options.} +} +\value{ +An object of class SedonaDBExpr +} +\description{ +Create SedonaDB logical expressions +} +\examples{ +sd_expr_column("foofy") +sd_expr_literal(1L) +sd_expr_scalar_function("abs", list(1L)) + +} diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c index 44583102f..4a5bc4d68 100644 --- a/r/sedonadb/src/init.c +++ b/r/sedonadb/src/init.c @@ -209,15 +209,30 @@ SEXP savvy_InternalDataFrame_to_view__impl(SEXP self__, SEXP c_arg__ctx, return handle_result(res); } +SEXP savvy_SedonaDBExpr_alias__impl(SEXP self__, SEXP c_arg__name) { + SEXP res = savvy_SedonaDBExpr_alias__ffi(self__, c_arg__name); + return handle_result(res); +} + +SEXP savvy_SedonaDBExpr_cast__impl(SEXP self__, SEXP c_arg__schema_xptr) { + SEXP res = savvy_SedonaDBExpr_cast__ffi(self__, c_arg__schema_xptr); + return handle_result(res); +} + SEXP savvy_SedonaDBExpr_debug_string__impl(SEXP self__) { SEXP res = savvy_SedonaDBExpr_debug_string__ffi(self__); return handle_result(res); } -SEXP savvy_SedonaDBExprFactory_column__impl(SEXP c_arg__name, +SEXP savvy_SedonaDBExpr_display__impl(SEXP self__) { + SEXP res = savvy_SedonaDBExpr_display__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_column__impl(SEXP self__, SEXP c_arg__name, SEXP c_arg__qualifier) { - SEXP res = - savvy_SedonaDBExprFactory_column__ffi(c_arg__name, c_arg__qualifier); + SEXP res = savvy_SedonaDBExprFactory_column__ffi(self__, c_arg__name, + c_arg__qualifier); return handle_result(res); } @@ -290,10 +305,16 @@ static const R_CallMethodDef CallEntries[] = { (DL_FUNC)&savvy_InternalDataFrame_to_provider__impl, 1}, {"savvy_InternalDataFrame_to_view__impl", (DL_FUNC)&savvy_InternalDataFrame_to_view__impl, 4}, + {"savvy_SedonaDBExpr_alias__impl", (DL_FUNC)&savvy_SedonaDBExpr_alias__impl, + 2}, + {"savvy_SedonaDBExpr_cast__impl", (DL_FUNC)&savvy_SedonaDBExpr_cast__impl, + 2}, {"savvy_SedonaDBExpr_debug_string__impl", (DL_FUNC)&savvy_SedonaDBExpr_debug_string__impl, 1}, + {"savvy_SedonaDBExpr_display__impl", + (DL_FUNC)&savvy_SedonaDBExpr_display__impl, 1}, {"savvy_SedonaDBExprFactory_column__impl", - (DL_FUNC)&savvy_SedonaDBExprFactory_column__impl, 2}, + (DL_FUNC)&savvy_SedonaDBExprFactory_column__impl, 3}, {"savvy_SedonaDBExprFactory_literal__impl", (DL_FUNC)&savvy_SedonaDBExprFactory_literal__impl, 2}, {"savvy_SedonaDBExprFactory_new__impl", diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h index ef302acba..f12959c83 100644 --- a/r/sedonadb/src/rust/api.h +++ b/r/sedonadb/src/rust/api.h @@ -62,10 +62,13 @@ SEXP savvy_InternalDataFrame_to_view__ffi(SEXP self__, SEXP c_arg__ctx, SEXP c_arg__overwrite); // methods and associated functions for SedonaDBExpr +SEXP savvy_SedonaDBExpr_alias__ffi(SEXP self__, SEXP c_arg__name); +SEXP savvy_SedonaDBExpr_cast__ffi(SEXP self__, SEXP c_arg__schema_xptr); SEXP savvy_SedonaDBExpr_debug_string__ffi(SEXP self__); +SEXP savvy_SedonaDBExpr_display__ffi(SEXP self__); // methods and associated functions for SedonaDBExprFactory -SEXP savvy_SedonaDBExprFactory_column__ffi(SEXP c_arg__name, +SEXP savvy_SedonaDBExprFactory_column__ffi(SEXP self__, SEXP c_arg__name, SEXP c_arg__qualifier); SEXP savvy_SedonaDBExprFactory_literal__ffi(SEXP c_arg__array_xptr, SEXP c_arg__schema_xptr); diff --git a/r/sedonadb/src/rust/src/expression.rs b/r/sedonadb/src/rust/src/expression.rs index 777c344ca..9f7f10b0e 100644 --- a/r/sedonadb/src/rust/src/expression.rs +++ b/r/sedonadb/src/rust/src/expression.rs @@ -20,12 +20,15 @@ use std::sync::Arc; use datafusion_common::{Column, ScalarValue}; use datafusion_expr::{ expr::{FieldMetadata, ScalarFunction}, - Expr, + Cast, Expr, }; use savvy::{savvy, savvy_err}; use sedona::context::SedonaContext; -use crate::{context::InternalContext, ffi::import_array}; +use crate::{ + context::InternalContext, + ffi::{import_array, import_field}, +}; #[savvy] pub struct SedonaDBExpr { @@ -34,9 +37,34 @@ pub struct SedonaDBExpr { #[savvy] impl SedonaDBExpr { + fn display(&self) -> savvy::Result { + format!("{}", self.inner).try_into() + } + fn debug_string(&self) -> savvy::Result { format!("{:?}", self.inner).try_into() } + + fn alias(&self, name: &str) -> savvy::Result { + let inner = self.inner.clone().alias_if_changed(name.to_string())?; + Ok(Self { inner }) + } + + fn cast(&self, schema_xptr: savvy::Sexp) -> savvy::Result { + let field = import_field(schema_xptr)?; + if let Some(type_name) = field.extension_type_name() { + return Err(savvy_err!( + "Can't cast to Arrow extension type '{type_name}'" + )); + } + + let inner = Expr::Cast(Cast::new( + self.inner.clone().into(), + field.data_type().clone(), + )); + + Ok(Self { inner }) + } } #[savvy] @@ -65,7 +93,7 @@ impl SedonaDBExprFactory { Ok(SedonaDBExpr { inner }) } - fn column(name: &str, qualifier: Option<&str>) -> savvy::Result { + fn column(&self, name: &str, qualifier: Option<&str>) -> savvy::Result { let inner = Expr::Column(Column::new(qualifier, name)); Ok(SedonaDBExpr { inner }) } diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md index d7fd4ef35..762473197 100644 --- a/r/sedonadb/tests/testthat/_snaps/expression.md +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -4,7 +4,7 @@ print(as_sedonadb_literal("foofy")) Output - Literal(Utf8("foofy"), None) + Utf8("foofy") # literal expressions can be translated @@ -12,7 +12,7 @@ sd_eval_expr(quote(1L)) Output - Literal(Int32(1), None) + Int32(1) # column expressions can be translated @@ -20,7 +20,7 @@ sd_eval_expr(quote(col0), expr_ctx) Output - Column(Column { relation: None, name: "col0" }) + col0 --- @@ -28,7 +28,7 @@ sd_eval_expr(quote(.data$col0), expr_ctx) Output - Column(Column { relation: None, name: "col0" }) + col0 --- @@ -36,21 +36,21 @@ sd_eval_expr(quote(.data[[col_zero]]), expr_ctx) Output - Column(Column { relation: None, name: "col0" }) + col0 -# function calls containing no SedonaDB expressions can be translated +# function calls with a translation become function calls Code sd_eval_expr(quote(abs(-1L))) Output - Literal(Int32(1), None) + abs(Int32(-1)) -# function calls containing SedonaDB expressions can be translated +# function calls without a translation are evaluated in R Code - sd_eval_expr(quote(abs(col0)), expr_ctx) + sd_eval_expr(quote(function_without_a_translation(1L))) Output - ScalarFunction(ScalarFunction { func: ScalarUDF { inner: AbsFunc { signature: Signature { type_signature: Numeric(1), volatility: Immutable } } }, args: [Column(Column { relation: None, name: "col0" })] }) + Int32(2) diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R index 91fac00bf..e0af41dfe 100644 --- a/r/sedonadb/tests/testthat/test-expression.R +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -15,35 +15,6 @@ # specific language governing permissions and limitations # under the License. -test_that("basic literals can be converted to expressions", { - expect_identical( - as_sedonadb_literal("foofy")$debug_string(), - 'Literal(Utf8("foofy"), None)' - ) - - expect_identical( - as_sedonadb_literal(1L)$debug_string(), - 'Literal(Int32(1), None)' - ) - - expect_identical( - as_sedonadb_literal(1.0)$debug_string(), - 'Literal(Float64(1), None)' - ) - - expect_identical( - as_sedonadb_literal(as.raw(c(1:3)))$debug_string(), - 'Literal(Binary("1,2,3"), None)' - ) -}) - -test_that("non-scalars can't be automatically converted to literals", { - expect_error( - as_sedonadb_literal(1:5)$debug_string(), - "Can't convert non-scalar to sedonadb_expr" - ) -}) - test_that("expressions can be printed", { expect_snapshot( print(as_sedonadb_literal("foofy")) @@ -69,14 +40,11 @@ test_that("column expressions can be translated", { ) }) -test_that("function calls containing no SedonaDB expressions can be translated", { - # Ensure these are evaluated in R (i.e., the resulting expression is a literal) +test_that("function calls with a translation become function calls", { expect_snapshot(sd_eval_expr(quote(abs(-1L)))) }) -test_that("function calls containing SedonaDB expressions can be translated", { - # Ensure these are translated as a function call - schema <- nanoarrow::na_struct(list(col0 = nanoarrow::na_int32())) - expr_ctx <- sd_expr_ctx(schema) - expect_snapshot(sd_eval_expr(quote(abs(col0)), expr_ctx)) +test_that("function calls without a translation are evaluated in R", { + function_without_a_translation <- function(x) x + 1L + expect_snapshot(sd_eval_expr(quote(function_without_a_translation(1L)))) }) diff --git a/r/sedonadb/tests/testthat/test-literal.R b/r/sedonadb/tests/testthat/test-literal.R new file mode 100644 index 000000000..6096ef151 --- /dev/null +++ b/r/sedonadb/tests/testthat/test-literal.R @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +test_that("basic literals can be converted to expressions", { + expect_identical( + as_sedonadb_literal(NULL)$debug_string(), + 'Literal(NULL, None)' + ) + + expect_identical( + as_sedonadb_literal("foofy")$debug_string(), + 'Literal(Utf8("foofy"), None)' + ) + + expect_identical( + as_sedonadb_literal(1L)$debug_string(), + 'Literal(Int32(1), None)' + ) + + expect_identical( + as_sedonadb_literal(1.0)$debug_string(), + 'Literal(Float64(1), None)' + ) + + expect_identical( + as_sedonadb_literal(as.raw(c(1:3)))$debug_string(), + 'Literal(Binary("1,2,3"), None)' + ) +}) + +test_that("non-scalars can't be automatically converted to literals", { + expect_error( + as_sedonadb_literal(1:5)$debug_string(), + "Can't convert non-scalar to sedonadb_expr" + ) +}) From ab1790c688aa5156d77c456b7d66443b0eb10ff8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 31 Dec 2025 23:57:42 -0600 Subject: [PATCH 13/26] more tests --- r/sedonadb/NAMESPACE | 3 ++ r/sedonadb/R/expression.R | 26 ++++++++++++- r/sedonadb/R/literal.R | 5 +++ r/sedonadb/man/sd_expr_column.Rd | 17 +++++++++ .../tests/testthat/_snaps/expression.md | 38 +++++++++++++++++-- r/sedonadb/tests/testthat/_snaps/literal.md | 8 ++++ r/sedonadb/tests/testthat/test-expression.R | 15 ++++++-- r/sedonadb/tests/testthat/test-literal.R | 4 ++ 8 files changed, 108 insertions(+), 8 deletions(-) create mode 100644 r/sedonadb/tests/testthat/_snaps/literal.md diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE index 72abd3d27..abca6a32d 100644 --- a/r/sedonadb/NAMESPACE +++ b/r/sedonadb/NAMESPACE @@ -16,6 +16,7 @@ S3method(as_sedonadb_literal,double) S3method(as_sedonadb_literal,integer) S3method(as_sedonadb_literal,nanoarrow_array) S3method(as_sedonadb_literal,raw) +S3method(as_sedonadb_literal,wk_wkb) S3method(dim,sedonadb_dataframe) S3method(dimnames,sedonadb_dataframe) S3method(head,sedonadb_dataframe) @@ -35,6 +36,8 @@ export(sd_compute) export(sd_configure_proj) export(sd_count) export(sd_drop_view) +export(sd_expr_alias) +export(sd_expr_cast) export(sd_expr_column) export(sd_expr_factory) export(sd_expr_literal) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 8c32a3863..f0fd8401a 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -4,10 +4,15 @@ #' @param column_name A column name #' @param qualifier An optional qualifier (e.g., table reference) that may be #' used to disambiguate a specific reference -#' @param ... Used to accommodate future expansion of the API. Currently all -#' dots must be empty. +#' @param function_name The name of the function to call. This name is resolved +#' from the context associated with `factory`. +#' @param type A destination type into which `expr` should be cast. +#' @param expr A SedonaDBExpr or object coercible to one with [as_sd_expr()]. +#' @param alias An alias to apply to `expr`. #' @param factory A [sd_expr_factory()]. This factory wraps a SedonaDB context #' and is used to resolve scalar functions and/or retrieve options. +#' @param ... Used to accommodate future expansion of the API. Currently all +#' dots must be empty. #' #' @returns An object of class SedonaDBExpr #' @export @@ -16,6 +21,8 @@ #' sd_expr_column("foofy") #' sd_expr_literal(1L) #' sd_expr_scalar_function("abs", list(1L)) +#' sd_expr_cast(1L, nanoarrow::na_int64()) +#' sd_expr_alias(1L, "foofy") #' sd_expr_column <- function(column_name, qualifier = NULL, ..., factory = sd_expr_factory()) { rlang::check_dots_empty() @@ -39,6 +46,21 @@ sd_expr_scalar_function <- function(function_name, args, ..., factory = sd_expr_ factory$scalar_function(function_name, args_as_expr_ptr) } +#' @rdname sd_expr_column +#' @export +sd_expr_cast <- function(expr, type, ..., factory = sd_expr_factory()) { + expr <- as_sd_expr(expr, factory = factory) + type <- nanoarrow::as_nanoarrow_schema(type) + expr$cast(type) +} + +#' @rdname sd_expr_column +#' @export +sd_expr_alias <- function(expr, alias, ..., factory = sd_expr_factory()) { + expr <- as_sd_expr(expr, factory = factory) + expr$alias(alias) +} + #' @rdname sd_expr_column #' @export as_sd_expr <- function(x, ..., factory = sd_expr_factory()) { diff --git a/r/sedonadb/R/literal.R b/r/sedonadb/R/literal.R index bf97be0c6..8c184d62c 100644 --- a/r/sedonadb/R/literal.R +++ b/r/sedonadb/R/literal.R @@ -64,6 +64,11 @@ as_sedonadb_literal.raw <- function(x, ..., type = NULL) { as_sedonadb_literal_from_nanoarrow(list(x), ..., type = type) } +#' @export +as_sedonadb_literal.wk_wkb <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +} + as_sedonadb_literal_from_nanoarrow <- function(x, ..., type = NULL) { array <- nanoarrow::as_nanoarrow_array(x) if (array$length != 1L) { diff --git a/r/sedonadb/man/sd_expr_column.Rd b/r/sedonadb/man/sd_expr_column.Rd index a963ef712..22b229ac6 100644 --- a/r/sedonadb/man/sd_expr_column.Rd +++ b/r/sedonadb/man/sd_expr_column.Rd @@ -4,6 +4,8 @@ \alias{sd_expr_column} \alias{sd_expr_literal} \alias{sd_expr_scalar_function} +\alias{sd_expr_cast} +\alias{sd_expr_alias} \alias{as_sd_expr} \alias{is_sd_expr} \alias{sd_expr_factory} @@ -15,6 +17,10 @@ sd_expr_literal(x, ..., type = NULL, factory = sd_expr_factory()) sd_expr_scalar_function(function_name, args, ..., factory = sd_expr_factory()) +sd_expr_cast(expr, type, ..., factory = sd_expr_factory()) + +sd_expr_alias(expr, alias, ..., factory = sd_expr_factory()) + as_sd_expr(x, ..., factory = sd_expr_factory()) is_sd_expr(x) @@ -32,6 +38,15 @@ dots must be empty.} \item{factory}{A \code{\link[=sd_expr_factory]{sd_expr_factory()}}. This factory wraps a SedonaDB context and is used to resolve scalar functions and/or retrieve options.} + +\item{type}{A destination type into which \code{expr} should be cast.} + +\item{function_name}{The name of the function to call. This name is resolved +from the context associated with \code{factory}.} + +\item{expr}{A SedonaDBExpr or object coercible to one with \code{\link[=as_sd_expr]{as_sd_expr()}}.} + +\item{alias}{An alias to apply to \code{expr}.} } \value{ An object of class SedonaDBExpr @@ -43,5 +58,7 @@ Create SedonaDB logical expressions sd_expr_column("foofy") sd_expr_literal(1L) sd_expr_scalar_function("abs", list(1L)) +sd_expr_cast(1L, nanoarrow::na_int64()) +sd_expr_alias(1L, "foofy") } diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md index 762473197..a7c16761f 100644 --- a/r/sedonadb/tests/testthat/_snaps/expression.md +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -1,10 +1,42 @@ -# expressions can be printed +# basic expression types can be constructed Code - print(as_sedonadb_literal("foofy")) + sd_expr_column("foofy") Output - Utf8("foofy") + foofy + +--- + + Code + sd_expr_literal(1L) + Output + + Int32(1) + +--- + + Code + sd_expr_scalar_function("abs", list(1L)) + Output + + abs(Int32(1)) + +--- + + Code + sd_expr_cast(1L, nanoarrow::na_int64()) + Output + + CAST(Int32(1) AS Int64) + +--- + + Code + sd_expr_alias(1L, "foofy") + Output + + Int32(1) AS foofy # literal expressions can be translated diff --git a/r/sedonadb/tests/testthat/_snaps/literal.md b/r/sedonadb/tests/testthat/_snaps/literal.md new file mode 100644 index 000000000..c115e766f --- /dev/null +++ b/r/sedonadb/tests/testthat/_snaps/literal.md @@ -0,0 +1,8 @@ +# literals with Arrow extension metadata can be converted to literals + + Code + as_sedonadb_literal(wk::as_wkb("POINT (0 1)")) + Output + + Binary("1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,240,63") FieldMetadata { inner: {"ARROW:extension:metadata": "{}", "ARROW:extension:name": "geoarrow.wkb"} } + diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R index e0af41dfe..d4079bad4 100644 --- a/r/sedonadb/tests/testthat/test-expression.R +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -15,9 +15,18 @@ # specific language governing permissions and limitations # under the License. -test_that("expressions can be printed", { - expect_snapshot( - print(as_sedonadb_literal("foofy")) +test_that("basic expression types can be constructed", { + expect_snapshot(sd_expr_column("foofy")) + expect_snapshot(sd_expr_literal(1L)) + expect_snapshot(sd_expr_scalar_function("abs", list(1L))) + expect_snapshot(sd_expr_cast(1L, nanoarrow::na_int64())) + expect_snapshot(sd_expr_alias(1L, "foofy")) +}) + +test_that("casts to a type with extension metadata can't be constructed", { + expect_error( + sd_expr_cast(1L, geoarrow::geoarrow_wkb()), + "Can't cast to Arrow extension type 'geoarrow.wkb'" ) }) diff --git a/r/sedonadb/tests/testthat/test-literal.R b/r/sedonadb/tests/testthat/test-literal.R index 6096ef151..3d41b93f1 100644 --- a/r/sedonadb/tests/testthat/test-literal.R +++ b/r/sedonadb/tests/testthat/test-literal.R @@ -42,6 +42,10 @@ test_that("basic literals can be converted to expressions", { ) }) +test_that("literals with Arrow extension metadata can be converted to literals", { + expect_snapshot(as_sedonadb_literal(wk::as_wkb("POINT (0 1)"))) +}) + test_that("non-scalars can't be automatically converted to literals", { expect_error( as_sedonadb_literal(1:5)$debug_string(), From 36f0f908964aaf1e110faac52ff235456c1fa702 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 1 Jan 2026 22:17:10 -0600 Subject: [PATCH 14/26] undo the configure hack --- r/sedonadb/configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/sedonadb/configure b/r/sedonadb/configure index 4bd8fde96..a4f74a619 100755 --- a/r/sedonadb/configure +++ b/r/sedonadb/configure @@ -22,7 +22,7 @@ pkg-config geos 2>/dev/null if [ $? -eq 0 ]; then - PKGCONFIG_LIBS=`pkg-config --libs geos --static` + PKGCONFIG_LIBS=`pkg-config --libs geos` fi if [ "$LIB_DIR" ]; then From fde141704531790428525571be85e5d0ee5aa765 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 1 Jan 2026 22:17:39 -0600 Subject: [PATCH 15/26] add license --- r/sedonadb/R/expression.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index f0fd8401a..174ef76a9 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. #' Create SedonaDB logical expressions #' From 50a3cec28d93723fea6c075cc1ad2b0d0a1cee65 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 1 Jan 2026 22:18:28 -0600 Subject: [PATCH 16/26] whitespace --- r/sedonadb/R/000-wrappers.R | 1 - 1 file changed, 1 deletion(-) diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R index 1e09c0f5c..a1391d058 100644 --- a/r/sedonadb/R/000-wrappers.R +++ b/r/sedonadb/R/000-wrappers.R @@ -365,4 +365,3 @@ class(`SedonaDBExprFactory`) <- c("sedonadb::SedonaDBExprFactory__bundle", "savv `print.sedonadb::SedonaDBExprFactory__bundle` <- function(x, ...) { cat('sedonadb::SedonaDBExprFactory\n') } - From e9f692784ceb278b4d3b95fa73b63255b9979bf7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 1 Jan 2026 23:56:47 -0600 Subject: [PATCH 17/26] add some more expression types --- r/sedonadb/NAMESPACE | 2 + r/sedonadb/R/000-wrappers.R | 23 ++++++ r/sedonadb/R/expression.R | 46 ++++++++--- r/sedonadb/R/literal.R | 2 +- r/sedonadb/man/sd_expr_column.Rd | 30 ++++--- r/sedonadb/src/init.c | 28 +++++++ r/sedonadb/src/rust/api.h | 8 ++ r/sedonadb/src/rust/src/expression.rs | 78 +++++++++++++++++-- .../tests/testthat/_snaps/expression.md | 42 ++++++++++ r/sedonadb/tests/testthat/test-expression.R | 10 +++ r/sedonadb/tests/testthat/test-literal.R | 7 ++ 11 files changed, 246 insertions(+), 30 deletions(-) diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE index abca6a32d..97b8969ce 100644 --- a/r/sedonadb/NAMESPACE +++ b/r/sedonadb/NAMESPACE @@ -37,10 +37,12 @@ export(sd_configure_proj) export(sd_count) export(sd_drop_view) export(sd_expr_alias) +export(sd_expr_binary) export(sd_expr_cast) export(sd_expr_column) export(sd_expr_factory) export(sd_expr_literal) +export(sd_expr_negative) export(sd_expr_scalar_function) export(sd_preview) export(sd_read_parquet) diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R index a1391d058..40fd3ce4c 100644 --- a/r/sedonadb/R/000-wrappers.R +++ b/r/sedonadb/R/000-wrappers.R @@ -292,6 +292,12 @@ class(`InternalDataFrame`) <- c("sedonadb::InternalDataFrame__bundle", "savvy_se } } +`SedonaDBExpr_negate` <- function(self) { + function() { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExpr_negate__impl, `self`)) + } +} + `.savvy_wrap_SedonaDBExpr` <- function(ptr) { e <- new.env(parent = emptyenv()) e$.ptr <- ptr @@ -299,6 +305,7 @@ class(`InternalDataFrame`) <- c("sedonadb::InternalDataFrame__bundle", "savvy_se e$`cast` <- `SedonaDBExpr_cast`(ptr) e$`debug_string` <- `SedonaDBExpr_debug_string`(ptr) e$`display` <- `SedonaDBExpr_display`(ptr) + e$`negate` <- `SedonaDBExpr_negate`(ptr) class(e) <- c("sedonadb::SedonaDBExpr", "SedonaDBExpr", "savvy_sedonadb__sealed") e @@ -321,6 +328,20 @@ class(`SedonaDBExpr`) <- c("sedonadb::SedonaDBExpr__bundle", "savvy_sedonadb__se ### wrapper functions for SedonaDBExprFactory +`SedonaDBExprFactory_aggregate_function` <- function(self) { + function(`name`, `args`, `na_rm` = NULL, `distinct` = NULL) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_aggregate_function__impl, `self`, `name`, `args`, `na_rm`, `distinct`)) + } +} + +`SedonaDBExprFactory_binary` <- function(self) { + function(`op`, `lhs`, `rhs`) { + `lhs` <- .savvy_extract_ptr(`lhs`, "sedonadb::SedonaDBExpr") + `rhs` <- .savvy_extract_ptr(`rhs`, "sedonadb::SedonaDBExpr") + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_binary__impl, `self`, `op`, `lhs`, `rhs`)) + } +} + `SedonaDBExprFactory_column` <- function(self) { function(`name`, `qualifier` = NULL) { .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_column__impl, `self`, `name`, `qualifier`)) @@ -336,6 +357,8 @@ class(`SedonaDBExpr`) <- c("sedonadb::SedonaDBExpr__bundle", "savvy_sedonadb__se `.savvy_wrap_SedonaDBExprFactory` <- function(ptr) { e <- new.env(parent = emptyenv()) e$.ptr <- ptr + e$`aggregate_function` <- `SedonaDBExprFactory_aggregate_function`(ptr) + e$`binary` <- `SedonaDBExprFactory_binary`(ptr) e$`column` <- `SedonaDBExprFactory_column`(ptr) e$`scalar_function` <- `SedonaDBExprFactory_scalar_function`(ptr) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 174ef76a9..5f119dff4 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -25,10 +25,11 @@ #' @param type A destination type into which `expr` should be cast. #' @param expr A SedonaDBExpr or object coercible to one with [as_sd_expr()]. #' @param alias An alias to apply to `expr`. +#' @param op Operator name for a binary expression. In general these follow +#' R function names (e.g., `>`, `<`, `+`, `-`). +#' @param lhs,rhs Arguments to a binary expression #' @param factory A [sd_expr_factory()]. This factory wraps a SedonaDB context #' and is used to resolve scalar functions and/or retrieve options. -#' @param ... Used to accommodate future expansion of the API. Currently all -#' dots must be empty. #' #' @returns An object of class SedonaDBExpr #' @export @@ -40,22 +41,31 @@ #' sd_expr_cast(1L, nanoarrow::na_int64()) #' sd_expr_alias(1L, "foofy") #' -sd_expr_column <- function(column_name, qualifier = NULL, ..., factory = sd_expr_factory()) { - rlang::check_dots_empty() +sd_expr_column <- function(column_name, qualifier = NULL, factory = sd_expr_factory()) { factory$column(column_name, qualifier) } #' @rdname sd_expr_column #' @export -sd_expr_literal <- function(x, ..., type = NULL, factory = sd_expr_factory()) { - rlang::check_dots_empty() +sd_expr_literal <- function(x, type = NULL, factory = sd_expr_factory()) { as_sedonadb_literal(x, type = type, factory = factory) } #' @rdname sd_expr_column #' @export -sd_expr_scalar_function <- function(function_name, args, ..., factory = sd_expr_factory()) { - rlang::check_dots_empty() +sd_expr_binary <- function(op, lhs, rhs, factory = sd_expr_factory()) { + factory$binary(op, as_sd_expr(lhs), as_sd_expr(rhs)) +} + +#' @rdname sd_expr_column +#' @export +sd_expr_negative <- function(expr, factory = sd_expr_factory()) { + as_sd_expr(expr, factory = factory)$negate() +} + +#' @rdname sd_expr_column +#' @export +sd_expr_scalar_function <- function(function_name, args, factory = sd_expr_factory()) { args_as_expr <- lapply(args, as_sd_expr, factory = factory) # Not sure why we need this exactly (something about savvy) args_as_expr_ptr <- lapply(args_as_expr, "[[", ".ptr") @@ -64,7 +74,17 @@ sd_expr_scalar_function <- function(function_name, args, ..., factory = sd_expr_ #' @rdname sd_expr_column #' @export -sd_expr_cast <- function(expr, type, ..., factory = sd_expr_factory()) { +sd_expr_aggregate_function <- function(function_name, args, ..., + na.rm = FALSE, distinct = FALSE, factory = sd_expr_factory()) { + args_as_expr <- lapply(args, as_sd_expr, factory = factory) + # Not sure why we need this exactly (something about savvy) + args_as_expr_ptr <- lapply(args_as_expr, "[[", ".ptr") + factory$aggregate_function(function_name, args_as_expr_ptr, na_rm = na.rm, distinct = distinct) +} + +#' @rdname sd_expr_column +#' @export +sd_expr_cast <- function(expr, type, factory = sd_expr_factory()) { expr <- as_sd_expr(expr, factory = factory) type <- nanoarrow::as_nanoarrow_schema(type) expr$cast(type) @@ -72,14 +92,14 @@ sd_expr_cast <- function(expr, type, ..., factory = sd_expr_factory()) { #' @rdname sd_expr_column #' @export -sd_expr_alias <- function(expr, alias, ..., factory = sd_expr_factory()) { +sd_expr_alias <- function(expr, alias, factory = sd_expr_factory()) { expr <- as_sd_expr(expr, factory = factory) expr$alias(alias) } #' @rdname sd_expr_column #' @export -as_sd_expr <- function(x, ..., factory = sd_expr_factory()) { +as_sd_expr <- function(x, factory = sd_expr_factory()) { if (inherits(x, "SedonaDBExpr")) { x } else { @@ -229,3 +249,7 @@ default_fns <- new.env(parent = emptyenv()) sd_register_translation("base::abs", function(.factory, x) { sd_expr_scalar_function("abs", list(x), factory = .factory) }) + +sd_register_translation("base::sum", function(.factory, x, ..., na.rm = FALSE) { + sd_expr_aggregate_function("sum", list(x), na.rm = na.rm, factory = .factory) +}) diff --git a/r/sedonadb/R/literal.R b/r/sedonadb/R/literal.R index 8c184d62c..679239cab 100644 --- a/r/sedonadb/R/literal.R +++ b/r/sedonadb/R/literal.R @@ -91,7 +91,7 @@ as_sedonadb_literal.nanoarrow_array <- function(x, ..., type = NULL) { handle_type_request <- function(x, type) { if (!is.null(type)) { - x$cast(nanoarrow::as_nanoarrow_schema(x)) + x$cast(nanoarrow::as_nanoarrow_schema(type)) } else { x } diff --git a/r/sedonadb/man/sd_expr_column.Rd b/r/sedonadb/man/sd_expr_column.Rd index 22b229ac6..3068c0c90 100644 --- a/r/sedonadb/man/sd_expr_column.Rd +++ b/r/sedonadb/man/sd_expr_column.Rd @@ -3,6 +3,8 @@ \name{sd_expr_column} \alias{sd_expr_column} \alias{sd_expr_literal} +\alias{sd_expr_binary} +\alias{sd_expr_negative} \alias{sd_expr_scalar_function} \alias{sd_expr_cast} \alias{sd_expr_alias} @@ -11,17 +13,21 @@ \alias{sd_expr_factory} \title{Create SedonaDB logical expressions} \usage{ -sd_expr_column(column_name, qualifier = NULL, ..., factory = sd_expr_factory()) +sd_expr_column(column_name, qualifier = NULL, factory = sd_expr_factory()) -sd_expr_literal(x, ..., type = NULL, factory = sd_expr_factory()) +sd_expr_literal(x, type = NULL, factory = sd_expr_factory()) -sd_expr_scalar_function(function_name, args, ..., factory = sd_expr_factory()) +sd_expr_binary(op, lhs, rhs, factory = sd_expr_factory()) -sd_expr_cast(expr, type, ..., factory = sd_expr_factory()) +sd_expr_negative(expr, factory = sd_expr_factory()) -sd_expr_alias(expr, alias, ..., factory = sd_expr_factory()) +sd_expr_scalar_function(function_name, args, factory = sd_expr_factory()) -as_sd_expr(x, ..., factory = sd_expr_factory()) +sd_expr_cast(expr, type, factory = sd_expr_factory()) + +sd_expr_alias(expr, alias, factory = sd_expr_factory()) + +as_sd_expr(x, factory = sd_expr_factory()) is_sd_expr(x) @@ -33,19 +39,21 @@ sd_expr_factory() \item{qualifier}{An optional qualifier (e.g., table reference) that may be used to disambiguate a specific reference} -\item{...}{Used to accommodate future expansion of the API. Currently all -dots must be empty.} - \item{factory}{A \code{\link[=sd_expr_factory]{sd_expr_factory()}}. This factory wraps a SedonaDB context and is used to resolve scalar functions and/or retrieve options.} \item{type}{A destination type into which \code{expr} should be cast.} -\item{function_name}{The name of the function to call. This name is resolved -from the context associated with \code{factory}.} +\item{op}{Operator name for a binary expression. In general these follow +R function names (e.g., \code{>}, \code{<}, \code{+}, \code{-}).} + +\item{lhs, rhs}{Arguments to a binary expression} \item{expr}{A SedonaDBExpr or object coercible to one with \code{\link[=as_sd_expr]{as_sd_expr()}}.} +\item{function_name}{The name of the function to call. This name is resolved +from the context associated with \code{factory}.} + \item{alias}{An alias to apply to \code{expr}.} } \value{ diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c index 4a5bc4d68..6faa15217 100644 --- a/r/sedonadb/src/init.c +++ b/r/sedonadb/src/init.c @@ -229,6 +229,28 @@ SEXP savvy_SedonaDBExpr_display__impl(SEXP self__) { return handle_result(res); } +SEXP savvy_SedonaDBExpr_negate__impl(SEXP self__) { + SEXP res = savvy_SedonaDBExpr_negate__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_aggregate_function__impl(SEXP self__, + SEXP c_arg__name, + SEXP c_arg__args, + SEXP c_arg__na_rm, + SEXP c_arg__distinct) { + SEXP res = savvy_SedonaDBExprFactory_aggregate_function__ffi( + self__, c_arg__name, c_arg__args, c_arg__na_rm, c_arg__distinct); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_binary__impl(SEXP self__, SEXP c_arg__op, + SEXP c_arg__lhs, SEXP c_arg__rhs) { + SEXP res = savvy_SedonaDBExprFactory_binary__ffi(self__, c_arg__op, + c_arg__lhs, c_arg__rhs); + return handle_result(res); +} + SEXP savvy_SedonaDBExprFactory_column__impl(SEXP self__, SEXP c_arg__name, SEXP c_arg__qualifier) { SEXP res = savvy_SedonaDBExprFactory_column__ffi(self__, c_arg__name, @@ -313,6 +335,12 @@ static const R_CallMethodDef CallEntries[] = { (DL_FUNC)&savvy_SedonaDBExpr_debug_string__impl, 1}, {"savvy_SedonaDBExpr_display__impl", (DL_FUNC)&savvy_SedonaDBExpr_display__impl, 1}, + {"savvy_SedonaDBExpr_negate__impl", + (DL_FUNC)&savvy_SedonaDBExpr_negate__impl, 1}, + {"savvy_SedonaDBExprFactory_aggregate_function__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_aggregate_function__impl, 5}, + {"savvy_SedonaDBExprFactory_binary__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_binary__impl, 4}, {"savvy_SedonaDBExprFactory_column__impl", (DL_FUNC)&savvy_SedonaDBExprFactory_column__impl, 3}, {"savvy_SedonaDBExprFactory_literal__impl", diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h index f12959c83..fac6258bd 100644 --- a/r/sedonadb/src/rust/api.h +++ b/r/sedonadb/src/rust/api.h @@ -66,8 +66,16 @@ SEXP savvy_SedonaDBExpr_alias__ffi(SEXP self__, SEXP c_arg__name); SEXP savvy_SedonaDBExpr_cast__ffi(SEXP self__, SEXP c_arg__schema_xptr); SEXP savvy_SedonaDBExpr_debug_string__ffi(SEXP self__); SEXP savvy_SedonaDBExpr_display__ffi(SEXP self__); +SEXP savvy_SedonaDBExpr_negate__ffi(SEXP self__); // methods and associated functions for SedonaDBExprFactory +SEXP savvy_SedonaDBExprFactory_aggregate_function__ffi(SEXP self__, + SEXP c_arg__name, + SEXP c_arg__args, + SEXP c_arg__na_rm, + SEXP c_arg__distinct); +SEXP savvy_SedonaDBExprFactory_binary__ffi(SEXP self__, SEXP c_arg__op, + SEXP c_arg__lhs, SEXP c_arg__rhs); SEXP savvy_SedonaDBExprFactory_column__ffi(SEXP self__, SEXP c_arg__name, SEXP c_arg__qualifier); SEXP savvy_SedonaDBExprFactory_literal__ffi(SEXP c_arg__array_xptr, diff --git a/r/sedonadb/src/rust/src/expression.rs b/r/sedonadb/src/rust/src/expression.rs index 9f7f10b0e..a52fb34e9 100644 --- a/r/sedonadb/src/rust/src/expression.rs +++ b/r/sedonadb/src/rust/src/expression.rs @@ -19,8 +19,9 @@ use std::sync::Arc; use datafusion_common::{Column, ScalarValue}; use datafusion_expr::{ - expr::{FieldMetadata, ScalarFunction}, - Cast, Expr, + expr::{AggregateFunction, FieldMetadata, ScalarFunction}, + sqlparser::ast::NullTreatment, + BinaryExpr, Cast, Expr, Operator, }; use savvy::{savvy, savvy_err}; use sedona::context::SedonaContext; @@ -65,6 +66,11 @@ impl SedonaDBExpr { Ok(Self { inner }) } + + fn negate(&self) -> savvy::Result { + let inner = Expr::Negative(Box::new(self.inner.clone())); + Ok(Self { inner }) + } } #[savvy] @@ -98,17 +104,75 @@ impl SedonaDBExprFactory { Ok(SedonaDBExpr { inner }) } + fn binary( + &self, + op: &str, + lhs: &SedonaDBExpr, + rhs: &SedonaDBExpr, + ) -> savvy::Result { + let operator = match op { + "==" => Operator::Eq, + "!=" => Operator::NotEq, + ">" => Operator::Gt, + ">=" => Operator::GtEq, + "<" => Operator::Lt, + "<=" => Operator::LtEq, + "+" => Operator::Plus, + "-" => Operator::Minus, + "*" => Operator::Multiply, + "/" => Operator::Divide, + "&" => Operator::And, + "|" => Operator::Or, + other => return Err(savvy_err!("Unimplemented binary operation '{other}'")), + }; + + let inner = Expr::BinaryExpr(BinaryExpr::new( + Box::new(lhs.inner.clone()), + operator, + Box::new(rhs.inner.clone()), + )); + Ok(SedonaDBExpr { inner }) + } + fn scalar_function(&self, name: &str, args: savvy::Sexp) -> savvy::Result { - if let Some(scalar_udf) = self.ctx.ctx.state().scalar_functions().get(name) { + if let Some(udf) = self.ctx.ctx.state().scalar_functions().get(name) { let args = Self::exprs(args)?; - let inner = ScalarFunction::new_udf(scalar_udf.clone(), args); - Ok(SedonaDBExpr { - inner: Expr::ScalarFunction(inner), - }) + let inner = Expr::ScalarFunction(ScalarFunction::new_udf(udf.clone(), args)); + Ok(SedonaDBExpr { inner }) } else { Err(savvy_err!("Scalar UDF '{name}' not found")) } } + + fn aggregate_function( + &self, + name: &str, + args: savvy::Sexp, + na_rm: Option, + distinct: Option, + ) -> savvy::Result { + if let Some(udf) = self.ctx.ctx.state().aggregate_functions().get(name) { + let args = Self::exprs(args)?; + let null_treatment = if na_rm.unwrap_or(true) { + NullTreatment::IgnoreNulls + } else { + NullTreatment::RespectNulls + }; + + let inner = Expr::AggregateFunction(AggregateFunction::new_udf( + udf.clone(), + args, + distinct.unwrap_or(false), + None, // filter + vec![], // order by + Some(null_treatment), + )); + + Ok(SedonaDBExpr { inner }) + } else { + Err(savvy_err!("Aggregate UDF '{name}' not found")) + } + } } impl SedonaDBExprFactory { diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md index a7c16761f..1e3229f03 100644 --- a/r/sedonadb/tests/testthat/_snaps/expression.md +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -38,6 +38,30 @@ Int32(1) AS foofy +--- + + Code + sd_expr_binary("+", 1L, 2L) + Output + + Int32(1) + Int32(2) + +--- + + Code + sd_expr_negative(1L) + Output + + (- Int32(1)) + +--- + + Code + sd_expr_aggregate_function("sum", list(1L)) + Output + + sum(Int32(1)) RESPECT NULLS + # literal expressions can be translated Code @@ -78,6 +102,14 @@ abs(Int32(-1)) +--- + + Code + sd_eval_expr(quote(base::abs(-1L))) + Output + + abs(Int32(-1)) + # function calls without a translation are evaluated in R Code @@ -86,3 +118,13 @@ Int32(2) +# errors that occur during evaluation have reasonable context + + Code + sd_eval_expr(quote(stop("this will error"))) + Condition + Error in `sd_eval_expr()`: + ! Error evaluating translated expression `stop("this will error")` + Caused by error: + ! this will error + diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R index d4079bad4..2a53a06cb 100644 --- a/r/sedonadb/tests/testthat/test-expression.R +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -21,6 +21,9 @@ test_that("basic expression types can be constructed", { expect_snapshot(sd_expr_scalar_function("abs", list(1L))) expect_snapshot(sd_expr_cast(1L, nanoarrow::na_int64())) expect_snapshot(sd_expr_alias(1L, "foofy")) + expect_snapshot(sd_expr_binary("+", 1L, 2L)) + expect_snapshot(sd_expr_negative(1L)) + expect_snapshot(sd_expr_aggregate_function("sum", list(1L))) }) test_that("casts to a type with extension metadata can't be constructed", { @@ -50,10 +53,17 @@ test_that("column expressions can be translated", { }) test_that("function calls with a translation become function calls", { + # Should work for the qualified or unqualified versions expect_snapshot(sd_eval_expr(quote(abs(-1L)))) + expect_snapshot(sd_eval_expr(quote(base::abs(-1L)))) }) test_that("function calls without a translation are evaluated in R", { function_without_a_translation <- function(x) x + 1L expect_snapshot(sd_eval_expr(quote(function_without_a_translation(1L)))) }) + +test_that("errors that occur during evaluation have reasonable context", { + function_without_a_translation <- function(x) x + 1L + expect_snapshot(sd_eval_expr(quote(stop("this will error"))), error = TRUE) +}) diff --git a/r/sedonadb/tests/testthat/test-literal.R b/r/sedonadb/tests/testthat/test-literal.R index 3d41b93f1..a9f09406b 100644 --- a/r/sedonadb/tests/testthat/test-literal.R +++ b/r/sedonadb/tests/testthat/test-literal.R @@ -42,6 +42,13 @@ test_that("basic literals can be converted to expressions", { ) }) +test_that("literals can request a type", { + expect_identical( + as_sedonadb_literal(1.0, type = nanoarrow::na_float())$debug_string(), + "Cast(Cast { expr: Literal(Float64(1), None), data_type: Float32 })" + ) +}) + test_that("literals with Arrow extension metadata can be converted to literals", { expect_snapshot(as_sedonadb_literal(wk::as_wkb("POINT (0 1)"))) }) From 4287b9a84ea9f271bfeb7f3eb6eda245b6668996 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 2 Jan 2026 00:29:49 -0600 Subject: [PATCH 18/26] a few tests --- r/sedonadb/R/expression.R | 45 ++++++++++++++++--- .../tests/testthat/_snaps/expression.md | 8 ++++ r/sedonadb/tests/testthat/test-expression.R | 11 +++++ 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 5f119dff4..6ba6e0a95 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -135,6 +135,8 @@ print.SedonaDBExpr <- function(x, ...) { #' @returns A `SedonaDBExpr` #' @noRd sd_eval_expr <- function(expr, expr_ctx = sd_expr_ctx(env = env), env = parent.frame()) { + ensure_translations_registered() + rlang::try_fetch({ result <- sd_eval_expr_inner(expr, expr_ctx) as_sd_expr(result, factory = factory) @@ -246,10 +248,41 @@ sd_register_translation <- function(qualified_name, fn) { default_fns <- new.env(parent = emptyenv()) -sd_register_translation("base::abs", function(.factory, x) { - sd_expr_scalar_function("abs", list(x), factory = .factory) -}) +# Register translations lazily because SQL users don't need them and because +# we need rlang for this and it is currently in Suggests +ensure_translations_registered <- function() { + if (!is.null(default_fns$abs)) { + return() + } + + sd_register_translation("base::abs", function(.factory, x) { + sd_expr_scalar_function("abs", list(x), factory = .factory) + }) + + sd_register_translation("base::sum", function(.factory, x, ..., na.rm = FALSE) { + sd_expr_aggregate_function("sum", list(x), na.rm = na.rm, factory = .factory) + }) + + sd_register_translation("base::+", function(.factory, lhs, rhs) { + if (missing(rhs)) { + # Use a double negative to ensure this fails for non-numeric types + sd_expr_negative(sd_expr_negative(lhs)) + } else { + sd_expr_binary("+", lhs, rhs) + } + }) + + sd_register_translation("base::-", function(.factory, lhs, rhs) { + if (missing(rhs)) { + sd_expr_negative(lhs) + } else { + sd_expr_binary("-", lhs, rhs) + } + }) -sd_register_translation("base::sum", function(.factory, x, ..., na.rm = FALSE) { - sd_expr_aggregate_function("sum", list(x), na.rm = na.rm, factory = .factory) -}) + for (op in c("==", "!=", ">", ">=", "<", "<=", "*", "/", "&", "|")) { + sd_register_translation(paste0("base::", op), rlang::inject(function(.factory, lhs, rhs) { + sd_expr_binary(!!op, lhs, rhs, factory = .factory) + })) + } +} diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md index 1e3229f03..e585ce14d 100644 --- a/r/sedonadb/tests/testthat/_snaps/expression.md +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -118,6 +118,14 @@ Int32(2) +# function calls that map to binary expressions are translated + + Code + sd_eval_expr(quote(1 + 2)) + Output + + Float64(1) + Float64(2) + # errors that occur during evaluation have reasonable context Code diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R index 2a53a06cb..f0f3d5af2 100644 --- a/r/sedonadb/tests/testthat/test-expression.R +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -63,6 +63,17 @@ test_that("function calls without a translation are evaluated in R", { expect_snapshot(sd_eval_expr(quote(function_without_a_translation(1L)))) }) +test_that("function calls that map to binary expressions are translated", { + # + and - are special-cased because in R the unary function calls are valid + expect_snapshot(sd_eval_expr(quote(+2))) + expect_snapshot(sd_eval_expr(quote(1 + 2))) + expect_snapshot(sd_eval_expr(quote(-2))) + expect_snapshot(sd_eval_expr(quote(1 - 2))) + + # normal translation + expect_snapshot(sd_eval_expr(quote(1 > 2))) +}) + test_that("errors that occur during evaluation have reasonable context", { function_without_a_translation <- function(x) x + 1L expect_snapshot(sd_eval_expr(quote(stop("this will error"))), error = TRUE) From c80f3903e2d03081d4a119e34babacc95e8bfa3a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 2 Jan 2026 00:36:47 -0600 Subject: [PATCH 19/26] pass the context instead of the factory --- r/sedonadb/R/expression.R | 29 ++++++++------- .../tests/testthat/_snaps/expression.md | 36 +++++++++++++++++-- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 6ba6e0a95..51279abcd 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -178,8 +178,8 @@ sd_eval_translation <- function(fn_key, expr, expr_ctx) { # override this step to have more control over the expression evaluation. evaluated_args <- lapply(expr[-1], sd_eval_expr, expr_ctx = expr_ctx) - # Recreate the call, injecting the factory as the first argument - new_call <- rlang::call2(new_fn_expr, expr_ctx$factory, !!!evaluated_args) + # Recreate the call, injecting the context as the first argument + new_call <- rlang::call2(new_fn_expr, expr_ctx, !!!evaluated_args) # ...and evaluate it sd_eval_default(new_call, expr_ctx) @@ -217,7 +217,6 @@ sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { factory = sd_expr_factory(), schema = schema, data = rlang::as_data_mask(data), - data_names = data_names, env = env, fns = default_fns ), @@ -255,34 +254,34 @@ ensure_translations_registered <- function() { return() } - sd_register_translation("base::abs", function(.factory, x) { - sd_expr_scalar_function("abs", list(x), factory = .factory) + sd_register_translation("base::abs", function(.ctx, x) { + sd_expr_scalar_function("abs", list(x), factory = .ctx$factory) }) - sd_register_translation("base::sum", function(.factory, x, ..., na.rm = FALSE) { - sd_expr_aggregate_function("sum", list(x), na.rm = na.rm, factory = .factory) + sd_register_translation("base::sum", function(.ctx, x, ..., na.rm = FALSE) { + sd_expr_aggregate_function("sum", list(x), na.rm = na.rm, factory = .ctx$.factory) }) - sd_register_translation("base::+", function(.factory, lhs, rhs) { + sd_register_translation("base::+", function(.ctx, lhs, rhs) { if (missing(rhs)) { # Use a double negative to ensure this fails for non-numeric types - sd_expr_negative(sd_expr_negative(lhs)) + sd_expr_negative(sd_expr_negative(lhs, factory = .ctx$factory), factory = .ctx$factory) } else { - sd_expr_binary("+", lhs, rhs) + sd_expr_binary("+", lhs, rhs, factory = .ctx$factory) } }) - sd_register_translation("base::-", function(.factory, lhs, rhs) { + sd_register_translation("base::-", function(.ctx, lhs, rhs) { if (missing(rhs)) { - sd_expr_negative(lhs) + sd_expr_negative(lhs, factory = .ctx$factory) } else { - sd_expr_binary("-", lhs, rhs) + sd_expr_binary("-", lhs, rhs, factory = .ctx$factory) } }) for (op in c("==", "!=", ">", ">=", "<", "<=", "*", "/", "&", "|")) { - sd_register_translation(paste0("base::", op), rlang::inject(function(.factory, lhs, rhs) { - sd_expr_binary(!!op, lhs, rhs, factory = .factory) + sd_register_translation(paste0("base::", op), rlang::inject(function(.ctx, lhs, rhs) { + sd_expr_binary(!!op, lhs, rhs, factory = .ctx$factory) })) } } diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md index e585ce14d..5d7452a58 100644 --- a/r/sedonadb/tests/testthat/_snaps/expression.md +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -100,7 +100,7 @@ sd_eval_expr(quote(abs(-1L))) Output - abs(Int32(-1)) + abs((- Int32(1))) --- @@ -108,7 +108,7 @@ sd_eval_expr(quote(base::abs(-1L))) Output - abs(Int32(-1)) + abs((- Int32(1))) # function calls without a translation are evaluated in R @@ -120,12 +120,44 @@ # function calls that map to binary expressions are translated + Code + sd_eval_expr(quote(+2)) + Output + + (- (- Float64(2))) + +--- + Code sd_eval_expr(quote(1 + 2)) Output Float64(1) + Float64(2) +--- + + Code + sd_eval_expr(quote(-2)) + Output + + (- Float64(2)) + +--- + + Code + sd_eval_expr(quote(1 - 2)) + Output + + Float64(1) - Float64(2) + +--- + + Code + sd_eval_expr(quote(1 > 2)) + Output + + Float64(1) > Float64(2) + # errors that occur during evaluation have reasonable context Code From 35c1aa45ff06b20da1d51da5c663bc4605f1a01c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 2 Jan 2026 00:38:37 -0600 Subject: [PATCH 20/26] update doc --- r/sedonadb/R/expression.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 51279abcd..55094835f 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -229,9 +229,9 @@ sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { #' @param qualified_name The name of the function in the form `pkg::fun` or #' `fun` if the package name is not relevant. This allows translations to #' support calls to `fun()` or `pkg::fun()` that appear in an R expression. -#' @param fn A function. The first argument must always be `.factory`, which -#' is the instance of `SedonaDBExprFactory` that may be used to construct -#' the required expressions. +#' @param fn A function. The first argument must always be `.ctx`, which +#' is the instance of [sd_expr_ctx()] that may be used to construct +#' the required expressions (using `$factory`). #' #' @returns fn, invisibly #' @noRd From ea1d8bb347d542045623916da3619c03477a96e7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 2 Jan 2026 00:42:59 -0600 Subject: [PATCH 21/26] maybe make the diff more readable --- r/sedonadb/.gitattributes | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 r/sedonadb/.gitattributes diff --git a/r/sedonadb/.gitattributes b/r/sedonadb/.gitattributes new file mode 100644 index 000000000..3f8d0f757 --- /dev/null +++ b/r/sedonadb/.gitattributes @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +R/000-wrappers.R linguist-generated +src/init.c linguist-generated +src/rust/api.h linguist-generated From d25c307f98b0ada540e044a029c827776a5e4c46 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 2 Jan 2026 01:22:01 -0600 Subject: [PATCH 22/26] fix docs and evaluation errors --- r/sedonadb/NAMESPACE | 1 + r/sedonadb/R/expression.R | 6 +++--- r/sedonadb/man/sd_expr_column.Rd | 10 ++++++++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE index 97b8969ce..11a141b96 100644 --- a/r/sedonadb/NAMESPACE +++ b/r/sedonadb/NAMESPACE @@ -36,6 +36,7 @@ export(sd_compute) export(sd_configure_proj) export(sd_count) export(sd_drop_view) +export(sd_expr_aggregate_function) export(sd_expr_alias) export(sd_expr_binary) export(sd_expr_cast) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 55094835f..65f820e2f 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -176,7 +176,7 @@ sd_eval_translation <- function(fn_key, expr, expr_ctx) { # Evaluate arguments individually. We may need to allow translations to # override this step to have more control over the expression evaluation. - evaluated_args <- lapply(expr[-1], sd_eval_expr, expr_ctx = expr_ctx) + evaluated_args <- lapply(expr[-1], sd_eval_expr_inner, expr_ctx = expr_ctx) # Recreate the call, injecting the context as the first argument new_call <- rlang::call2(new_fn_expr, expr_ctx, !!!evaluated_args) @@ -230,7 +230,7 @@ sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { #' `fun` if the package name is not relevant. This allows translations to #' support calls to `fun()` or `pkg::fun()` that appear in an R expression. #' @param fn A function. The first argument must always be `.ctx`, which -#' is the instance of [sd_expr_ctx()] that may be used to construct +#' is the instance of `sd_expr_ctx()` that may be used to construct #' the required expressions (using `$factory`). #' #' @returns fn, invisibly @@ -259,7 +259,7 @@ ensure_translations_registered <- function() { }) sd_register_translation("base::sum", function(.ctx, x, ..., na.rm = FALSE) { - sd_expr_aggregate_function("sum", list(x), na.rm = na.rm, factory = .ctx$.factory) + sd_expr_aggregate_function("sum", list(x), na.rm = na.rm, factory = .ctx$factory) }) sd_register_translation("base::+", function(.ctx, lhs, rhs) { diff --git a/r/sedonadb/man/sd_expr_column.Rd b/r/sedonadb/man/sd_expr_column.Rd index 3068c0c90..50c41a89f 100644 --- a/r/sedonadb/man/sd_expr_column.Rd +++ b/r/sedonadb/man/sd_expr_column.Rd @@ -6,6 +6,7 @@ \alias{sd_expr_binary} \alias{sd_expr_negative} \alias{sd_expr_scalar_function} +\alias{sd_expr_aggregate_function} \alias{sd_expr_cast} \alias{sd_expr_alias} \alias{as_sd_expr} @@ -23,6 +24,15 @@ sd_expr_negative(expr, factory = sd_expr_factory()) sd_expr_scalar_function(function_name, args, factory = sd_expr_factory()) +sd_expr_aggregate_function( + function_name, + args, + ..., + na.rm = FALSE, + distinct = FALSE, + factory = sd_expr_factory() +) + sd_expr_cast(expr, type, factory = sd_expr_factory()) sd_expr_alias(expr, alias, factory = sd_expr_factory()) From 0cf36d63f8fd94e0454d3f6340db83f6372b8140 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 2 Jan 2026 01:22:37 -0600 Subject: [PATCH 23/26] Update r/sedonadb/src/rust/src/ffi.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- r/sedonadb/src/rust/src/ffi.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/r/sedonadb/src/rust/src/ffi.rs b/r/sedonadb/src/rust/src/ffi.rs index 5ffb40cf6..828364c38 100644 --- a/r/sedonadb/src/rust/src/ffi.rs +++ b/r/sedonadb/src/rust/src/ffi.rs @@ -49,8 +49,9 @@ pub fn import_array( ) -> savvy::Result<(Field, ArrayRef)> { let field = import_field(schema_xptr)?; let ffi_array_ref: &mut FFI_ArrowArray = import_xptr(&mut xptr, "nanoarrow_array")?; - let ffi_array = unsafe { FFI_ArrowArray::from_raw(ffi_array_ref as _) }; - let array_data = unsafe { from_ffi_and_data_type(ffi_array as _, field.data_type().clone())? }; + let ffi_array_owned = unsafe { FFI_ArrowArray::from_raw(ffi_array_ref as _) }; + let array_data = + unsafe { from_ffi_and_data_type(ffi_array_owned as _, field.data_type().clone())? }; let array_ref = make_array(array_data); Ok((field, array_ref)) } From 6f5818f503456628cb04109b7c37d701629845b1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 2 Jan 2026 01:29:16 -0600 Subject: [PATCH 24/26] Update r/sedonadb/R/expression.R Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- r/sedonadb/R/expression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 65f820e2f..6a53faf0b 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -139,7 +139,7 @@ sd_eval_expr <- function(expr, expr_ctx = sd_expr_ctx(env = env), env = parent.f rlang::try_fetch({ result <- sd_eval_expr_inner(expr, expr_ctx) - as_sd_expr(result, factory = factory) + as_sd_expr(result, factory = expr_ctx$factory) }, error = function(e) { rlang::abort( sprintf("Error evaluating translated expression %s", rlang::expr_label(expr)), From 8b0c6b3b980170e7ffe3c70436bdef992cdeb6b7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 5 Jan 2026 13:40:28 -0600 Subject: [PATCH 25/26] fix build --- r/sedonadb/src/rust/src/expression.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/r/sedonadb/src/rust/src/expression.rs b/r/sedonadb/src/rust/src/expression.rs index a52fb34e9..5623f5745 100644 --- a/r/sedonadb/src/rust/src/expression.rs +++ b/r/sedonadb/src/rust/src/expression.rs @@ -19,8 +19,7 @@ use std::sync::Arc; use datafusion_common::{Column, ScalarValue}; use datafusion_expr::{ - expr::{AggregateFunction, FieldMetadata, ScalarFunction}, - sqlparser::ast::NullTreatment, + expr::{AggregateFunction, FieldMetadata, NullTreatment, ScalarFunction}, BinaryExpr, Cast, Expr, Operator, }; use savvy::{savvy, savvy_err}; From fc7ae2bbf8acf74e0b472355c30574f632a01845 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 5 Jan 2026 13:54:44 -0600 Subject: [PATCH 26/26] simplify list of expr to r --- r/sedonadb/R/expression.R | 8 ++------ r/sedonadb/src/rust/src/expression.rs | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R index 6a53faf0b..50240d4a3 100644 --- a/r/sedonadb/R/expression.R +++ b/r/sedonadb/R/expression.R @@ -67,9 +67,7 @@ sd_expr_negative <- function(expr, factory = sd_expr_factory()) { #' @export sd_expr_scalar_function <- function(function_name, args, factory = sd_expr_factory()) { args_as_expr <- lapply(args, as_sd_expr, factory = factory) - # Not sure why we need this exactly (something about savvy) - args_as_expr_ptr <- lapply(args_as_expr, "[[", ".ptr") - factory$scalar_function(function_name, args_as_expr_ptr) + factory$scalar_function(function_name, args_as_expr) } #' @rdname sd_expr_column @@ -77,9 +75,7 @@ sd_expr_scalar_function <- function(function_name, args, factory = sd_expr_facto sd_expr_aggregate_function <- function(function_name, args, ..., na.rm = FALSE, distinct = FALSE, factory = sd_expr_factory()) { args_as_expr <- lapply(args, as_sd_expr, factory = factory) - # Not sure why we need this exactly (something about savvy) - args_as_expr_ptr <- lapply(args_as_expr, "[[", ".ptr") - factory$aggregate_function(function_name, args_as_expr_ptr, na_rm = na.rm, distinct = distinct) + factory$aggregate_function(function_name, args_as_expr, na_rm = na.rm, distinct = distinct) } #' @rdname sd_expr_column diff --git a/r/sedonadb/src/rust/src/expression.rs b/r/sedonadb/src/rust/src/expression.rs index 5623f5745..0add4b535 100644 --- a/r/sedonadb/src/rust/src/expression.rs +++ b/r/sedonadb/src/rust/src/expression.rs @@ -22,7 +22,7 @@ use datafusion_expr::{ expr::{AggregateFunction, FieldMetadata, NullTreatment, ScalarFunction}, BinaryExpr, Cast, Expr, Operator, }; -use savvy::{savvy, savvy_err}; +use savvy::{savvy, savvy_err, EnvironmentSexp}; use sedona::context::SedonaContext; use crate::{ @@ -179,11 +179,21 @@ impl SedonaDBExprFactory { savvy::ListSexp::try_from(exprs_sexp)? .iter() .map(|(_, item)| -> savvy::Result { - // This seems to require $.ptr from the list() input (can't just - // use list of R SedonaDBExpr objects) - let expr_wrapper: &SedonaDBExpr = item.try_into()?; + // item here is the Environment wrapper around the external pointer + let expr_wrapper: &SedonaDBExpr = EnvironmentSexp::try_from(item)?.try_into()?; Ok(expr_wrapper.inner.clone()) }) .collect() } } + +impl TryFrom for &SedonaDBExpr { + type Error = savvy::Error; + + fn try_from(env: EnvironmentSexp) -> Result { + env.get(".ptr")? + .map(<&SedonaDBExpr>::try_from) + .transpose()? + .ok_or(savvy_err!("Invalid SedonaDBExpr object.")) + } +}