diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d8a932894..fba9714aa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,6 +25,8 @@ repos: - id: check-yaml - id: detect-private-key - id: end-of-file-fixer + # R snapshot test files may have arbitrary file endings based on test results + exclude: "_snaps" - id: fix-byte-order-marker - id: trailing-whitespace @@ -34,7 +36,7 @@ repos: - id: codespell # types_or: [markdown, c, c++, rust, python] additional_dependencies: [tomli] - exclude: "^c/(sedona-geoarrow-c/src/geoarrow|sedona-geoarrow-c/src/nanoarrow|sedona-libgpuspatial/libgpuspatial|sedona-tg/src/tg)/.*|^docs/image/sedonadb-architecture\\.svg$" + exclude: "^c/(sedona-geoarrow-c/src/geoarrow|sedona-geoarrow-c/src/nanoarrow|sedona-libgpuspatial/libgpuspatial|sedona-tg/src/tg)/.*|^docs/image/sedonadb-architecture\\.svg$|^r/sedonadb/tools/savvy-update.sh$" args: ["--ignore-words-list=thirdparty"] - repo: https://github.com/astral-sh/ruff-pre-commit diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 6938c0fc7..bad4cb5c6 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -25,4 +25,5 @@ r/sedonadb/.Rbuildignore r/sedonadb/DESCRIPTION r/sedonadb/NAMESPACE r/sedonadb/src/sedonadb-win.def +r/sedonadb/tests/testthat/_snaps/* submodules/geoarrow-data/* diff --git a/r/sedonadb/.gitattributes b/r/sedonadb/.gitattributes new file mode 100644 index 000000000..3f8d0f757 --- /dev/null +++ b/r/sedonadb/.gitattributes @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +R/000-wrappers.R linguist-generated +src/init.c linguist-generated +src/rust/api.h linguist-generated diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE index f7eb1ee73..11a141b96 100644 --- a/r/sedonadb/NAMESPACE +++ b/r/sedonadb/NAMESPACE @@ -10,19 +10,41 @@ S3method(as_sedonadb_dataframe,nanoarrow_array) S3method(as_sedonadb_dataframe,nanoarrow_array_stream) S3method(as_sedonadb_dataframe,sedonadb_dataframe) S3method(as_sedonadb_dataframe,sf) +S3method(as_sedonadb_literal,"NULL") +S3method(as_sedonadb_literal,character) +S3method(as_sedonadb_literal,double) +S3method(as_sedonadb_literal,integer) +S3method(as_sedonadb_literal,nanoarrow_array) +S3method(as_sedonadb_literal,raw) +S3method(as_sedonadb_literal,wk_wkb) S3method(dim,sedonadb_dataframe) S3method(dimnames,sedonadb_dataframe) S3method(head,sedonadb_dataframe) S3method(infer_nanoarrow_schema,sedonadb_dataframe) S3method(print,"sedonadb::InternalContext__bundle") S3method(print,"sedonadb::InternalDataFrame__bundle") +S3method(print,"sedonadb::SedonaDBExprFactory__bundle") +S3method(print,"sedonadb::SedonaDBExpr__bundle") +S3method(print,SedonaDBExpr) S3method(print,sedonadb_dataframe) +export(as_sd_expr) export(as_sedonadb_dataframe) +export(as_sedonadb_literal) +export(is_sd_expr) export(sd_collect) export(sd_compute) export(sd_configure_proj) export(sd_count) export(sd_drop_view) +export(sd_expr_aggregate_function) +export(sd_expr_alias) +export(sd_expr_binary) +export(sd_expr_cast) +export(sd_expr_column) +export(sd_expr_factory) +export(sd_expr_literal) +export(sd_expr_negative) +export(sd_expr_scalar_function) export(sd_preview) export(sd_read_parquet) export(sd_register_udf) diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R index df1f61fd9..40fd3ce4c 100644 --- a/r/sedonadb/R/000-wrappers.R +++ b/r/sedonadb/R/000-wrappers.R @@ -265,3 +265,126 @@ class(`InternalDataFrame`) <- c("sedonadb::InternalDataFrame__bundle", "savvy_se `print.sedonadb::InternalDataFrame__bundle` <- function(x, ...) { cat('sedonadb::InternalDataFrame\n') } + +### wrapper functions for SedonaDBExpr + +`SedonaDBExpr_alias` <- function(self) { + function(`name`) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExpr_alias__impl, `self`, `name`)) + } +} + +`SedonaDBExpr_cast` <- function(self) { + function(`schema_xptr`) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExpr_cast__impl, `self`, `schema_xptr`)) + } +} + +`SedonaDBExpr_debug_string` <- function(self) { + function() { + .Call(savvy_SedonaDBExpr_debug_string__impl, `self`) + } +} + +`SedonaDBExpr_display` <- function(self) { + function() { + .Call(savvy_SedonaDBExpr_display__impl, `self`) + } +} + +`SedonaDBExpr_negate` <- function(self) { + function() { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExpr_negate__impl, `self`)) + } +} + +`.savvy_wrap_SedonaDBExpr` <- function(ptr) { + e <- new.env(parent = emptyenv()) + e$.ptr <- ptr + e$`alias` <- `SedonaDBExpr_alias`(ptr) + e$`cast` <- `SedonaDBExpr_cast`(ptr) + e$`debug_string` <- `SedonaDBExpr_debug_string`(ptr) + e$`display` <- `SedonaDBExpr_display`(ptr) + e$`negate` <- `SedonaDBExpr_negate`(ptr) + + class(e) <- c("sedonadb::SedonaDBExpr", "SedonaDBExpr", "savvy_sedonadb__sealed") + e +} + + + +`SedonaDBExpr` <- new.env(parent = emptyenv()) + +### associated functions for SedonaDBExpr + + + +class(`SedonaDBExpr`) <- c("sedonadb::SedonaDBExpr__bundle", "savvy_sedonadb__sealed") + +#' @export +`print.sedonadb::SedonaDBExpr__bundle` <- function(x, ...) { + cat('sedonadb::SedonaDBExpr\n') +} + +### wrapper functions for SedonaDBExprFactory + +`SedonaDBExprFactory_aggregate_function` <- function(self) { + function(`name`, `args`, `na_rm` = NULL, `distinct` = NULL) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_aggregate_function__impl, `self`, `name`, `args`, `na_rm`, `distinct`)) + } +} + +`SedonaDBExprFactory_binary` <- function(self) { + function(`op`, `lhs`, `rhs`) { + `lhs` <- .savvy_extract_ptr(`lhs`, "sedonadb::SedonaDBExpr") + `rhs` <- .savvy_extract_ptr(`rhs`, "sedonadb::SedonaDBExpr") + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_binary__impl, `self`, `op`, `lhs`, `rhs`)) + } +} + +`SedonaDBExprFactory_column` <- function(self) { + function(`name`, `qualifier` = NULL) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_column__impl, `self`, `name`, `qualifier`)) + } +} + +`SedonaDBExprFactory_scalar_function` <- function(self) { + function(`name`, `args`) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_scalar_function__impl, `self`, `name`, `args`)) + } +} + +`.savvy_wrap_SedonaDBExprFactory` <- function(ptr) { + e <- new.env(parent = emptyenv()) + e$.ptr <- ptr + e$`aggregate_function` <- `SedonaDBExprFactory_aggregate_function`(ptr) + e$`binary` <- `SedonaDBExprFactory_binary`(ptr) + e$`column` <- `SedonaDBExprFactory_column`(ptr) + e$`scalar_function` <- `SedonaDBExprFactory_scalar_function`(ptr) + + class(e) <- c("sedonadb::SedonaDBExprFactory", "SedonaDBExprFactory", "savvy_sedonadb__sealed") + e +} + + + +`SedonaDBExprFactory` <- new.env(parent = emptyenv()) + +### associated functions for SedonaDBExprFactory + +`SedonaDBExprFactory`$`literal` <- function(`array_xptr`, `schema_xptr`) { + .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_literal__impl, `array_xptr`, `schema_xptr`)) +} + +`SedonaDBExprFactory`$`new` <- function(`ctx`) { + `ctx` <- .savvy_extract_ptr(`ctx`, "sedonadb::InternalContext") + .savvy_wrap_SedonaDBExprFactory(.Call(savvy_SedonaDBExprFactory_new__impl, `ctx`)) +} + + +class(`SedonaDBExprFactory`) <- c("sedonadb::SedonaDBExprFactory__bundle", "savvy_sedonadb__sealed") + +#' @export +`print.sedonadb::SedonaDBExprFactory__bundle` <- function(x, ...) { + cat('sedonadb::SedonaDBExprFactory\n') +} diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R new file mode 100644 index 000000000..50240d4a3 --- /dev/null +++ b/r/sedonadb/R/expression.R @@ -0,0 +1,283 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' Create SedonaDB logical expressions +#' +#' @param column_name A column name +#' @param qualifier An optional qualifier (e.g., table reference) that may be +#' used to disambiguate a specific reference +#' @param function_name The name of the function to call. This name is resolved +#' from the context associated with `factory`. +#' @param type A destination type into which `expr` should be cast. +#' @param expr A SedonaDBExpr or object coercible to one with [as_sd_expr()]. +#' @param alias An alias to apply to `expr`. +#' @param op Operator name for a binary expression. In general these follow +#' R function names (e.g., `>`, `<`, `+`, `-`). +#' @param lhs,rhs Arguments to a binary expression +#' @param factory A [sd_expr_factory()]. This factory wraps a SedonaDB context +#' and is used to resolve scalar functions and/or retrieve options. +#' +#' @returns An object of class SedonaDBExpr +#' @export +#' +#' @examples +#' sd_expr_column("foofy") +#' sd_expr_literal(1L) +#' sd_expr_scalar_function("abs", list(1L)) +#' sd_expr_cast(1L, nanoarrow::na_int64()) +#' sd_expr_alias(1L, "foofy") +#' +sd_expr_column <- function(column_name, qualifier = NULL, factory = sd_expr_factory()) { + factory$column(column_name, qualifier) +} + +#' @rdname sd_expr_column +#' @export +sd_expr_literal <- function(x, type = NULL, factory = sd_expr_factory()) { + as_sedonadb_literal(x, type = type, factory = factory) +} + +#' @rdname sd_expr_column +#' @export +sd_expr_binary <- function(op, lhs, rhs, factory = sd_expr_factory()) { + factory$binary(op, as_sd_expr(lhs), as_sd_expr(rhs)) +} + +#' @rdname sd_expr_column +#' @export +sd_expr_negative <- function(expr, factory = sd_expr_factory()) { + as_sd_expr(expr, factory = factory)$negate() +} + +#' @rdname sd_expr_column +#' @export +sd_expr_scalar_function <- function(function_name, args, factory = sd_expr_factory()) { + args_as_expr <- lapply(args, as_sd_expr, factory = factory) + factory$scalar_function(function_name, args_as_expr) +} + +#' @rdname sd_expr_column +#' @export +sd_expr_aggregate_function <- function(function_name, args, ..., + na.rm = FALSE, distinct = FALSE, factory = sd_expr_factory()) { + args_as_expr <- lapply(args, as_sd_expr, factory = factory) + factory$aggregate_function(function_name, args_as_expr, na_rm = na.rm, distinct = distinct) +} + +#' @rdname sd_expr_column +#' @export +sd_expr_cast <- function(expr, type, factory = sd_expr_factory()) { + expr <- as_sd_expr(expr, factory = factory) + type <- nanoarrow::as_nanoarrow_schema(type) + expr$cast(type) +} + +#' @rdname sd_expr_column +#' @export +sd_expr_alias <- function(expr, alias, factory = sd_expr_factory()) { + expr <- as_sd_expr(expr, factory = factory) + expr$alias(alias) +} + +#' @rdname sd_expr_column +#' @export +as_sd_expr <- function(x, factory = sd_expr_factory()) { + if (inherits(x, "SedonaDBExpr")) { + x + } else { + sd_expr_literal(x, factory = factory) + } +} + +#' @rdname sd_expr_column +#' @export +is_sd_expr <- function(x) { + inherits(x, "SedonaDBExpr") +} + +#' @rdname sd_expr_column +#' @export +sd_expr_factory <- function() { + SedonaDBExprFactory$new(ctx()) +} + +#' @export +print.SedonaDBExpr <- function(x, ...) { + cat("\n") + cat(x$display()) + cat("\n") + invisible(x) +} + +#' Evaluate an R expression into a SedonaDB expression +#' +#' @param expr An R expression (e.g., the result of `quote()`). +#' @param expr_ctx An `sd_expr_ctx()` +#' +#' @returns A `SedonaDBExpr` +#' @noRd +sd_eval_expr <- function(expr, expr_ctx = sd_expr_ctx(env = env), env = parent.frame()) { + ensure_translations_registered() + + rlang::try_fetch({ + result <- sd_eval_expr_inner(expr, expr_ctx) + as_sd_expr(result, factory = expr_ctx$factory) + }, error = function(e) { + rlang::abort( + sprintf("Error evaluating translated expression %s", rlang::expr_label(expr)), + parent = e + ) + }) +} + +sd_eval_expr_inner <- function(expr, expr_ctx) { + if (rlang::is_call(expr)) { + # Extract `pkg::fun` or `fun` if this is a usual call (e.g., not + # something fancy like `fun()()`) + call_name <- rlang::call_name(expr) + + # If this is not a fancy function call and we have a translation, call it. + # Individual translations can choose to defer to the R function if all the + # arguments are R objects and not SedonaDB expressions (or the user can + # use !! to force R evaluation). + if (!is.null(call_name) && !is.null(expr_ctx$fns[[call_name]])) { + sd_eval_translation(call_name, expr, expr_ctx) + } else { + sd_eval_default(expr, expr_ctx) + } + } else { + sd_eval_default(expr, expr_ctx) + } +} + +sd_eval_translation <- function(fn_key, expr, expr_ctx) { + # Replace the function with the translation in such a way that + # any error resulting from the call doesn't have an absolute garbage error + # stack trace + new_fn_expr <- rlang::call2("$", expr_ctx$fns, rlang::sym(fn_key)) + + # Evaluate arguments individually. We may need to allow translations to + # override this step to have more control over the expression evaluation. + evaluated_args <- lapply(expr[-1], sd_eval_expr_inner, expr_ctx = expr_ctx) + + # Recreate the call, injecting the context as the first argument + new_call <- rlang::call2(new_fn_expr, expr_ctx, !!!evaluated_args) + + # ...and evaluate it + sd_eval_default(new_call, expr_ctx) +} + +sd_eval_default <- function(expr, expr_ctx) { + rlang::eval_tidy(expr, data = expr_ctx$data, env = expr_ctx$env) +} + +#' Expression evaluation context +#' +#' A context to use for evaluating a set of related R expressions into +#' SedonaDB expressions. One expression context may be used to translate +#' multiple expressions (e.g., all arguments to `mutate()`). +#' +#' @param schema A schema-like object coerced using +#' [nanoarrow::as_nanoarrow_schema()]. This is used to create the data mask +#' for expressions. +#' @param env The expression environment. This is needed to evaluate expressions. +#' +#' @return An object of class sedonadb_expr_ctx +#' @noRd +sd_expr_ctx <- function(schema = NULL, env = parent.frame()) { + if (is.null(schema)) { + schema <- nanoarrow::na_struct() + } + + schema <- nanoarrow::as_nanoarrow_schema(schema) + data_names <- as.character(names(schema$children)) + data <- lapply(data_names, sd_expr_column) + names(data) <- data_names + + structure( + list( + factory = sd_expr_factory(), + schema = schema, + data = rlang::as_data_mask(data), + env = env, + fns = default_fns + ), + class = "sedonadb_expr_ctx" + ) +} + +#' Register an R function translation into a SedonaDB expression +#' +#' @param qualified_name The name of the function in the form `pkg::fun` or +#' `fun` if the package name is not relevant. This allows translations to +#' support calls to `fun()` or `pkg::fun()` that appear in an R expression. +#' @param fn A function. The first argument must always be `.ctx`, which +#' is the instance of `sd_expr_ctx()` that may be used to construct +#' the required expressions (using `$factory`). +#' +#' @returns fn, invisibly +#' @noRd +sd_register_translation <- function(qualified_name, fn) { + stopifnot(is.function(fn)) + + pieces <- strsplit(qualified_name, "::")[[1]] + unqualified_name <- pieces[[2]] + + default_fns[[qualified_name]] <- default_fns[[unqualified_name]] <- fn + invisible(fn) +} + +default_fns <- new.env(parent = emptyenv()) + +# Register translations lazily because SQL users don't need them and because +# we need rlang for this and it is currently in Suggests +ensure_translations_registered <- function() { + if (!is.null(default_fns$abs)) { + return() + } + + sd_register_translation("base::abs", function(.ctx, x) { + sd_expr_scalar_function("abs", list(x), factory = .ctx$factory) + }) + + sd_register_translation("base::sum", function(.ctx, x, ..., na.rm = FALSE) { + sd_expr_aggregate_function("sum", list(x), na.rm = na.rm, factory = .ctx$factory) + }) + + sd_register_translation("base::+", function(.ctx, lhs, rhs) { + if (missing(rhs)) { + # Use a double negative to ensure this fails for non-numeric types + sd_expr_negative(sd_expr_negative(lhs, factory = .ctx$factory), factory = .ctx$factory) + } else { + sd_expr_binary("+", lhs, rhs, factory = .ctx$factory) + } + }) + + sd_register_translation("base::-", function(.ctx, lhs, rhs) { + if (missing(rhs)) { + sd_expr_negative(lhs, factory = .ctx$factory) + } else { + sd_expr_binary("-", lhs, rhs, factory = .ctx$factory) + } + }) + + for (op in c("==", "!=", ">", ">=", "<", "<=", "*", "/", "&", "|")) { + sd_register_translation(paste0("base::", op), rlang::inject(function(.ctx, lhs, rhs) { + sd_expr_binary(!!op, lhs, rhs, factory = .ctx$factory) + })) + } +} diff --git a/r/sedonadb/R/literal.R b/r/sedonadb/R/literal.R new file mode 100644 index 000000000..679239cab --- /dev/null +++ b/r/sedonadb/R/literal.R @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' S3 Generic to create a SedonaDB literal expression +#' +#' This generic provides the opportunity for objects to register a mechanism +#' to be understood as literals in the context of a SedonaDB expression. +#' Users constructing expressions directly should use [sd_expr_literal()]. +#' +#' @param x An object to convert to a SedonaDB literal +#' @param ... Passed to/from methods +#' @param type An optional data type to request for the output +#' @param factory An `sd_expr_factory()` that should be passed to any +#' other calls to `as_sedonadb_literal()` if needed +#' +#' @returns An object of class SedonaDBExpr +#' @export +#' +#' @examples +#' as_sedonadb_literal("abcd") +#' +as_sedonadb_literal <- function(x, ..., type = NULL, factory = NULL) { + UseMethod("as_sedonadb_literal") +} + +#' @export +as_sedonadb_literal.NULL <- function(x, ..., type = NULL) { + na <- nanoarrow::nanoarrow_array_init(nanoarrow::na_na()) |> + nanoarrow::nanoarrow_array_modify(list(length = 1L, null_count = 1L)) + as_sedonadb_literal_from_nanoarrow(na, ..., type = type) +} + +#' @export +as_sedonadb_literal.character <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +} + +#' @export +as_sedonadb_literal.integer <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +} + +#' @export +as_sedonadb_literal.double <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +} + +#' @export +as_sedonadb_literal.raw <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(list(x), ..., type = type) +} + +#' @export +as_sedonadb_literal.wk_wkb <- function(x, ..., type = NULL) { + as_sedonadb_literal_from_nanoarrow(x, ..., type = type) +} + +as_sedonadb_literal_from_nanoarrow <- function(x, ..., type = NULL) { + array <- nanoarrow::as_nanoarrow_array(x) + if (array$length != 1L) { + stop("Can't convert non-scalar to sedonadb_expr") + } + + as_sedonadb_literal(array, type = type) +} + +#' @export +as_sedonadb_literal.nanoarrow_array <- function(x, ..., type = NULL) { + schema <- nanoarrow::infer_nanoarrow_schema(x) + + array_export <- nanoarrow::nanoarrow_allocate_array() + nanoarrow::nanoarrow_pointer_export(x, array_export) + + expr <- SedonaDBExprFactory$literal(array_export, schema) + handle_type_request(expr, type) +} + +handle_type_request <- function(x, type) { + if (!is.null(type)) { + x$cast(nanoarrow::as_nanoarrow_schema(type)) + } else { + x + } +} diff --git a/r/sedonadb/man/as_sedonadb_literal.Rd b/r/sedonadb/man/as_sedonadb_literal.Rd new file mode 100644 index 000000000..448b0222f --- /dev/null +++ b/r/sedonadb/man/as_sedonadb_literal.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/literal.R +\name{as_sedonadb_literal} +\alias{as_sedonadb_literal} +\title{S3 Generic to create a SedonaDB literal expression} +\usage{ +as_sedonadb_literal(x, ..., type = NULL, factory = NULL) +} +\arguments{ +\item{x}{An object to convert to a SedonaDB literal} + +\item{...}{Passed to/from methods} + +\item{type}{An optional data type to request for the output} + +\item{factory}{An \code{sd_expr_factory()} that should be passed to any +other calls to \code{as_sedonadb_literal()} if needed} +} +\value{ +An object of class SedonaDBExpr +} +\description{ +This generic provides the opportunity for objects to register a mechanism +to be understood as literals in the context of a SedonaDB expression. +Users constructing expressions directly should use \code{\link[=sd_expr_literal]{sd_expr_literal()}}. +} +\examples{ +as_sedonadb_literal("abcd") + +} diff --git a/r/sedonadb/man/sd_expr_column.Rd b/r/sedonadb/man/sd_expr_column.Rd new file mode 100644 index 000000000..50c41a89f --- /dev/null +++ b/r/sedonadb/man/sd_expr_column.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expression.R +\name{sd_expr_column} +\alias{sd_expr_column} +\alias{sd_expr_literal} +\alias{sd_expr_binary} +\alias{sd_expr_negative} +\alias{sd_expr_scalar_function} +\alias{sd_expr_aggregate_function} +\alias{sd_expr_cast} +\alias{sd_expr_alias} +\alias{as_sd_expr} +\alias{is_sd_expr} +\alias{sd_expr_factory} +\title{Create SedonaDB logical expressions} +\usage{ +sd_expr_column(column_name, qualifier = NULL, factory = sd_expr_factory()) + +sd_expr_literal(x, type = NULL, factory = sd_expr_factory()) + +sd_expr_binary(op, lhs, rhs, factory = sd_expr_factory()) + +sd_expr_negative(expr, factory = sd_expr_factory()) + +sd_expr_scalar_function(function_name, args, factory = sd_expr_factory()) + +sd_expr_aggregate_function( + function_name, + args, + ..., + na.rm = FALSE, + distinct = FALSE, + factory = sd_expr_factory() +) + +sd_expr_cast(expr, type, factory = sd_expr_factory()) + +sd_expr_alias(expr, alias, factory = sd_expr_factory()) + +as_sd_expr(x, factory = sd_expr_factory()) + +is_sd_expr(x) + +sd_expr_factory() +} +\arguments{ +\item{column_name}{A column name} + +\item{qualifier}{An optional qualifier (e.g., table reference) that may be +used to disambiguate a specific reference} + +\item{factory}{A \code{\link[=sd_expr_factory]{sd_expr_factory()}}. This factory wraps a SedonaDB context +and is used to resolve scalar functions and/or retrieve options.} + +\item{type}{A destination type into which \code{expr} should be cast.} + +\item{op}{Operator name for a binary expression. In general these follow +R function names (e.g., \code{>}, \code{<}, \code{+}, \code{-}).} + +\item{lhs, rhs}{Arguments to a binary expression} + +\item{expr}{A SedonaDBExpr or object coercible to one with \code{\link[=as_sd_expr]{as_sd_expr()}}.} + +\item{function_name}{The name of the function to call. This name is resolved +from the context associated with \code{factory}.} + +\item{alias}{An alias to apply to \code{expr}.} +} +\value{ +An object of class SedonaDBExpr +} +\description{ +Create SedonaDB logical expressions +} +\examples{ +sd_expr_column("foofy") +sd_expr_literal(1L) +sd_expr_scalar_function("abs", list(1L)) +sd_expr_cast(1L, nanoarrow::na_int64()) +sd_expr_alias(1L, "foofy") + +} diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c index 8405b2cc4..6faa15217 100644 --- a/r/sedonadb/src/init.c +++ b/r/sedonadb/src/init.c @@ -209,6 +209,75 @@ SEXP savvy_InternalDataFrame_to_view__impl(SEXP self__, SEXP c_arg__ctx, return handle_result(res); } +SEXP savvy_SedonaDBExpr_alias__impl(SEXP self__, SEXP c_arg__name) { + SEXP res = savvy_SedonaDBExpr_alias__ffi(self__, c_arg__name); + return handle_result(res); +} + +SEXP savvy_SedonaDBExpr_cast__impl(SEXP self__, SEXP c_arg__schema_xptr) { + SEXP res = savvy_SedonaDBExpr_cast__ffi(self__, c_arg__schema_xptr); + return handle_result(res); +} + +SEXP savvy_SedonaDBExpr_debug_string__impl(SEXP self__) { + SEXP res = savvy_SedonaDBExpr_debug_string__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaDBExpr_display__impl(SEXP self__) { + SEXP res = savvy_SedonaDBExpr_display__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaDBExpr_negate__impl(SEXP self__) { + SEXP res = savvy_SedonaDBExpr_negate__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_aggregate_function__impl(SEXP self__, + SEXP c_arg__name, + SEXP c_arg__args, + SEXP c_arg__na_rm, + SEXP c_arg__distinct) { + SEXP res = savvy_SedonaDBExprFactory_aggregate_function__ffi( + self__, c_arg__name, c_arg__args, c_arg__na_rm, c_arg__distinct); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_binary__impl(SEXP self__, SEXP c_arg__op, + SEXP c_arg__lhs, SEXP c_arg__rhs) { + SEXP res = savvy_SedonaDBExprFactory_binary__ffi(self__, c_arg__op, + c_arg__lhs, c_arg__rhs); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_column__impl(SEXP self__, SEXP c_arg__name, + SEXP c_arg__qualifier) { + SEXP res = savvy_SedonaDBExprFactory_column__ffi(self__, c_arg__name, + c_arg__qualifier); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_literal__impl(SEXP c_arg__array_xptr, + SEXP c_arg__schema_xptr) { + SEXP res = savvy_SedonaDBExprFactory_literal__ffi(c_arg__array_xptr, + c_arg__schema_xptr); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_new__impl(SEXP c_arg__ctx) { + SEXP res = savvy_SedonaDBExprFactory_new__ffi(c_arg__ctx); + return handle_result(res); +} + +SEXP savvy_SedonaDBExprFactory_scalar_function__impl(SEXP self__, + SEXP c_arg__name, + SEXP c_arg__args) { + SEXP res = savvy_SedonaDBExprFactory_scalar_function__ffi(self__, c_arg__name, + c_arg__args); + return handle_result(res); +} + static const R_CallMethodDef CallEntries[] = { {"savvy_configure_proj_shared__impl", (DL_FUNC)&savvy_configure_proj_shared__impl, 3}, @@ -258,6 +327,28 @@ static const R_CallMethodDef CallEntries[] = { (DL_FUNC)&savvy_InternalDataFrame_to_provider__impl, 1}, {"savvy_InternalDataFrame_to_view__impl", (DL_FUNC)&savvy_InternalDataFrame_to_view__impl, 4}, + {"savvy_SedonaDBExpr_alias__impl", (DL_FUNC)&savvy_SedonaDBExpr_alias__impl, + 2}, + {"savvy_SedonaDBExpr_cast__impl", (DL_FUNC)&savvy_SedonaDBExpr_cast__impl, + 2}, + {"savvy_SedonaDBExpr_debug_string__impl", + (DL_FUNC)&savvy_SedonaDBExpr_debug_string__impl, 1}, + {"savvy_SedonaDBExpr_display__impl", + (DL_FUNC)&savvy_SedonaDBExpr_display__impl, 1}, + {"savvy_SedonaDBExpr_negate__impl", + (DL_FUNC)&savvy_SedonaDBExpr_negate__impl, 1}, + {"savvy_SedonaDBExprFactory_aggregate_function__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_aggregate_function__impl, 5}, + {"savvy_SedonaDBExprFactory_binary__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_binary__impl, 4}, + {"savvy_SedonaDBExprFactory_column__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_column__impl, 3}, + {"savvy_SedonaDBExprFactory_literal__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_literal__impl, 2}, + {"savvy_SedonaDBExprFactory_new__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_new__impl, 1}, + {"savvy_SedonaDBExprFactory_scalar_function__impl", + (DL_FUNC)&savvy_SedonaDBExprFactory_scalar_function__impl, 3}, {NULL, NULL, 0}}; void R_init_sedonadb(DllInfo *dll) { diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h index 201039e14..fac6258bd 100644 --- a/r/sedonadb/src/rust/api.h +++ b/r/sedonadb/src/rust/api.h @@ -60,3 +60,27 @@ SEXP savvy_InternalDataFrame_to_provider__ffi(SEXP self__); SEXP savvy_InternalDataFrame_to_view__ffi(SEXP self__, SEXP c_arg__ctx, SEXP c_arg__table_ref, SEXP c_arg__overwrite); + +// methods and associated functions for SedonaDBExpr +SEXP savvy_SedonaDBExpr_alias__ffi(SEXP self__, SEXP c_arg__name); +SEXP savvy_SedonaDBExpr_cast__ffi(SEXP self__, SEXP c_arg__schema_xptr); +SEXP savvy_SedonaDBExpr_debug_string__ffi(SEXP self__); +SEXP savvy_SedonaDBExpr_display__ffi(SEXP self__); +SEXP savvy_SedonaDBExpr_negate__ffi(SEXP self__); + +// methods and associated functions for SedonaDBExprFactory +SEXP savvy_SedonaDBExprFactory_aggregate_function__ffi(SEXP self__, + SEXP c_arg__name, + SEXP c_arg__args, + SEXP c_arg__na_rm, + SEXP c_arg__distinct); +SEXP savvy_SedonaDBExprFactory_binary__ffi(SEXP self__, SEXP c_arg__op, + SEXP c_arg__lhs, SEXP c_arg__rhs); +SEXP savvy_SedonaDBExprFactory_column__ffi(SEXP self__, SEXP c_arg__name, + SEXP c_arg__qualifier); +SEXP savvy_SedonaDBExprFactory_literal__ffi(SEXP c_arg__array_xptr, + SEXP c_arg__schema_xptr); +SEXP savvy_SedonaDBExprFactory_new__ffi(SEXP c_arg__ctx); +SEXP savvy_SedonaDBExprFactory_scalar_function__ffi(SEXP self__, + SEXP c_arg__name, + SEXP c_arg__args); diff --git a/r/sedonadb/src/rust/src/expression.rs b/r/sedonadb/src/rust/src/expression.rs new file mode 100644 index 000000000..0add4b535 --- /dev/null +++ b/r/sedonadb/src/rust/src/expression.rs @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use datafusion_common::{Column, ScalarValue}; +use datafusion_expr::{ + expr::{AggregateFunction, FieldMetadata, NullTreatment, ScalarFunction}, + BinaryExpr, Cast, Expr, Operator, +}; +use savvy::{savvy, savvy_err, EnvironmentSexp}; +use sedona::context::SedonaContext; + +use crate::{ + context::InternalContext, + ffi::{import_array, import_field}, +}; + +#[savvy] +pub struct SedonaDBExpr { + pub inner: Expr, +} + +#[savvy] +impl SedonaDBExpr { + fn display(&self) -> savvy::Result { + format!("{}", self.inner).try_into() + } + + fn debug_string(&self) -> savvy::Result { + format!("{:?}", self.inner).try_into() + } + + fn alias(&self, name: &str) -> savvy::Result { + let inner = self.inner.clone().alias_if_changed(name.to_string())?; + Ok(Self { inner }) + } + + fn cast(&self, schema_xptr: savvy::Sexp) -> savvy::Result { + let field = import_field(schema_xptr)?; + if let Some(type_name) = field.extension_type_name() { + return Err(savvy_err!( + "Can't cast to Arrow extension type '{type_name}'" + )); + } + + let inner = Expr::Cast(Cast::new( + self.inner.clone().into(), + field.data_type().clone(), + )); + + Ok(Self { inner }) + } + + fn negate(&self) -> savvy::Result { + let inner = Expr::Negative(Box::new(self.inner.clone())); + Ok(Self { inner }) + } +} + +#[savvy] +pub struct SedonaDBExprFactory { + pub ctx: Arc, +} + +#[savvy] +impl SedonaDBExprFactory { + fn new(ctx: &InternalContext) -> Self { + Self { + ctx: ctx.inner.clone(), + } + } + + fn literal(array_xptr: savvy::Sexp, schema_xptr: savvy::Sexp) -> savvy::Result { + let (field, array_ref) = import_array(array_xptr, schema_xptr)?; + let metadata = if field.metadata().is_empty() { + None + } else { + Some(FieldMetadata::new_from_field(&field)) + }; + + let scalar_value = ScalarValue::try_from_array(&array_ref, 0)?; + let inner = Expr::Literal(scalar_value, metadata); + Ok(SedonaDBExpr { inner }) + } + + fn column(&self, name: &str, qualifier: Option<&str>) -> savvy::Result { + let inner = Expr::Column(Column::new(qualifier, name)); + Ok(SedonaDBExpr { inner }) + } + + fn binary( + &self, + op: &str, + lhs: &SedonaDBExpr, + rhs: &SedonaDBExpr, + ) -> savvy::Result { + let operator = match op { + "==" => Operator::Eq, + "!=" => Operator::NotEq, + ">" => Operator::Gt, + ">=" => Operator::GtEq, + "<" => Operator::Lt, + "<=" => Operator::LtEq, + "+" => Operator::Plus, + "-" => Operator::Minus, + "*" => Operator::Multiply, + "/" => Operator::Divide, + "&" => Operator::And, + "|" => Operator::Or, + other => return Err(savvy_err!("Unimplemented binary operation '{other}'")), + }; + + let inner = Expr::BinaryExpr(BinaryExpr::new( + Box::new(lhs.inner.clone()), + operator, + Box::new(rhs.inner.clone()), + )); + Ok(SedonaDBExpr { inner }) + } + + fn scalar_function(&self, name: &str, args: savvy::Sexp) -> savvy::Result { + if let Some(udf) = self.ctx.ctx.state().scalar_functions().get(name) { + let args = Self::exprs(args)?; + let inner = Expr::ScalarFunction(ScalarFunction::new_udf(udf.clone(), args)); + Ok(SedonaDBExpr { inner }) + } else { + Err(savvy_err!("Scalar UDF '{name}' not found")) + } + } + + fn aggregate_function( + &self, + name: &str, + args: savvy::Sexp, + na_rm: Option, + distinct: Option, + ) -> savvy::Result { + if let Some(udf) = self.ctx.ctx.state().aggregate_functions().get(name) { + let args = Self::exprs(args)?; + let null_treatment = if na_rm.unwrap_or(true) { + NullTreatment::IgnoreNulls + } else { + NullTreatment::RespectNulls + }; + + let inner = Expr::AggregateFunction(AggregateFunction::new_udf( + udf.clone(), + args, + distinct.unwrap_or(false), + None, // filter + vec![], // order by + Some(null_treatment), + )); + + Ok(SedonaDBExpr { inner }) + } else { + Err(savvy_err!("Aggregate UDF '{name}' not found")) + } + } +} + +impl SedonaDBExprFactory { + fn exprs(exprs_sexp: savvy::Sexp) -> savvy::Result> { + savvy::ListSexp::try_from(exprs_sexp)? + .iter() + .map(|(_, item)| -> savvy::Result { + // item here is the Environment wrapper around the external pointer + let expr_wrapper: &SedonaDBExpr = EnvironmentSexp::try_from(item)?.try_into()?; + Ok(expr_wrapper.inner.clone()) + }) + .collect() + } +} + +impl TryFrom for &SedonaDBExpr { + type Error = savvy::Error; + + fn try_from(env: EnvironmentSexp) -> Result { + env.get(".ptr")? + .map(<&SedonaDBExpr>::try_from) + .transpose()? + .ok_or(savvy_err!("Invalid SedonaDBExpr object.")) + } +} diff --git a/r/sedonadb/src/rust/src/ffi.rs b/r/sedonadb/src/rust/src/ffi.rs index 4275e2648..828364c38 100644 --- a/r/sedonadb/src/rust/src/ffi.rs +++ b/r/sedonadb/src/rust/src/ffi.rs @@ -18,10 +18,11 @@ use std::sync::Arc; use arrow_array::{ - ffi::FFI_ArrowSchema, + ffi::{from_ffi_and_data_type, FFI_ArrowArray, FFI_ArrowSchema}, ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}, + make_array, ArrayRef, }; -use arrow_schema::Schema; +use arrow_schema::{Field, Schema}; use datafusion::catalog::TableProvider; use datafusion_expr::ScalarUDF; use datafusion_ffi::{ @@ -36,6 +37,25 @@ pub fn import_schema(mut xptr: savvy::Sexp) -> savvy::Result { Ok(schema) } +pub fn import_field(mut xptr: savvy::Sexp) -> savvy::Result { + let ffi_schema: &FFI_ArrowSchema = import_xptr(&mut xptr, "nanoarrow_schema")?; + let schema = Field::try_from(ffi_schema)?; + Ok(schema) +} + +pub fn import_array( + mut xptr: savvy::Sexp, + schema_xptr: savvy::Sexp, +) -> savvy::Result<(Field, ArrayRef)> { + let field = import_field(schema_xptr)?; + let ffi_array_ref: &mut FFI_ArrowArray = import_xptr(&mut xptr, "nanoarrow_array")?; + let ffi_array_owned = unsafe { FFI_ArrowArray::from_raw(ffi_array_ref as _) }; + let array_data = + unsafe { from_ffi_and_data_type(ffi_array_owned as _, field.data_type().clone())? }; + let array_ref = make_array(array_data); + Ok((field, array_ref)) +} + pub fn import_array_stream(mut xptr: savvy::Sexp) -> savvy::Result { let ffi_stream: &mut FFI_ArrowArrayStream = import_xptr(&mut xptr, "nanoarrow_array_stream")?; let reader = unsafe { ArrowArrayStreamReader::from_raw(ffi_stream as _)? }; diff --git a/r/sedonadb/src/rust/src/lib.rs b/r/sedonadb/src/rust/src/lib.rs index 07c6f311a..842519087 100644 --- a/r/sedonadb/src/rust/src/lib.rs +++ b/r/sedonadb/src/rust/src/lib.rs @@ -27,6 +27,7 @@ use sedona_proj::register::{configure_global_proj_engine, ProjCrsEngineBuilder}; mod context; mod dataframe; mod error; +mod expression; mod ffi; mod runtime; diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md b/r/sedonadb/tests/testthat/_snaps/expression.md new file mode 100644 index 000000000..5d7452a58 --- /dev/null +++ b/r/sedonadb/tests/testthat/_snaps/expression.md @@ -0,0 +1,170 @@ +# basic expression types can be constructed + + Code + sd_expr_column("foofy") + Output + + foofy + +--- + + Code + sd_expr_literal(1L) + Output + + Int32(1) + +--- + + Code + sd_expr_scalar_function("abs", list(1L)) + Output + + abs(Int32(1)) + +--- + + Code + sd_expr_cast(1L, nanoarrow::na_int64()) + Output + + CAST(Int32(1) AS Int64) + +--- + + Code + sd_expr_alias(1L, "foofy") + Output + + Int32(1) AS foofy + +--- + + Code + sd_expr_binary("+", 1L, 2L) + Output + + Int32(1) + Int32(2) + +--- + + Code + sd_expr_negative(1L) + Output + + (- Int32(1)) + +--- + + Code + sd_expr_aggregate_function("sum", list(1L)) + Output + + sum(Int32(1)) RESPECT NULLS + +# literal expressions can be translated + + Code + sd_eval_expr(quote(1L)) + Output + + Int32(1) + +# column expressions can be translated + + Code + sd_eval_expr(quote(col0), expr_ctx) + Output + + col0 + +--- + + Code + sd_eval_expr(quote(.data$col0), expr_ctx) + Output + + col0 + +--- + + Code + sd_eval_expr(quote(.data[[col_zero]]), expr_ctx) + Output + + col0 + +# function calls with a translation become function calls + + Code + sd_eval_expr(quote(abs(-1L))) + Output + + abs((- Int32(1))) + +--- + + Code + sd_eval_expr(quote(base::abs(-1L))) + Output + + abs((- Int32(1))) + +# function calls without a translation are evaluated in R + + Code + sd_eval_expr(quote(function_without_a_translation(1L))) + Output + + Int32(2) + +# function calls that map to binary expressions are translated + + Code + sd_eval_expr(quote(+2)) + Output + + (- (- Float64(2))) + +--- + + Code + sd_eval_expr(quote(1 + 2)) + Output + + Float64(1) + Float64(2) + +--- + + Code + sd_eval_expr(quote(-2)) + Output + + (- Float64(2)) + +--- + + Code + sd_eval_expr(quote(1 - 2)) + Output + + Float64(1) - Float64(2) + +--- + + Code + sd_eval_expr(quote(1 > 2)) + Output + + Float64(1) > Float64(2) + +# errors that occur during evaluation have reasonable context + + Code + sd_eval_expr(quote(stop("this will error"))) + Condition + Error in `sd_eval_expr()`: + ! Error evaluating translated expression `stop("this will error")` + Caused by error: + ! this will error + diff --git a/r/sedonadb/tests/testthat/_snaps/literal.md b/r/sedonadb/tests/testthat/_snaps/literal.md new file mode 100644 index 000000000..c115e766f --- /dev/null +++ b/r/sedonadb/tests/testthat/_snaps/literal.md @@ -0,0 +1,8 @@ +# literals with Arrow extension metadata can be converted to literals + + Code + as_sedonadb_literal(wk::as_wkb("POINT (0 1)")) + Output + + Binary("1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,240,63") FieldMetadata { inner: {"ARROW:extension:metadata": "{}", "ARROW:extension:name": "geoarrow.wkb"} } + diff --git a/r/sedonadb/tests/testthat/test-expression.R b/r/sedonadb/tests/testthat/test-expression.R new file mode 100644 index 000000000..f0f3d5af2 --- /dev/null +++ b/r/sedonadb/tests/testthat/test-expression.R @@ -0,0 +1,80 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +test_that("basic expression types can be constructed", { + expect_snapshot(sd_expr_column("foofy")) + expect_snapshot(sd_expr_literal(1L)) + expect_snapshot(sd_expr_scalar_function("abs", list(1L))) + expect_snapshot(sd_expr_cast(1L, nanoarrow::na_int64())) + expect_snapshot(sd_expr_alias(1L, "foofy")) + expect_snapshot(sd_expr_binary("+", 1L, 2L)) + expect_snapshot(sd_expr_negative(1L)) + expect_snapshot(sd_expr_aggregate_function("sum", list(1L))) +}) + +test_that("casts to a type with extension metadata can't be constructed", { + expect_error( + sd_expr_cast(1L, geoarrow::geoarrow_wkb()), + "Can't cast to Arrow extension type 'geoarrow.wkb'" + ) +}) + +test_that("literal expressions can be translated", { + expect_snapshot(sd_eval_expr(quote(1L))) +}) + +test_that("column expressions can be translated", { + schema <- nanoarrow::na_struct(list(col0 = nanoarrow::na_int32())) + expr_ctx <- sd_expr_ctx(schema) + + expect_snapshot(sd_eval_expr(quote(col0), expr_ctx)) + expect_snapshot(sd_eval_expr(quote(.data$col0), expr_ctx)) + col_zero <- "col0" + expect_snapshot(sd_eval_expr(quote(.data[[col_zero]]), expr_ctx)) + + expect_error( + sd_eval_expr(quote(col1), expr_ctx), + "object 'col1' not found" + ) +}) + +test_that("function calls with a translation become function calls", { + # Should work for the qualified or unqualified versions + expect_snapshot(sd_eval_expr(quote(abs(-1L)))) + expect_snapshot(sd_eval_expr(quote(base::abs(-1L)))) +}) + +test_that("function calls without a translation are evaluated in R", { + function_without_a_translation <- function(x) x + 1L + expect_snapshot(sd_eval_expr(quote(function_without_a_translation(1L)))) +}) + +test_that("function calls that map to binary expressions are translated", { + # + and - are special-cased because in R the unary function calls are valid + expect_snapshot(sd_eval_expr(quote(+2))) + expect_snapshot(sd_eval_expr(quote(1 + 2))) + expect_snapshot(sd_eval_expr(quote(-2))) + expect_snapshot(sd_eval_expr(quote(1 - 2))) + + # normal translation + expect_snapshot(sd_eval_expr(quote(1 > 2))) +}) + +test_that("errors that occur during evaluation have reasonable context", { + function_without_a_translation <- function(x) x + 1L + expect_snapshot(sd_eval_expr(quote(stop("this will error"))), error = TRUE) +}) diff --git a/r/sedonadb/tests/testthat/test-literal.R b/r/sedonadb/tests/testthat/test-literal.R new file mode 100644 index 000000000..a9f09406b --- /dev/null +++ b/r/sedonadb/tests/testthat/test-literal.R @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +test_that("basic literals can be converted to expressions", { + expect_identical( + as_sedonadb_literal(NULL)$debug_string(), + 'Literal(NULL, None)' + ) + + expect_identical( + as_sedonadb_literal("foofy")$debug_string(), + 'Literal(Utf8("foofy"), None)' + ) + + expect_identical( + as_sedonadb_literal(1L)$debug_string(), + 'Literal(Int32(1), None)' + ) + + expect_identical( + as_sedonadb_literal(1.0)$debug_string(), + 'Literal(Float64(1), None)' + ) + + expect_identical( + as_sedonadb_literal(as.raw(c(1:3)))$debug_string(), + 'Literal(Binary("1,2,3"), None)' + ) +}) + +test_that("literals can request a type", { + expect_identical( + as_sedonadb_literal(1.0, type = nanoarrow::na_float())$debug_string(), + "Cast(Cast { expr: Literal(Float64(1), None), data_type: Float32 })" + ) +}) + +test_that("literals with Arrow extension metadata can be converted to literals", { + expect_snapshot(as_sedonadb_literal(wk::as_wkb("POINT (0 1)"))) +}) + +test_that("non-scalars can't be automatically converted to literals", { + expect_error( + as_sedonadb_literal(1:5)$debug_string(), + "Can't convert non-scalar to sedonadb_expr" + ) +}) diff --git a/r/sedonadb/tools/savvy-update.sh b/r/sedonadb/tools/savvy-update.sh new file mode 100755 index 000000000..ddba62590 --- /dev/null +++ b/r/sedonadb/tools/savvy-update.sh @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +set -eu + +main() { + local -r source_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + local -r source_rpkg_dir="$(cd "${source_dir}/../" && pwd)" + + # Run the updater + savvy-cli update "${source_rpkg_dir}" + + # Post-process files + local -r api_h="${source_rpkg_dir}/src/rust/api.h" + local -r init_c="${source_rpkg_dir}/src/init.c" + local -r wrappers_r="${source_rpkg_dir}/R/000-wrappers.R" + + mv "${api_h}" "${api_h}.tmp" + mv "${init_c}" "${init_c}.tmp" + mv "${wrappers_r}" "${wrappers_r}.tmp" + + # Add license header to api.h + echo "${LICENSE_C}" > "${api_h}" + cat "${api_h}.tmp" >> "${api_h}" + + # Add license header, put includes on their own lines, and fix a typo in init.c + echo "${LICENSE_C}" > "${init_c}" + sed 's/#include/\n#include/g' "${init_c}.tmp" | \ + sed '1s/^\n//' | \ + sed 's/initialzation/initialization/g' >> "${init_c}" + + # Add license header to 000-wrappers.R + echo "${LICENSE_R}" > "${wrappers_r}" + cat "${wrappers_r}.tmp" >> "${wrappers_r}" + + # Run clang-format on the generated C files + clang-format -i "${api_h}" + clang-format -i "${init_c}" + + # Remove .tmp files + rm "${api_h}.tmp" "${init_c}.tmp" "${wrappers_r}.tmp" +} + +LICENSE_R='# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +' +LICENSE_C='// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +' + +main