Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions r/sedonadb/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,14 @@ export(sd_expr_factory)
export(sd_expr_literal)
export(sd_expr_negative)
export(sd_expr_scalar_function)
export(sd_filter)
export(sd_preview)
export(sd_read_parquet)
export(sd_register_udf)
export(sd_select)
export(sd_sql)
export(sd_to_view)
export(sd_transmute)
export(sd_view)
export(sd_write_parquet)
export(sedonadb_adbc)
Expand Down
22 changes: 22 additions & 0 deletions r/sedonadb/R/000-wrappers.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

90 changes: 89 additions & 1 deletion r/sedonadb/R/dataframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ as_sedonadb_dataframe.datafusion_table_provider <- function(x, ..., schema = NUL

#' Count rows in a DataFrame
#'
#' @param .data A sedonadb_dataframe
#' @param .data A sedonadb_dataframe or an object that can be coerced to one.
#'
#' @returns The number of rows after executing the query
#' @export
Expand All @@ -89,6 +89,7 @@ as_sedonadb_dataframe.datafusion_table_provider <- function(x, ..., schema = NUL
#' sd_sql("SELECT 1 as one") |> sd_count()
#'
sd_count <- function(.data) {
  # Coerce first so that anything accepted by as_sedonadb_dataframe() can be
  # counted, then ask the wrapped data frame handle for its row count.
  sedona_df <- as_sedonadb_dataframe(.data)
  sedona_df$df$count()
}

Expand Down Expand Up @@ -193,6 +194,91 @@ sd_preview <- function(.data, n = NULL, ascii = NULL, width = NULL) {
invisible(.data)
}

#' Keep or drop columns of a SedonaDB DataFrame
#'
#' @inheritParams sd_count
#' @param ... One or more bare names. Evaluated like [dplyr::select()].
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' data.frame(x = 1:10, y = letters[1:10]) |> sd_select(x)
#'
sd_select <- function(.data, ...) {
  .data <- as_sedonadb_dataframe(.data)

  # tidyselect resolves the selection against a prototype object derived from
  # the data frame's schema.
  prototype <- nanoarrow::infer_nanoarrow_ptype(
    nanoarrow::infer_nanoarrow_schema(.data)
  )
  selection <- tidyselect::eval_select(rlang::expr(c(...)), data = prototype)

  # eval_select() yields a named vector of 1-based positions; the names are the
  # output column names and the positions are shifted down by one before they
  # are handed to select_indices().
  output_names <- names(selection)
  shifted_positions <- selection - 1L

  new_sedonadb_dataframe(
    .data$ctx,
    .data$df$select_indices(output_names, shifted_positions)
  )
}

#' Create, modify, and delete columns of a SedonaDB DataFrame
#'
#' @inheritParams sd_count
#' @param ... Named expressions for new columns to create. These are evaluated
#' in the same way as [dplyr::transmute()], except that extra dplyr features
#' such as `across()` or `.by` are not supported.
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' data.frame(x = 1:10) |>
#' sd_transmute(y = x + 1L)
#'
sd_transmute <- function(.data, ...) {
  .data <- as_sedonadb_dataframe(.data)

  # Capture the caller's frame eagerly: it is used both for the expression
  # context and as the evaluation environment of every translated expression.
  caller_env <- parent.frame()
  captured <- rlang::enquos(...)

  expr_ctx <- sd_expr_ctx(infer_nanoarrow_schema(.data), caller_env)

  # Give every input a name (explicit or auto-generated), then strip the
  # quosures down to bare expressions before translating them.
  r_exprs <- lapply(rlang::quos_auto_name(captured), rlang::quo_get_expr)
  sd_exprs <- lapply(
    r_exprs,
    sd_eval_expr,
    expr_ctx = expr_ctx,
    env = caller_env
  )

  # Alias each translated expression with its expected output column name;
  # entries without a usable name are left untouched.
  labels <- names(r_exprs)
  for (idx in seq_along(sd_exprs)) {
    label <- labels[idx]
    if (is.na(label) || label == "") {
      next
    }
    sd_exprs[[idx]] <- sd_expr_alias(sd_exprs[[idx]], label, expr_ctx$factory)
  }

  new_sedonadb_dataframe(.data$ctx, .data$df$select(sd_exprs))
}

#' Keep rows of a SedonaDB DataFrame that match a condition
#'
#' @inheritParams sd_count
#' @param ... Unnamed expressions for filter conditions. These are evaluated
#' in the same way as [dplyr::filter()], except that extra dplyr features
#' such as `across()` or `.by` are not supported.
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' data.frame(x = 1:10) |> sd_filter(x > 5)
#'
sd_filter <- function(.data, ...) {
  .data <- as_sedonadb_dataframe(.data)

  # Conditions must be passed positionally; named arguments are rejected here.
  rlang::check_dots_unnamed()

  # Capture the caller's frame eagerly: it is used both for the expression
  # context and as the evaluation environment of every translated condition.
  caller_env <- parent.frame()
  captured <- rlang::enquos(...)

  expr_ctx <- sd_expr_ctx(infer_nanoarrow_schema(.data), caller_env)

  # Strip the quosures down to bare expressions, then translate each one.
  conditions <- lapply(captured, rlang::quo_get_expr)
  sd_exprs <- lapply(
    conditions,
    sd_eval_expr,
    expr_ctx = expr_ctx,
    env = caller_env
  )

  # The whole list of translated conditions is passed to the filter() method
  # of the wrapped data frame handle.
  new_sedonadb_dataframe(.data$ctx, .data$df$filter(sd_exprs))
}

#' Write DataFrame to (Geo)Parquet files
#'
#' Write this DataFrame to one or more (Geo)Parquet files. For input that contains
Expand Down Expand Up @@ -246,6 +332,8 @@ sd_write_parquet <- function(
geoparquet_version = "1.0",
overwrite_bbox_columns = FALSE
) {
.data <- as_sedonadb_dataframe(.data)

# Determine single_file_output default based on path and partition_by
if (is.null(single_file_output)) {
single_file_output <- length(partition_by) == 0 && grepl("\\.parquet$", path)
Expand Down
1 change: 1 addition & 0 deletions r/sedonadb/R/expression.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ print.SedonaDBExpr <- function(x, ...) {
#'
#' @param expr An R expression (e.g., the result of `quote()`).
#' @param expr_ctx An `sd_expr_ctx()`
#' @param env An evaluation environment. Defaults to the calling environment.
#'
#' @returns A `SedonaDBExpr`
#' @noRd
Expand Down
2 changes: 1 addition & 1 deletion r/sedonadb/man/sd_compute.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion r/sedonadb/man/sd_count.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions r/sedonadb/man/sd_filter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion r/sedonadb/man/sd_preview.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions r/sedonadb/man/sd_select.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion r/sedonadb/man/sd_to_view.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions r/sedonadb/man/sd_transmute.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion r/sedonadb/man/sd_write_parquet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions r/sedonadb/src/init.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions r/sedonadb/src/rust/api.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions r/sedonadb/src/rust/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use arrow_array::{RecordBatchIterator, RecordBatchReader};
use datafusion::catalog::MemTable;
use datafusion::prelude::DataFrame;
use datafusion_common::Column;
use datafusion_expr::utils::conjunction;
use datafusion_expr::{select_expr::SelectExpr, Expr, SortExpr};
use datafusion_ffi::table_provider::FFI_TableProvider;
use savvy::{savvy, savvy_err, sexp, IntoExtPtrSexp, Result};
Expand All @@ -33,6 +34,7 @@ use std::{iter::zip, ptr::swap_nonoverlapping, sync::Arc};
use tokio::runtime::Runtime;

use crate::context::InternalContext;
use crate::expression::SedonaDBExprFactory;
use crate::ffi::{import_schema, FFITableProviderR};
use crate::runtime::wait_for_future_captured_r;

Expand Down Expand Up @@ -311,4 +313,21 @@ impl InternalDataFrame {
let inner = self.inner.clone().select(exprs)?;
Ok(new_data_frame(inner, self.runtime.clone()))
}

fn select(&self, exprs_sexp: savvy::Sexp) -> savvy::Result<InternalDataFrame> {
    // Convert the incoming R object into the expressions to project; a
    // conversion failure is returned to the caller unchanged.
    let projection = SedonaDBExprFactory::exprs(exprs_sexp)?;

    // DataFrame::select consumes its receiver, so operate on a clone and
    // leave `self` untouched.
    let projected = self.inner.clone().select(projection)?;

    // Wrap the result, sharing this frame's runtime handle.
    Ok(new_data_frame(projected, self.runtime.clone()))
}

fn filter(&self, exprs_sexp: savvy::Sexp) -> savvy::Result<InternalDataFrame> {
    // Convert the incoming R object into the individual filter predicates.
    let predicates = SedonaDBExprFactory::exprs(exprs_sexp)?;

    // DataFrame::filter consumes its receiver, so operate on a clone and
    // leave `self` untouched.
    let frame = self.inner.clone();

    // conjunction() folds the predicates into a single AND-ed expression and
    // yields None when there is nothing to combine, in which case the frame
    // is passed through unfiltered.
    let filtered = match conjunction(predicates) {
        Some(combined) => frame.filter(combined)?,
        None => frame,
    };

    // Wrap the result, sharing this frame's runtime handle.
    Ok(new_data_frame(filtered, self.runtime.clone()))
}
}
Loading
Loading