diff --git a/.github/workflows/all-workflows.yaml b/.github/workflows/all-workflows.yaml index c34e5b77..98d955f1 100644 --- a/.github/workflows/all-workflows.yaml +++ b/.github/workflows/all-workflows.yaml @@ -31,9 +31,10 @@ jobs: # We pass information about the triggering event event_name: ${{ github.event_name }} run_id: ${{ github.run_id }} - backend_exclude: oracle # code-coverage creates data bases for the tests. Here you can specify the schemas you need for the workflow + backend_exclude: sqlite,duckdb,postgres,mssql schemas: test,test.one check_postgres_logs: false + skip: R-CMD-check,styler,render-readme,pkgdown,update-lockfile,update-cache secrets: inherit diff --git a/.github/workflows/oracle-reprex.yaml b/.github/workflows/oracle-reprex.yaml new file mode 100644 index 00000000..c7390d5a --- /dev/null +++ b/.github/workflows/oracle-reprex.yaml @@ -0,0 +1,229 @@ +on: + push + + +jobs: + code-coverage-oracle: + name: "๐Ÿงช Tests: Oracle Database (Experimental)" + runs-on: ubuntu-latest + defaults: + run: + shell: bash + + services: + oracledb: + image: gvenzl/oracle-free:latest + env: + APP_USER: "github_ci" + APP_USER_PASSWORD: "github_ci" + ORACLE_PASSWORD: "github_ci" + ports: + - 1521:1521 + options: >- + --health-cmd healthcheck.sh + --health-interval 20s + --health-timeout 10s + --health-retries 10 + + env: + BACKEND: Oracle + BACKEND_DRV: RJDBC::JDBC + BACKEND_ARGS: '' + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - name: โฌ‡๏ธ Checkout repo + uses: actions/checkout@v5 + with: + fetch-depth: 0 + persist-credentials: false + + - name: ๐Ÿ”ง Set environment variables + run: | + ORACLEHOST=localhost + + echo "ORACLEHOST=${ORACLEHOST}" >> $GITHUB_ENV + + CONN_ARGS_JSON="{ + \"Oracle\": { + \"driverClass\": \"oracle.jdbc.OracleDriver\", + \"classPath\": \"/usr/lib/oracle/ojdbc8.jar\", + \"url\": \"jdbc:oracle:thin:@${ORACLEHOST}:1521/FREEPDB1\", + \"user\": \"github_ci\", + \"password\": \"github_ci\" + } + }" + + echo "CONN_ARGS_JSON<> 
$GITHUB_ENV + echo $CONN_ARGS_JSON >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + + - name: ๐Ÿ”ง Install Oracle JDBC driver + run: | + sudo apt-get update + # Create directory for the driver with sudo + sudo mkdir -p /usr/lib/oracle + + # Download the Oracle JDBC driver directly from Maven Central with sudo + sudo curl -o /usr/lib/oracle/ojdbc8.jar https://repo1.maven.org/maven2/com/oracle/database/jdbc/ojdbc8/21.5.0.0/ojdbc8-21.5.0.0.jar + + # Verify the driver was downloaded successfully + if sudo test -f "/usr/lib/oracle/ojdbc8.jar"; then + echo "Oracle JDBC driver downloaded successfully" + sudo ls -la /usr/lib/oracle/ + # Make the JAR file readable by everyone + sudo chmod 644 /usr/lib/oracle/ojdbc8.jar + else + echo "Failed to download Oracle JDBC driver" + exit 1 + fi + + - name: ๐Ÿ”ง Set up Oracle JDK + uses: actions/setup-java@v5 + with: + distribution: oracle + java-version: 25 + + - name: ๐Ÿ”ง Setup R + uses: r-lib/actions/setup-r@v2 + with: + r-version: 'release' + use-public-rspm: true + + - name: ๐Ÿ”ง Install R dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + local::. 
+ any::pak + any::jsonlite + any::rcmdcheck + any::devtools + any::lintr + any::covr + any::roxygen2 + any::pkgdown + any::rmarkdown + any::styler + needs: build, check, coverage, roxygen2, lint, website + + - name: ๐Ÿ”ง Configure Java for R + run: | + # Create .Rprofile to automatically set Java classpath + echo 'Sys.setenv(JAVA_HOME = Sys.getenv("JAVA_HOME"))' > ~/.Rprofile + echo 'Sys.setenv(CLASSPATH = "/usr/lib/oracle/ojdbc8.jar")' >> ~/.Rprofile + + - name: Test Oracle conection + shell: Rscript {0} + run: | + library(RJDBC) + + # Print Java version and classpath to debug + print(system("java -version", intern = TRUE)) + print(Sys.getenv("CLASSPATH")) + print(Sys.getenv("JAVA_HOME")) + + # Initialize the Oracle driver explicitly + drv <- JDBC("oracle.jdbc.OracleDriver", "/usr/lib/oracle/ojdbc8.jar") + print("JDBC driver initialized successfully") + + + devtools::load_all() + # Try to connect + conn <- tryCatch({ + SCDB::get_connection( + drv, + url = "jdbc:oracle:thin:@${{ env.ORACLEHOST }}:1521/FREEPDB1", + user = "github_ci", + password = "github_ci" + ) + }, error = function(e) { + print(paste("Connection error:", e$message)) + NULL + }) + + if (!is.null(conn)) { + print("Successfully connected to Oracle!") + } + + print("class(conn)") + print(class(conn)) + + data <- dplyr::rename_with(iris, ~ toupper(gsub(".", "_", .x, fixed = TRUE))) + + DBI::dbWriteTable(conn, "IRIS", data) + + print(tibble::tibble(DBI::dbReadTable(conn, "IRIS"))) + + r <- DBI::dbSendQuery(conn, paste0("SELECT * FROM IRIS")) + print("class(r)") + print(class(r)) + + out <- DBI::fetch(r) + print(tibble::tibble(out)) + DBI::dbClearResult(r) + + + # Start with some clean up + purrr::walk( + c("test.mtcars", "__mtcars", "__mtcars_historical", "test.mtcars_modified", "mtcars_modified", + "test.SCDB_logs", "test.SCDB_logger", "test.SCDB_tmp1", "test.SCDB_tmp2", "test.SCDB_tmp3", + "test.SCDB_t0", "test.SCDB_t1", "test.SCDB_t2" + ), + ~ if (DBI::dbExistsTable(conn, id(., conn))) 
DBI::dbRemoveTable(conn, id(., conn)) + ) + + purrr::walk( + c(DBI::Id(schema = "test", table = "one.two"), DBI::Id(schema = "test.one", table = "two")), + ~ if (schema_exists(conn, .@name[["schema"]]) && DBI::dbExistsTable(conn, .)) DBI::dbRemoveTable(conn, .) + ) + + # Copy mtcars to conn + dplyr::copy_to( + conn, mtcars %>% dplyr::mutate(name = rownames(mtcars)), + name = id("test.mtcars", conn), + temporary = FALSE, + overwrite = TRUE, + analyze = FALSE + ) + + dplyr::copy_to( + conn, mtcars %>% dplyr::mutate(name = rownames(mtcars)), + name = id("__mtcars", conn), + temporary = FALSE, + overwrite = TRUE, + analyze = FALSE + ) + + dplyr::copy_to( + conn, + mtcars %>% + dplyr::mutate(name = rownames(mtcars)) %>% + digest_to_checksum() %>% + dplyr::mutate( + from_ts = as.POSIXct("2020-01-01 09:00:00"), + until_ts = as.POSIXct(NA) + ), + name = id("__mtcars_historical", conn), + temporary = FALSE, + overwrite = TRUE, + analyze = FALSE + ) + + dplyr::copy_to( + conn, + mtcars %>% + dplyr::mutate(name = rownames(mtcars)) %>% + digest_to_checksum() %>% + dplyr::mutate( + from_ts = as.POSIXct("2020-01-01 09:00:00"), + until_ts = as.POSIXct(NA) + ), + name = id("MTCARS", conn), + temporary = FALSE, + overwrite = TRUE, + analyze = FALSE + ) + + DBI::dbDisconnect(conn) diff --git a/DESCRIPTION b/DESCRIPTION index d00dc101..b0e4b315 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,6 +43,8 @@ Imports: openssl, parallelly, purrr, + rJava, + RJDBC, rlang, R6, stringr, @@ -63,9 +65,9 @@ Suggests: microbenchmark, odbc, pak, + pkgdown, rmarkdown, roxygen2, - pkgdown, RPostgres, RSQLite, spelling, diff --git a/NAMESPACE b/NAMESPACE index 42f4d565..bf1473b2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ S3method(db_timestamp,duckdb_connection) S3method(digest_to_checksum,"tbl_Microsoft SQL Server") S3method(digest_to_checksum,data.frame) S3method(digest_to_checksum,default) +S3method(digest_to_checksum,tbl_Oracle) S3method(digest_to_checksum,tbl_PqConnection) 
S3method(digest_to_checksum,tbl_duckdb_connection) S3method(dplyr::anti_join,tbl_sql) @@ -26,6 +27,7 @@ S3method(get_catalog,Id) S3method(get_catalog,default) S3method(get_catalog,duckdb_connection) S3method(get_catalog,tbl_dbi) +S3method(get_connection,JDBCDriver) S3method(get_connection,OdbcDriver) S3method(get_connection,PqDriver) S3method(get_connection,SQLiteDriver) @@ -34,6 +36,8 @@ S3method(get_connection,duckdb_driver) S3method(get_schema,"Microsoft SQL Server") S3method(get_schema,"NULL") S3method(get_schema,Id) +S3method(get_schema,JDBCConnection) +S3method(get_schema,Oracle) S3method(get_schema,PqConnection) S3method(get_schema,SQLiteConnection) S3method(get_schema,duckdb_connection) @@ -41,6 +45,7 @@ S3method(get_schema,tbl_dbi) S3method(get_tables,"Microsoft SQL Server") S3method(get_tables,DBIConnection) S3method(get_tables,OdbcConnection) +S3method(get_tables,Oracle) S3method(get_tables,PqConnection) S3method(get_tables,SQLiteConnection) S3method(get_tables,duckdb_connection) @@ -50,6 +55,7 @@ S3method(id,data.frame) S3method(id,tbl_dbi) S3method(interlace,tbl_sql) S3method(schema_exists,DBIConnection) +S3method(schema_exists,Oracle) S3method(schema_exists,SQLiteConnection) S3method(schema_exists,default) S3method(table_exists,DBIConnection) @@ -61,6 +67,7 @@ export(close_connection) export(create_index) export(create_logs_if_missing) export(create_table) +export(db_collect.OracleJdbc) export(db_timestamp) export(defer_db_cleanup) export(digest_to_checksum) @@ -81,10 +88,46 @@ export(table_exists) export(unique_table_name) export(unlock_table) export(update_snapshot) +exportMethods(dbBegin) +exportMethods(dbClearResult) +exportMethods(dbCommit) +exportMethods(dbExistsTable) +exportMethods(dbGetInfo) +exportMethods(dbGetRowsAffected) +exportMethods(dbIsValid) +exportMethods(dbQuoteIdentifier) +exportMethods(dbRollback) +exportMethods(dbSendQuery) +exportMethods(dbSendStatement) +exportMethods(dbWriteTable) +exportMethods(fetch) import(parallelly) 
importClassesFrom(DBI,DBIConnection) +importClassesFrom(RJDBC,JDBCConnection) +importClassesFrom(RJDBC,JDBCResult) +importClassesFrom(odbc,Oracle) importFrom(R6,R6Class) importFrom(magrittr,"%>%") importFrom(methods,setGeneric) +importFrom(rJava,.jcall) importFrom(rlang,":=") importFrom(rlang,.data) +importMethodsFrom(DBI,dbBegin) +importMethodsFrom(DBI,dbClearResult) +importMethodsFrom(DBI,dbCommit) +importMethodsFrom(DBI,dbExistsTable) +importMethodsFrom(DBI,dbGetInfo) +importMethodsFrom(DBI,dbGetRowsAffected) +importMethodsFrom(DBI,dbIsValid) +importMethodsFrom(DBI,dbQuoteIdentifier) +importMethodsFrom(DBI,dbRollback) +importMethodsFrom(DBI,dbSendQuery) +importMethodsFrom(DBI,dbSendStatement) +importMethodsFrom(DBI,dbWriteTable) +importMethodsFrom(DBI,fetch) +importMethodsFrom(RJDBC,dbDataType) +importMethodsFrom(RJDBC,dbExistsTable) +importMethodsFrom(RJDBC,dbSendQuery) +importMethodsFrom(RJDBC,dbWriteTable) +importMethodsFrom(odbc,dbDataType) +importMethodsFrom(odbc,sqlCreateTable) diff --git a/R/Logger.R b/R/Logger.R index 51e9fec8..54ddd0cc 100644 --- a/R/Logger.R +++ b/R/Logger.R @@ -190,7 +190,8 @@ Logger <- R6::R6Class( dest = private$log_conn, df = patch, name = unique_table_name("SCDB_logger_patch"), - temporary = TRUE + temporary = TRUE, + analyze = FALSE ) defer_db_cleanup(patch) # Clean up on exit @@ -341,7 +342,8 @@ Logger <- R6::R6Class( dest = private$log_conn, df = patch, name = unique_table_name("SCDB_logger_patch"), - temporary = TRUE + temporary = TRUE, + analyze = FALSE ) defer_db_cleanup(patch) # Clean up on exit diff --git a/R/backend_oracle.R b/R/backend_oracle.R new file mode 100644 index 00000000..94262953 --- /dev/null +++ b/R/backend_oracle.R @@ -0,0 +1,457 @@ +# dbplyr needs additional implementation for Oracle to work. 
+ +#' @importClassesFrom RJDBC JDBCConnection +#' @importClassesFrom odbc Oracle +setClass( + "OracleJdbc", + slots = list( + "jdbc_conn" = "JDBCConnection", + "jc" = "jobjRef", + "servername" = "character", + "options" = "list" + ), + contains = "Oracle" +) + +#' @importClassesFrom RJDBC JDBCResult +setClass( + "OracleJdbcResult", + slots = list( + "jdbc_result" = "JDBCResult" + ), + contains = "JDBCResult" +) + + +# DBI methods defined in RJDBC package +# dbOption +# dbListConnections +# dbGetInfo +# dbUnloadDriver +# dbConnect +# dbDisconnect +# dbIsValid - mapped +# dbSendQuery - mapped +# dbSendUpdate +# dbGetQuery +# dbGetException +# dbGetInfo +# dbListResults +# dbListTables +# dbGetTables +# dbExistsTable - mapped +# dbRemoveTable +# dbListFields +# dbGetFields +# dbReadTable +# dbReadTable +# dbDataType +# dbWriteTable - mapped +# dbCommit +# dbRollback - mapped +# dbBegin - mapped +# dbClearResult +# dbGetInfo - mapped +# dbHasCompleted +# dbColumnInfo + +# Additonal implementations +# dbQuoteIdentifier +# dbSendStatement +# dbCollect + +#' @export +db_collect.OracleJdbc <- function(con, sql, n = -1, ...) { + DBI::dbGetQuery(con, sql, n, ...) +} + + +#' @importMethodsFrom DBI dbWriteTable +#' @importMethodsFrom RJDBC dbWriteTable +#' @exportMethod dbWriteTable +setMethod( + "dbWriteTable", + signature( + conn = "OracleJdbc", + name = "character", + value = "data.frame" + ), + function(conn, name, value, ...) { + DBI::dbWriteTable(conn@jdbc_conn, id(name, conn@jdbc_conn), value, ...) + } +) + +#' @importMethodsFrom DBI dbWriteTable +#' @importMethodsFrom RJDBC dbWriteTable +#' @exportMethod dbWriteTable +setMethod( + "dbWriteTable", + signature( + conn = "OracleJdbc", + name = "Id", + value = "data.frame" + ), + function(conn, name, value, ...) { + DBI::dbWriteTable(conn, DBI::dbQuoteIdentifier(conn@jdbc_conn, name), value, ...) 
+ } +) + +#' @importMethodsFrom DBI dbWriteTable +#' @importMethodsFrom RJDBC dbWriteTable +#' @exportMethod dbWriteTable +setMethod( + "dbWriteTable", + signature( + conn = "OracleJdbc", + name = "SQL", + value = "data.frame" + ), + function(conn, name, value, ...) { + + names(value) <- as.character(DBI::dbQuoteIdentifier(conn@jdbc_conn, names(value))) + + DBI::dbWriteTable(conn@jdbc_conn, name, value) + } +) + + +# # The ANALYZE TABLE command generated by dplyr does not work for Oracle, so we manually implement. +# #' DBI::dbSendQuery@exportS3Method dbplyr::sql_table_analyze +# #' @noRd +# sql_table_analyze.Oracle <- function(con, table, ...) { +# dbplyr::build_sql( +# "ANALYZE TABLE ", +# dbplyr::as.sql(id(table, conn = con), con = con), +# " COMPUTE STATISTICS", +# con = con +# ) +# }dbExistsTable + +#' @importMethodsFrom DBI dbExistsTable +#' @importMethodsFrom RJDBC dbExistsTable +#' @exportMethod dbExistsTable +setMethod( + "dbExistsTable", + signature( + conn = "OracleJdbc", + name = "Id" + ), + function(conn, name, ...) { + DBI::dbExistsTable(conn@jdbc_conn, name, ...) + } +) + +#' @importFrom rJava .jcall +#' @importMethodsFrom DBI dbGetRowsAffected +#' @exportMethod dbGetRowsAffected +#' @noRd +setMethod( + "dbGetRowsAffected", + signature( + res = "JDBCResult" + ), + function(res, ...) { + if (!is.null(res@stat)) { + tryCatch({ + cnt <- rJava::.jcall(res@stat, "I", "getUpdateCount") + return(if (cnt < 0) 0L else as.integer(cnt)) + }, error = function(e) { + return(NA_integer_) + }) + } + return(NA_integer_) + } +) + +#' @importMethodsFrom DBI dbQuoteIdentifier +#' @exportMethod dbQuoteIdentifier +setMethod( + "dbQuoteIdentifier", + signature( + conn = "OracleJdbc", + x = "character" + ), + function(conn, x, ...) 
{ + x <- enc2utf8(x) + + reserved_words <- c("DATE", "NUMBER", "VARCHAR") + + needs_escape <- (grepl("^[a-zA-Z_]", x) & toupper(x) != x) | tolower(x) %in% reserved_words + + x[needs_escape] <- paste0("\"", gsub("\"", "\"\"", x[needs_escape]), "\"") + + return(DBI::SQL(x, names = names(x))) + } +) + +#' @importMethodsFrom DBI dbQuoteIdentifier +#' @exportMethod dbQuoteIdentifier +setMethod( + "dbQuoteIdentifier", + signature( + conn = "OracleJdbc", + x = "SQL" + ), + function(conn, x, ...) { + return(x) # Remove ambiguity (also assume already quoted) + } +) + +#' @importMethodsFrom DBI dbQuoteIdentifier +#' @exportMethod dbQuoteIdentifier +setMethod( + "dbQuoteIdentifier", + signature( + conn = "OracleJdbc", + x = "Id" + ), + function(conn, x, ...) { + + # For `Id`, run on each non-NA element + return(DBI::SQL(paste0(DBI::dbQuoteIdentifier(conn@jdbc_conn, purrr::discard(x@name, is.na)), collapse = "."))) + } +) + + + +#' @importMethodsFrom DBI dbSendQuery +#' @importMethodsFrom RJDBC dbSendQuery +#' @exportMethod dbSendQuery +setMethod( + "dbSendQuery", + signature( + conn = "OracleJdbc", + statement = "character" + ), + function(conn, statement, ...) { + res <- DBI::dbSendQuery(conn@jdbc_conn, statement) + + result <- new("OracleJdbcResult") + + for (slot_name in slotNames(res)) { + slot(result, slot_name) <- slot(res, slot_name) + } + + return(result) + } +) + +#' @importMethodsFrom DBI dbIsValid +#' @exportMethod dbIsValid +setMethod( + "dbIsValid", + signature( + dbObj = "OracleJdbc" + ), + function(dbObj, ...) { + DBI::dbIsValid(dbObj@jdbc_conn, ...) + } +) + +#' @importMethodsFrom DBI dbBegin +#' @exportMethod dbBegin +setMethod( + "dbBegin", + signature( + conn = "OracleJdbc" + ), + function(conn, ...) { + DBI::dbBegin(conn@jdbc_conn, ...) + } +) + +#' @importMethodsFrom DBI dbRollback +#' @exportMethod dbRollback +setMethod( + "dbRollback", + signature( + conn = "OracleJdbc" + ), + function(conn, ...) { + DBI::dbRollback(conn@jdbc_conn, ...) 
+ } +) + +#' @importMethodsFrom DBI dbCommit +#' @exportMethod dbCommit +setMethod( + "dbCommit", + signature( + conn = "OracleJdbc" + ), + function(conn, ...) { + DBI::dbCommit(conn@jdbc_conn, ...) + } +) + +#' @importMethodsFrom DBI dbGetInfo +#' @exportMethod dbGetInfo +setMethod( + "dbGetInfo", + signature( + dbObj = "OracleJdbc" + ), + function(dbObj, ...) { + modifyList( + DBI::dbGetInfo(dbObj@jdbc_conn, ...), + list( + "servername" = dbObj@servername, + "port" = "" + ) + ) + } +) + +#' @importMethodsFrom DBI dbSendStatement +#' @exportMethod dbSendStatement +setMethod( + "dbSendStatement", + signature( + conn = "OracleJdbc", + statement = "character" + ), + function(conn, statement, ...) { + res <- DBI::dbSendQuery(conn@jdbc_conn, statement, ...) + + result <- new("OracleJdbcResult") + + for (slot_name in slotNames(res)) { + slot(result, slot_name) <- slot(res, slot_name) + } + + return(result) + } +) + +#' @importMethodsFrom DBI fetch +#' @exportMethod fetch +setMethod( + "fetch", + signature( + res = "OracleJdbcResult", + n = "numeric" + ), + function(res, n, ...) { + rjdbc_fetch(res, n, ...) + } +) + +#' @importMethodsFrom DBI fetch +#' @exportMethod fetch +setMethod( + "fetch", + signature( + res = "OracleJdbcResult" + ), + function(res, ...) { + rjdbc_fetch(res, n = -1, ...) + } +) + + +#' @importMethodsFrom DBI dbClearResult +#' @exportMethod dbClearResult +setMethod( + "dbClearResult", + signature( + res = "OracleJdbcResult" + ), + function(res, ...) { + callNextMethod(res, ...) + } +) + + + +# RJDBC has seemingly stopped active development but their latest version of +# `fetch` is needed to retrieve results from oracle in a meaningful manor. +# We implement a minimal (always lossy) version of that function here +rjdbc_fetch <- function( + res, + n = -1, + block = 2048L, + ... 
+) { + + cols <- rJava::.jcall(res@md, "I", "getColumnCount") + block <- as.integer(block) + if (length(block) != 1L) stop("invalid block size") + if (cols < 1L) return(NULL) + l <- vector("list", cols) + cts <- rep(0L, cols) ## column type (as per JDBC) + rts <- rep(0L, cols) ## retrieval types (0 = string, 1 = double, 2 = integer, 3 = POSIXct) + for (i in 1:cols) { + ## possible retrieval: + ## getDouble(), getTimestamp() and getString() + ## [NOTE: getBigDecimal() is native for all numeric() types] + ## could check java.sql.Timestamp which has .getTime() in millis + cts[i] <- ct <- rJava::.jcall(res@md, "I", "getColumnType", i) + l[[i]] <- character() + ## NOTE: this is also needed in dbColumnInfo() - see also JDBC.types + ## -7 BIT, -6 TINYINT, 5 SMALLINT, 4 INTEGER, -5 BIGINT + ## 6 FLOAT, 7 REAL, 8 DOUBLE, 2 NUMERIC, 3 DECIMAL + ## 1 CHAR, 12 VARCHAR, -1 LONGVARCHAR + ## 91 DATE, 92 TIME, 93 TIMESTAMP + ## -2 BINARY, -3 VARBINARY, -4 LONGVARBINARY + ## 0 NULL, 1111 OTHER, 2000 JAVA_OBJECT + ## 16 BOOLEAN, 1.8+: 2013 TIME_WITH_TIMEZONE, + ## 2014 TIMESTAMP_WITH_TIMEZONE + ## + ## integer-compatible types + if (ct == 4L || ct == 5L || ct == -6L) { + l[[i]] <- integer() + rts[i] <- 2L + } else if (ct == -5L || (ct >= 2L && ct <= 8L)) { ## BIGINT and various float/num types + l[[i]] <- numeric() + rts[i] <- 1L + } else if (ct >= 91L && ct <= 93L) { ## DATE/TIME/TS + l[[i]] <- as.POSIXct(numeric()) + rts[i] <- 3L + } else if (ct == -7L) { ## BIT + l[[i]] <- logical() + rts[i] <- 4L + } + names(l)[i] <- rJava::.jcall(res@md, "S", "getColumnLabel", i) + } + + rp <- res@env$pull + if (rJava::is.jnull(rp)) { + rp <- rJava::.jnew( + "info/urbanek/Rpackage/RJDBC/JDBCResultPull", + rJava::.jcast(res@jr, "java/sql/ResultSet"), + rJava::.jarray(as.integer(rts)) + ) + res@env$pull <- rp + } + + ret_fn <- list( ## retrieval functions for the different types + function(i) rJava::.jcall(rp, "[Ljava/lang/String;", "getStrings", i), + function(i) rJava::.jcall(rp, "[D", 
"getDoubles", i), + function(i) rJava::.jcall(rp, "[I", "getIntegers", i), + function(i) rJava::.jcall(rp, "[Ljava/lang/String;", "getStrings", i), + function(i) as.logical(rJava::.jcall(rp, "[I", "getIntegers", i)) + ) + + if (n < 0L) { ## infinite pull - collect (using pairlists) & join + stride <- 32768L ## start fairly small to support tiny queries and increase later + while ((nrec <- rJava::.jcall(rp, "I", "fetch", stride, block)) > 0L) { + for (i in seq.int(cols)) { + l[[i]] <- pairlist(l[[i]], ret_fn[[rts[i] + 1L]](i)) + } + if (nrec < stride) break + stride <- 524288L # 512k + } + for (i in seq.int(cols)) l[[i]] <- unlist(l[[i]], TRUE, FALSE) + } else { + nrec <- rJava::.jcall(rp, "I", "fetch", as.integer(n), block) + for (i in seq.int(cols)) l[[i]] <- ret_fn[[rts[i] + 1L]](i) + } + ## unlisting can strip attrs so do POSIXct at the end for TSs + ts_col <- rts == 3L + if (any(ts_col)) for (i in which(ts_col)) l[[i]] <- as.POSIXct(l[[i]]) + # as.data.frame is expensive - create it on the fly from the list + attr(l, "row.names") <- c(NA_integer_, length(l[[1]])) + class(l) <- "data.frame" + + return(l) +} diff --git a/R/connection.R b/R/connection.R index 804f94f3..8f0cc992 100644 --- a/R/connection.R +++ b/R/connection.R @@ -7,15 +7,6 @@ #' Certain drivers may use credentials stored in a file, such as ~/.pgpass (PostgreSQL). #' @param drv (`DBIDriver(1)` or `DBIConnection(1)`)\cr #' The driver for the connection (defaults to `SQLiteDriver`). -#' @param dbname (`character(1)`)\cr -#' Name of the database located at the host. -#' @param bigint (`character(1)`)\cr -#' The datatype to convert integers to. -#' Support depends on the database backend. -#' @param timezone (`character(1)`)\cr -#' Sets the timezone of DBI::dbConnect(). Must be in [OlsonNames()]. -#' @param timezone_out (`character(1)`)\cr -#' Sets the timezone_out of DBI::dbConnect(). Must be in [OlsonNames()]. #' @param ... #' Additional parameters sent to DBI::dbConnect(). 
#' @return @@ -38,6 +29,11 @@ get_connection <- function(drv, ...) { } #' @rdname get_connection +#' @param dbname (`character(1)`)\cr +#' Name of the database located at the host. +#' @param bigint (`character(1)`)\cr +#' The datatype to convert integers to. +#' Support depends on the database backend. #' @seealso [RSQLite::SQLite] #' @export get_connection.SQLiteDriver <- function( @@ -74,12 +70,16 @@ get_connection.SQLiteDriver <- function( #' The ip of the host to connect to. #' @param port (`numeric(1)` or `character(1)`)\cr #' Host port to connect to. -#' @param password (`character(1)`)\cr -#' Password to login with. #' @param user (`character(1)`)\cr #' Username to login with. +#' @param password (`character(1)`)\cr +#' Password to login with. #' @param check_interrupts (`logical(1)`)\cr #' Should user interrupts be checked during the query execution? +#' @param timezone (`character(1)`)\cr +#' Sets the timezone of DBI::dbConnect(). Must be in [OlsonNames()]. +#' @param timezone_out (`character(1)`)\cr +#' Sets the timezone_out of DBI::dbConnect(). Must be in [OlsonNames()]. 
#' @seealso [RPostgres::Postgres] #' @export get_connection.PqDriver <- function( @@ -87,8 +87,8 @@ get_connection.PqDriver <- function( dbname = NULL, host = NULL, port = NULL, - password = NULL, user = NULL, + password = NULL, ..., bigint = c("integer", "bigint64", "numeric", "character"), check_interrupts = TRUE, @@ -113,8 +113,8 @@ get_connection.PqDriver <- function( port <- as.numeric(port) } checkmate::assert_numeric(port, null.ok = TRUE, add = coll) - checkmate::assert_character(password, null.ok = TRUE, add = coll) checkmate::assert_character(user, null.ok = TRUE, add = coll) + checkmate::assert_character(password, null.ok = TRUE, add = coll) checkmate::assert_logical(check_interrupts, add = coll) checkmate::assert_choice(timezone, OlsonNames(), null.ok = TRUE, add = coll) checkmate::assert_choice(timezone_out, OlsonNames(), null.ok = TRUE, add = coll) @@ -207,6 +207,70 @@ get_connection.duckdb_driver <- function( return(do.call(DBI::dbConnect, args = args)) } + +#' @rdname get_connection +#' @param driverClass (`character(1)`)\cr +#' The name of the JDBC driver to load. +#' @param classPath (`character(1)`)\cr +#' The path to the JDBC driver. +#' @param url (`character(1)`)\cr +#' The source to connect to. +#' @seealso [RJDBC::JDBC] +#' @export +get_connection.JDBCDriver <- function( + drv, + driverClass = NULL, # nolint: object_name_linter + classPath = NULL, # nolint: object_name_linter + url = NULL, + user = NULL, + password = NULL, + ...) 
{ + + # Store the given arguments + args <- as.list(rlang::current_env()) %>% + utils::modifyList(list(...)) %>% + unlist() + args <- args[match(unique(names(args)), names(args))] + + # Check arguments + coll <- checkmate::makeAssertCollection() + checkmate::assert_character(driverClass, null.ok = TRUE, add = coll) + checkmate::assert_character(classPath, null.ok = TRUE, add = coll) + checkmate::assert_character(url, null.ok = TRUE, add = coll) + checkmate::assert_character(user, null.ok = TRUE, add = coll) + checkmate::assert_character(password, null.ok = TRUE, add = coll) + checkmate::reportAssertions(coll) + + # Check if connection can be established given these settings + status <- do.call(DBI::dbCanConnect, args = args) + if (!status) stop(attr(status, "reason"), call. = FALSE) + + # Connect + conn <- do.call(DBI::dbConnect, args = args) + + # Assume connection is to an Oracle data base + rlang::warn( + message = paste0( + "Connections of class '", + class(conn), + "' are experimental and are assumed to be connections to Oracle databases" + ), + .frequency = "regularly", + .frequency_id = "JDBC means Oracle warning", + call. 
= FALSE ) conn <- new( "OracleJdbc", jdbc_conn = conn, jc = conn@jc, servername = url, options = list("fetch.lossy" = TRUE) ) return(conn) } #' @rdname get_connection #' @export #' @importFrom magrittr %>% diff --git a/R/create_index.R b/R/create_index.R index 10b740c1..5195170b 100644 --- a/R/create_index.R +++ b/R/create_index.R @@ -72,7 +72,8 @@ create_index.DBIConnection <- function(conn, db_table, columns) { stringr::str_replace_all(stringr::fixed("."), "_") query <- glue::glue( - "CREATE UNIQUE INDEX {index} ON {as.character(db_table, explicit = TRUE)} ({toString(columns)})" + "CREATE UNIQUE INDEX {index} ON {as.character(db_table, explicit = TRUE)} ", + "({toString(DBI::dbQuoteIdentifier(conn, columns))})" ) DBI::dbExecute(conn, query) diff --git a/R/create_logs_if_missing.R b/R/create_logs_if_missing.R index 8e1556c3..ba9d97a0 100644 --- a/R/create_logs_if_missing.R +++ b/R/create_logs_if_missing.R @@ -33,7 +33,7 @@ create_logs_if_missing <- function(conn, log_table) { log_file = character(0) ) - if (!checkmate::test_multi_class(conn, c("Microsoft SQL Server", "duckdb_connection"))) { + if (!checkmate::test_multi_class(conn, c("Microsoft SQL Server", "duckdb_connection", "JDBCConnection"))) { log_signature <- dplyr::select(log_signature, !"catalog") } diff --git a/R/create_table.R b/R/create_table.R index b825a207..01d5eb47 100644 --- a/R/create_table.R +++ b/R/create_table.R @@ -13,6 +13,7 @@ #' create_table(mtcars, conn = conn, db_table = "mtcars") #' #' close_connection(conn) +#' @importMethodsFrom odbc sqlCreateTable #' @export create_table <- function(.data, conn = NULL, db_table, ...)
{ # nolint: function_argument_linter diff --git a/R/db_joins.R b/R/db_joins.R index 9d0e75d6..82c9c6ce 100644 --- a/R/db_joins.R +++ b/R/db_joins.R @@ -68,6 +68,11 @@ join_na_sql.tbl_dbi <- function(x, by, na_by) { return(join_na_not_null(by = by, na_by = na_by)) } +#' @noRd +`join_na_sql.tbl_Oracle` <- function(x, by, na_by) { + return(join_na_not_null(by = by, na_by = na_by)) +} + #' Get colnames to select #' #' @inheritParams left_join diff --git a/R/digest_to_checksum.R b/R/digest_to_checksum.R index 93ede804..f25ebaf7 100644 --- a/R/digest_to_checksum.R +++ b/R/digest_to_checksum.R @@ -51,6 +51,24 @@ digest_to_checksum <- function(.data, col = "checksum", exclude = NULL) { return(.data) } +#' @export +`digest_to_checksum.tbl_Oracle` <- function( + .data, + col = formals(digest_to_checksum)$col, + exclude = formals(digest_to_checksum)$exclude) { + + hash_cols <- DBI::dbQuoteIdentifier(.data$src$con, setdiff(colnames(.data), c(col, exclude))) + + .data <- dplyr::mutate( + .data, + !!col := !!dplyr::sql( + glue::glue("RAWTOHEX(STANDARD_HASH({paste0(hash_cols, collapse = ' || ')}, 'SHA256'))") + ) + ) + + return(.data) +} + #' @export digest_to_checksum.default <- function( .data, @@ -75,7 +93,7 @@ digest_to_checksum.default <- function( id__ = dplyr::row_number(), dplyr::across(tidyselect::all_of(col), openssl::md5) ) %>% - dplyr::copy_to(dbplyr::remote_con(.data), df = ., name = unique_table_name("SCDB_digest_to_checksum_helper")) + dplyr::copy_to(dbplyr::remote_con(.data), df = ., name = unique_table_name("SCDB_digest_to_checksum_helper"), analyze = FALSE) defer_db_cleanup(checksums) .data <- .data %>% diff --git a/R/getTableSignature.R b/R/getTableSignature.R index 0ec36f1e..6c79b175 100644 --- a/R/getTableSignature.R +++ b/R/getTableSignature.R @@ -12,11 +12,13 @@ methods::setGeneric("getTableSignature", function(.data, conn = NULL) standardGeneric("getTableSignature"), signature = "conn") +#' @importMethodsFrom RJDBC dbDataType +#' @importMethodsFrom odbc 
dbDataType #' @importClassesFrom DBI DBIConnection methods::setMethod("getTableSignature", "DBIConnection", function(.data, conn) { # Retrieve the translated data types - signature <- as.list(DBI::dbDataType(conn, dplyr::collect(utils::head(.data, 0)))) + signature <- purrr::map(utils::head(.data, 0), ~ DBI::dbDataType(conn, .)) # Define the column types to be updated based on backend class backend_coltypes <- list( @@ -39,6 +41,11 @@ methods::setMethod("getTableSignature", "DBIConnection", function(.data, conn) { checksum = "char(32)", from_ts = "TIMESTAMP", until_ts = "TIMESTAMP" + ), + "OracleJdbc" = c( + checksum = "CHAR(32)", + from_ts = "TIMESTAMP", + until_ts = "TIMESTAMP" ) ) diff --git a/R/get_schema.R b/R/get_schema.R index 7605166c..b261791d 100644 --- a/R/get_schema.R +++ b/R/get_schema.R @@ -102,6 +102,16 @@ get_schema.duckdb_connection <- function(obj, ...) { return("main") } +#' @export +get_schema.Oracle <- function(obj, ...) { + return(DBI::dbGetQuery(obj, "SELECT user FROM dual")$USER) +} + +#' @export +get_schema.JDBCConnection <- function(obj, ...) { + return(DBI::dbGetQuery(obj, "SELECT user FROM dual")$USER) +} + #' @export get_schema.NULL <- function(obj, ...) 
{ return(NULL) diff --git a/R/get_tables.R b/R/get_tables.R index f03d21d4..27974960 100644 --- a/R/get_tables.R +++ b/R/get_tables.R @@ -170,6 +170,47 @@ get_tables.duckdb_connection <- function(conn, pattern = NULL, show_temporary = } +#' @importFrom rlang .data +#' @export +get_tables.Oracle <- function(conn, pattern = NULL, show_temporary = TRUE) { + query <- paste( + "SELECT", + "owner AS \"schema\",", + "table_name AS \"table\"", + "FROM all_tables", + "WHERE owner NOT IN ('SYS', 'SYSTEM', 'SYSAUX', 'CTXSYS', 'MDSYS', 'OLAPSYS',", + "'ORDDATA', 'ORDSYS', 'OUTLN', 'WMSYS', 'XDB', 'APEX_PUBLIC_USER',", + "'DBSNMP', 'DIP', 'GSMADMIN_INTERNAL', 'ORACLE_OCM', 'ORDS_METADATA',", + "'ORDS_PUBLIC_USER', 'SPATIAL_CSW_DEBUG_USR', 'SPATIAL_WFS_DEBUG_USR',", + "'APPQOSSYS', 'DBSFWUSER', 'LBACSYS',", + "'GSMCATUSER', 'MDDATA', 'SYSBACKUP', 'SYSDG', 'SYSKM', 'SYSMAN')", + "UNION ALL", + "SELECT", + "owner AS \"schema\",", + "view_name AS \"table\"", + "FROM all_views", + "WHERE owner NOT IN ('SYS', 'SYSTEM', 'SYSAUX', 'CTXSYS', 'MDSYS', 'OLAPSYS',", + "'ORDDATA', 'ORDSYS', 'OUTLN', 'WMSYS', 'XDB', 'APEX_PUBLIC_USER',", + "'DBSNMP', 'DIP', 'GSMADMIN_INTERNAL', 'ORACLE_OCM', 'ORDS_METADATA',", + "'ORDS_PUBLIC_USER', 'SPATIAL_CSW_DEBUG_USR', 'SPATIAL_WFS_DEBUG_USR',", + "'APPQOSSYS', 'DBSFWUSER', 'LBACSYS',", + "'GSMCATUSER', 'MDDATA', 'SYSBACKUP', 'SYSDG', 'SYSKM', 'SYSMAN')", + "ORDER BY \"schema\", \"table\"" + ) + + tables <- DBI::dbGetQuery(conn, query) + + if (!is.null(pattern)) { + tables <- tables %>% + dplyr::mutate(db_table_str = paste(.data$schema, .data$table, sep = ".")) %>% + dplyr::filter(grepl(pattern, .data$db_table_str)) %>% + dplyr::select(!"db_table_str") + } + + return(tables) +} + + #' @export get_tables.OdbcConnection <- function(conn, pattern = NULL, show_temporary = TRUE) { query <- paste("SELECT", diff --git a/R/id.R b/R/id.R index 0f0dabcc..ceb59f5d 100644 --- a/R/id.R +++ b/R/id.R @@ -114,6 +114,11 @@ id.tbl_dbi <- function(db_table, ...) 
{ stop("Unknown table specification", call. = FALSE) } + # Unquote table names for Oracle backend + if (inherits(table_conn, "OracleJdbc")) { + components <- stringr::str_remove_all(components, '\"') + } + table <- purrr::pluck(components, 1) schema <- purrr::pluck(components, 2) catalog <- purrr::pluck(components, 3) diff --git a/R/locks.R b/R/locks.R index 6dd6e056..5dbedb66 100644 --- a/R/locks.R +++ b/R/locks.R @@ -58,7 +58,8 @@ lock_table <- function(conn, db_table, schema = NULL) { "pid" = numeric(0) ), db_lock_table_id, - temporary = FALSE + temporary = FALSE, + analyze = FALSE ) if (inherits(conn, "PqConnection")) { # PostgreSQL needs an index for rows_insert @@ -89,7 +90,8 @@ lock_table <- function(conn, db_table, schema = NULL) { "pid" = Sys.getpid(), "lock_start" = as.numeric(Sys.time()) ), - name = unique_table_name("SCDB_lock") + name = unique_table_name("SCDB_lock"), + analyze = FALSE ) defer_db_cleanup(lock) @@ -174,7 +176,8 @@ unlock_table <- function(conn, db_table, schema = NULL, pid = Sys.getpid()) { "table" = purrr::pluck(db_table_id, "name", "table"), "pid" = pid ), - name = unique_table_name("SCDB_lock") + name = unique_table_name("SCDB_lock"), + analyze = FALSE ) defer_db_cleanup(lock) diff --git a/R/schema_exists.R b/R/schema_exists.R index 4f67f668..e2d9e775 100644 --- a/R/schema_exists.R +++ b/R/schema_exists.R @@ -36,6 +36,14 @@ schema_exists.DBIConnection <- function(conn, schema) { return(nrow(result) == 1) } +#' @export +schema_exists.Oracle <- function(conn, schema) { + query <- paste0("SELECT * FROM ALL_USERS WHERE USERNAME = '", schema, "'") + result <- DBI::dbGetQuery(conn, query) + + return(nrow(result) == 1) +} + #' @export schema_exists.default <- function(conn, schema) { diff --git a/R/update_snapshot.R b/R/update_snapshot.R index 11562bbb..5b9d28a3 100644 --- a/R/update_snapshot.R +++ b/R/update_snapshot.R @@ -190,7 +190,7 @@ update_snapshot <- function(.data, conn, db_table, timestamp, filters = NULL, me # Copy to the target 
connection if needed if (!identical(dbplyr::remote_con(.data), conn)) { - .data <- dplyr::copy_to(conn, .data, name = unique_table_name("SCDB_update_snapshot_input")) + .data <- dplyr::copy_to(conn, .data, name = unique_table_name("SCDB_update_snapshot_input"), analyze = FALSE) defer_db_cleanup(.data) } @@ -221,7 +221,7 @@ update_snapshot <- function(.data, conn, db_table, timestamp, filters = NULL, me # Apply filter to current records if (!is.null(filters) && !identical(dbplyr::remote_con(filters), conn)) { - filters <- dplyr::copy_to(conn, filters, name = unique_table_name("SCDB_update_snapshot_filter")) + filters <- dplyr::copy_to(conn, filters, name = unique_table_name("SCDB_update_snapshot_filter"), analyze = FALSE) defer_db_cleanup(filters) } db_table <- filter_keys(db_table, filters) diff --git a/docker-specs b/docker-specs new file mode 100644 index 00000000..d22f0d28 --- /dev/null +++ b/docker-specs @@ -0,0 +1,16 @@ +FROM r-base:latest + +# tex packages are installed in /root/bin so we have to make sure those +# packages are accessible by adding that directory to the PATH variable. 
+ENV PATH="/opt/hostedtoolcache/pandoc/3.1.11/x64:/snap/bin:/home/runner/.local/bin:/opt/pipx_bin:/home/runner/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/root/bin" + +# system dependencies (apt-get since r-base is debian based) +RUN apt-get update; \ + apt-get install -y build-essential gfortran\ + libapparmor-dev libboost-all-dev libcairo2-dev libcurl4-gnutls-dev\ + libfontconfig1-dev libgsl-dev libjpeg-dev liblapack-dev libpng-dev\ + libproj-dev libsodium-dev libssl-dev libudunits2-dev libxml2-dev\ + mesa-common-dev libglu1-mesa-dev libharfbuzz-dev libfribidi-dev\ + default-jre default-jdk pandoc git gnupg; + +RUN R -e 'install.packages("pak")' diff --git a/inst/WORDLIST b/inst/WORDLIST index 740f2ec9..8fdc9a2d 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -37,6 +37,8 @@ IANA Institut ip +JDBC + Kimball Lasse diff --git a/man/get_connection.Rd b/man/get_connection.Rd index 4c474332..27d354c1 100644 --- a/man/get_connection.Rd +++ b/man/get_connection.Rd @@ -6,6 +6,7 @@ \alias{get_connection.PqDriver} \alias{get_connection.OdbcDriver} \alias{get_connection.duckdb_driver} +\alias{get_connection.JDBCDriver} \alias{get_connection.default} \title{Opens connection to the database} \usage{ @@ -23,8 +24,8 @@ get_connection(drv, ...) dbname = NULL, host = NULL, port = NULL, - password = NULL, user = NULL, + password = NULL, ..., bigint = c("integer", "bigint64", "numeric", "character"), check_interrupts = TRUE, @@ -49,6 +50,16 @@ get_connection(drv, ...) timezone_out = Sys.timezone() ) +\method{get_connection}{JDBCDriver}( + drv, + driverClass = NULL, + classPath = NULL, + url = NULL, + user = NULL, + password = NULL, + ... +) + \method{get_connection}{default}(drv, ...) 
} \arguments{ @@ -70,12 +81,12 @@ The ip of the host to connect to.} \item{port}{(\code{numeric(1)} or \code{character(1)})\cr Host port to connect to.} -\item{password}{(\code{character(1)})\cr -Password to login with.} - \item{user}{(\code{character(1)})\cr Username to login with.} +\item{password}{(\code{character(1)})\cr +Password to login with.} + \item{check_interrupts}{(\code{logical(1)})\cr Should user interrupts be checked during the query execution?} @@ -90,6 +101,15 @@ The data source name to connect to.} \item{dbdir}{(\code{character(1)})\cr The directory where the database is located.} + +\item{driverClass}{(\code{character(1)})\cr +The name of the JDBC driver to load.} + +\item{classPath}{(\code{character(1)})\cr +The path to the JDBC driver.} + +\item{url}{(\code{character(1)})\cr +The source to connect to.} } \value{ An object that inherits from \code{DBIConnection} driver specified in \code{drv}. @@ -119,4 +139,6 @@ Certain drivers may use credentials stored in a file, such as ~/.pgpass (Postgre \link[odbc:dbConnect-OdbcDriver-method]{odbc::odbc} \link[duckdb:duckdb]{duckdb::duckdb} + +\link[RJDBC:JDBC]{RJDBC::JDBC} } diff --git a/pak.lock b/pak.lock index 415bbfc9..06f4fa88 100644 --- a/pak.lock +++ b/pak.lock @@ -2326,7 +2326,7 @@ "ref": "local::.", "binary": false, "dep_types": ["Depends", "Imports", "LinkingTo", "Suggests"], - "dependencies": ["checkmate", "DBI", "dbplyr", "dplyr", "glue", "openssl", "parallelly", "purrr", "rlang", "R6", "stringr", "tidyr", "tidyselect", "magrittr"], + "dependencies": ["checkmate", "DBI", "dbplyr", "dplyr", "glue", "openssl", "parallelly", "purrr", "rJava", "RJDBC", "rlang", "R6", "stringr", "tidyr", "tidyselect", "magrittr"], "direct": true, "directpkg": true, "install_args": "", @@ -2836,7 +2836,7 @@ "ref": "pkgdown", "binary": true, "dep_types": ["Depends", "Imports", "LinkingTo"], - "dependencies": ["bslib", "callr", "cli", "desc", "downlit", "fontawesome", "fs", "httr2", "jsonlite", "openssl", "purrr", 
"ragg", "rlang", "rmarkdown", "tibble", "whisker", "withr", "xml2", "yaml"], + "dependencies": ["bslib", "callr", "cli", "desc", "downlit", "fontawesome", "fs", "httr2", "jsonlite", "lifecycle", "openssl", "purrr", "ragg", "rlang", "rmarkdown", "tibble", "whisker", "withr", "xml2", "yaml"], "direct": false, "directpkg": false, "install_args": "", @@ -2847,7 +2847,7 @@ "RemoteRef": "pkgdown", "RemoteRepos": "https://p3m.dev/cran/__linux__/noble/latest", "RemotePkgPlatform": "x86_64-pc-linux-gnu-ubuntu-24.04", - "RemoteSha": "2.1.3" + "RemoteSha": "2.2.0" }, "needscompilation": false, "package": "pkgdown", @@ -2855,7 +2855,7 @@ "platform": "x86_64-pc-linux-gnu-ubuntu-24.04", "repotype": "cran", "rversion": "4.5", - "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/pkgdown_2.1.3.tar.gz", + "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/pkgdown_2.2.0.tar.gz", "sysreqs": "pandoc", "sysreqs_packages": [ { @@ -2865,9 +2865,9 @@ "post_install": {} } ], - "target": "src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/pkgdown_2.1.3.tar.gz", + "target": "src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/pkgdown_2.2.0.tar.gz", "type": "standard", - "version": "2.1.3", + "version": "2.2.0", "vignettes": false }, { @@ -3102,7 +3102,7 @@ "RemoteRef": "purrr", "RemoteRepos": "https://p3m.dev/cran/__linux__/noble/latest", "RemotePkgPlatform": "x86_64-pc-linux-gnu-ubuntu-24.04", - "RemoteSha": "1.1.0" + "RemoteSha": "1.2.0" }, "needscompilation": false, "package": "purrr", @@ -3110,12 +3110,12 @@ "platform": "x86_64-pc-linux-gnu-ubuntu-24.04", "repotype": "cran", "rversion": "4.5", - "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/purrr_1.1.0.tar.gz", + "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/purrr_1.2.0.tar.gz", "sysreqs": "", "sysreqs_packages": {}, - "target": "src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/purrr_1.1.0.tar.gz", + "target": 
"src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/purrr_1.2.0.tar.gz", "type": "standard", - "version": "1.1.0", + "version": "1.2.0", "vignettes": false }, { @@ -3566,6 +3566,81 @@ "version": "1.2.1", "vignettes": false }, + { + "ref": "rJava", + "binary": true, + "dep_types": ["Depends", "Imports", "LinkingTo"], + "dependencies": [], + "direct": false, + "directpkg": false, + "install_args": "", + "license": "LGPL-2.1", + "metadata": { + "RemotePkgRef": "rJava", + "RemoteType": "standard", + "RemoteRef": "rJava", + "RemoteRepos": "https://p3m.dev/cran/__linux__/noble/latest", + "RemotePkgPlatform": "x86_64-pc-linux-gnu-ubuntu-24.04", + "RemoteSha": "1.0-11" + }, + "needscompilation": false, + "package": "rJava", + "params": [], + "platform": "x86_64-pc-linux-gnu-ubuntu-24.04", + "repotype": "cran", + "rversion": "4.5", + "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/rJava_1.0-11.tar.gz", + "sysreqs": "Java JDK 1.2 or higher (for JRI/REngine JDK 1.4 or\n higher), GNU make", + "sysreqs_packages": [ + { + "sysreq": "gnumake", + "packages": "make", + "pre_install": {}, + "post_install": {} + }, + { + "sysreq": "java", + "packages": "default-jdk", + "pre_install": {}, + "post_install": "R CMD javareconf" + } + ], + "target": "src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/rJava_1.0-11.tar.gz", + "type": "standard", + "version": "1.0-11", + "vignettes": false + }, + { + "ref": "RJDBC", + "binary": true, + "dep_types": ["Depends", "Imports", "LinkingTo"], + "dependencies": ["DBI", "rJava"], + "direct": false, + "directpkg": false, + "install_args": "", + "license": "MIT + file LICENSE", + "metadata": { + "RemotePkgRef": "RJDBC", + "RemoteType": "standard", + "RemoteRef": "RJDBC", + "RemoteRepos": "https://p3m.dev/cran/__linux__/noble/latest", + "RemotePkgPlatform": "x86_64-pc-linux-gnu-ubuntu-24.04", + "RemoteSha": "0.2-10" + }, + "needscompilation": false, + "package": "RJDBC", + "params": [], + "platform": "x86_64-pc-linux-gnu-ubuntu-24.04", + 
"repotype": "cran", + "rversion": "4.5", + "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/RJDBC_0.2-10.tar.gz", + "sysreqs": "", + "sysreqs_packages": {}, + "target": "src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/RJDBC_0.2-10.tar.gz", + "type": "standard", + "version": "0.2-10", + "vignettes": false + }, { "ref": "rlang", "binary": true, @@ -3750,7 +3825,7 @@ "RemoteRef": "RSQLite", "RemoteRepos": "https://p3m.dev/cran/__linux__/noble/latest", "RemotePkgPlatform": "x86_64-pc-linux-gnu-ubuntu-24.04", - "RemoteSha": "2.4.3" + "RemoteSha": "2.4.4" }, "needscompilation": false, "package": "RSQLite", @@ -3758,12 +3833,12 @@ "platform": "x86_64-pc-linux-gnu-ubuntu-24.04", "repotype": "cran", "rversion": "4.5", - "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/RSQLite_2.4.3.tar.gz", + "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/RSQLite_2.4.4.tar.gz", "sysreqs": "", "sysreqs_packages": {}, - "target": "src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/RSQLite_2.4.3.tar.gz", + "target": "src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/RSQLite_2.4.4.tar.gz", "type": "standard", - "version": "2.4.3", + "version": "2.4.4", "vignettes": false }, { @@ -4167,7 +4242,7 @@ "RemoteRef": "stringr", "RemoteRepos": "https://p3m.dev/cran/__linux__/noble/latest", "RemotePkgPlatform": "x86_64-pc-linux-gnu-ubuntu-24.04", - "RemoteSha": "1.5.2" + "RemoteSha": "1.6.0" }, "needscompilation": false, "package": "stringr", @@ -4175,12 +4250,12 @@ "platform": "x86_64-pc-linux-gnu-ubuntu-24.04", "repotype": "cran", "rversion": "4.5", - "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/stringr_1.5.2.tar.gz", + "sources": "https://p3m.dev/cran/__linux__/noble/latest/src/contrib/stringr_1.6.0.tar.gz", "sysreqs": "", "sysreqs_packages": {}, - "target": "src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/stringr_1.5.2.tar.gz", + "target": "src/contrib/x86_64-pc-linux-gnu-ubuntu-24.04/4.5/stringr_1.6.0.tar.gz", "type": 
"standard", - "version": "1.5.2", + "version": "1.6.0", "vignettes": false }, { @@ -5097,8 +5172,8 @@ "version": "24.04", "url": {}, "pre_install": "apt-get -y update", - "install_scripts": "apt-get -y install libx11-dev git libcurl4-openssl-dev libssl-dev xz-utils make libgit2-dev zlib1g-dev pandoc unixodbc-dev libfreetype6-dev libjpeg-dev libpng-dev libtiff-dev libwebp-dev libpq-dev libicu-dev libfontconfig1-dev libfribidi-dev libharfbuzz-dev libxml2-dev", - "post_install": {}, - "packages": ["libx11-dev", "git", "libcurl4-openssl-dev", "libssl-dev", "xz-utils", "make", "libgit2-dev", "zlib1g-dev", "pandoc", "unixodbc-dev", "libfreetype6-dev", "libjpeg-dev", "libpng-dev", "libtiff-dev", "libwebp-dev", "libpq-dev", "libicu-dev", "libfontconfig1-dev", "libfribidi-dev", "libharfbuzz-dev", "libxml2-dev"] + "install_scripts": "apt-get -y install libx11-dev git libcurl4-openssl-dev libssl-dev xz-utils make libgit2-dev zlib1g-dev pandoc unixodbc-dev libfreetype6-dev libjpeg-dev libpng-dev libtiff-dev libwebp-dev default-jdk libpq-dev libicu-dev libfontconfig1-dev libfribidi-dev libharfbuzz-dev libxml2-dev", + "post_install": "R CMD javareconf", + "packages": ["libx11-dev", "git", "libcurl4-openssl-dev", "libssl-dev", "xz-utils", "make", "libgit2-dev", "zlib1g-dev", "pandoc", "unixodbc-dev", "libfreetype6-dev", "libjpeg-dev", "libpng-dev", "libtiff-dev", "libwebp-dev", "default-jdk", "libpq-dev", "libicu-dev", "libfontconfig1-dev", "libfribidi-dev", "libharfbuzz-dev", "libxml2-dev"] } } diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index be5af595..1da66d00 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -29,29 +29,65 @@ checkmate::reportAssertions(coll) for (conn in get_test_conns()) { # Start with some clean up - purrr::walk(c("test.mtcars", "__mtcars", "__mtcars_historical", "test.mtcars_modified", "mtcars_modified", - "test.SCDB_logs", "test.SCDB_logger", "test.SCDB_tmp1", "test.SCDB_tmp2", "test.SCDB_tmp3", - "test.SCDB_t0", 
"test.SCDB_t1", "test.SCDB_t2"), - ~ if (DBI::dbExistsTable(conn, id(., conn))) DBI::dbRemoveTable(conn, id(., conn))) - - purrr::walk(c(DBI::Id(schema = "test", table = "one.two"), DBI::Id(schema = "test.one", table = "two")), - ~ if (schema_exists(conn, .@name[["schema"]]) && DBI::dbExistsTable(conn, .)) DBI::dbRemoveTable(conn, .)) - + purrr::walk( + c("test.mtcars", "__mtcars", "__mtcars_historical", "test.mtcars_modified", "mtcars_modified", + "test.SCDB_logs", "test.SCDB_logger", "test.SCDB_tmp1", "test.SCDB_tmp2", "test.SCDB_tmp3", + "test.SCDB_t0", "test.SCDB_t1", "test.SCDB_t2" + ), + ~ if (DBI::dbExistsTable(conn, id(., conn))) DBI::dbRemoveTable(conn, id(., conn)) + ) + + purrr::walk( + c(DBI::Id(schema = "test", table = "one.two"), DBI::Id(schema = "test.one", table = "two")), + ~ if (schema_exists(conn, .@name[["schema"]]) && DBI::dbExistsTable(conn, .)) DBI::dbRemoveTable(conn, .) + ) # Copy mtcars to conn - dplyr::copy_to(conn, mtcars %>% dplyr::mutate(name = rownames(mtcars)), - name = id("test.mtcars", conn), temporary = FALSE, overwrite = TRUE) - - dplyr::copy_to(conn, mtcars %>% dplyr::mutate(name = rownames(mtcars)), - name = id("__mtcars", conn), temporary = FALSE, overwrite = TRUE) - - dplyr::copy_to(conn, - mtcars %>% - dplyr::mutate(name = rownames(mtcars)) %>% - digest_to_checksum() %>% - dplyr::mutate(from_ts = as.POSIXct("2020-01-01 09:00:00"), - until_ts = as.POSIXct(NA)), - name = id("__mtcars_historical", conn), temporary = FALSE, overwrite = TRUE) + dplyr::copy_to( + conn, mtcars %>% dplyr::mutate(name = rownames(mtcars)), + name = id("test.mtcars", conn), + temporary = FALSE, + overwrite = TRUE, + analyze = FALSE + ) + + dplyr::copy_to( + conn, mtcars %>% dplyr::mutate(name = rownames(mtcars)), + name = id("__mtcars", conn), + temporary = FALSE, + overwrite = TRUE, + analyze = FALSE + ) + + dplyr::copy_to( + conn, + mtcars %>% + dplyr::mutate(name = rownames(mtcars)) %>% + digest_to_checksum() %>% + dplyr::mutate( + from_ts = 
as.POSIXct("2020-01-01 09:00:00"), + until_ts = as.POSIXct(NA) + ), + name = id("__mtcars_historical", conn), + temporary = FALSE, + overwrite = TRUE, + analyze = FALSE + ) + + dplyr::copy_to( + conn, + mtcars %>% + dplyr::mutate(name = rownames(mtcars)) %>% + digest_to_checksum() %>% + dplyr::mutate( + from_ts = as.POSIXct("2020-01-01 09:00:00"), + until_ts = as.POSIXct(NA) + ), + name = id("MTCARS", conn), + temporary = FALSE, + overwrite = TRUE, + analyze = FALSE + ) DBI::dbDisconnect(conn) } diff --git a/tests/testthat/test-Logger.R b/tests/testthat/test-Logger.R deleted file mode 100644 index 788d237b..00000000 --- a/tests/testthat/test-Logger.R +++ /dev/null @@ -1,544 +0,0 @@ -# Ensure the options that can be set are NULL for these tests -withr::local_options("SCDB.log_table_id" = NULL, "SCDB.log_path" = NULL) - -test_that("Logger: logging to console works", { - - # Create logger and test configuration - expect_warning( - logger <- Logger$new(), # nolint: implicit_assignment_linter - regexp = "NO file or database logging will be done." 
- ) - expect_null(logger$log_path) - expect_null(logger$log_tbl) - - # Test logging to console has the right formatting and message type - ts_str <- format(logger$start_time, "%F %R:%OS3") - expect_message( - logger$log_info("test console", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - INFO - test console") - ) - expect_warning( - logger$log_warn("test console", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - WARNING - test console") - ) - expect_error( - logger$log_error("test console", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - ERROR - test console") - ) - - # Clean up - rm(logger) - invisible(gc()) -}) - - -test_that("Logger: all (non-warning, non-error) logging to console can be disabled", { - - # Create logger - expect_warning( - logger <- Logger$new(output_to_console = FALSE), # nolint: implicit_assignment_linter - regexp = "NO file or database logging will be done." - ) - - # Test INFO-logging to console is disabled - ts_str <- format(logger$start_time, "%F %R:%OS3") - expect_no_message(logger$log_info("test", tic = logger$start_time)) - - # Clean up - rm(logger) - invisible(gc()) -}) - - -test_that("Logger: logging to file works", { - - # Set options for the test - log_path <- tempdir(check = TRUE) - db_table <- "test.SCDB_logger" - - - # Create logger and test configuration - # Empty logger should use default value from options - withr::with_options( - list("SCDB.log_path" = "local/path"), - { - logger <- Logger$new(db_table = db_table, warn = FALSE) - expect_identical(logger$log_path, "local/path") - - rm(logger) - invisible(gc()) - } - ) - - # Create logger and test configuration - # Test file logging - with character timestamp - timestamp <- "2022-01-01 09:00:00" - logger <- Logger$new( - db_table = db_table, - timestamp = timestamp, - log_path = log_path, - output_to_console = FALSE, - warn = FALSE - ) - - expect_identical(logger$log_path, log_path) - 
expect_identical( - logger$log_filename, - glue::glue("{format(logger$start_time, '%Y%m%d.%H%M')}.", - "{format(as.POSIXct(timestamp), '%Y_%m_%d')}.", - "{db_table}.log") - ) - - - # Test logging to file has the right formatting and message type - expect_no_message(logger$log_info("test filewriting", tic = logger$start_time)) - tryCatch(logger$log_warn("test filewriting", tic = logger$start_time), warning = function(w) NULL) - tryCatch(logger$log_error("test filewriting", tic = logger$start_time), error = function(e) NULL) - - ts_str <- format(logger$start_time, "%F %R:%OS3") - expect_true(logger$log_filename %in% dir(log_path)) - expect_identical( - readLines(logger$log_realpath), - c( - glue::glue("{ts_str} - {Sys.info()[['user']]} - INFO - test filewriting"), - glue::glue("{ts_str} - {Sys.info()[['user']]} - WARNING - test filewriting"), - glue::glue("{ts_str} - {Sys.info()[['user']]} - ERROR - test filewriting") - ) - ) - - file.remove(logger$log_realpath) - rm(logger) - invisible(gc()) - - - # Create logger and test configuration - # Test file logging - with POSIX timestamp - timestamp <- as.POSIXct("2022-02-01 09:00:00") - logger <- Logger$new( - db_table = db_table, - timestamp = timestamp, - log_path = log_path, - output_to_console = FALSE, - warn = FALSE - ) - - expect_identical(logger$log_path, log_path) - expect_identical( - logger$log_filename, - glue::glue("{format(logger$start_time, '%Y%m%d.%H%M')}.", - "{format(as.POSIXct(timestamp), '%Y_%m_%d')}.", - "{db_table}.log") - ) - - - # Test logging to file still works - expect_no_message(logger$log_info("test filewriting", tic = logger$start_time)) - - ts_str <- format(logger$start_time, "%F %R:%OS3") - expect_true(logger$log_filename %in% dir(log_path)) - expect_identical( - readLines(logger$log_realpath), - glue::glue( - "{ts_str} - {Sys.info()[['user']]} - INFO - test filewriting" - ) - ) - - # Clean up - file.remove(logger$log_realpath) - rm(logger) - invisible(gc()) -}) - - -test_that("Logger: 
log_tbl is not set when conn = NULL", { - - # Set options for the test - db_table <- "test.SCDB_logger" - timestamp <- "2022-03-01 09:00:00" - - # Create logger and test configuration - # Empty logger should use default value - logger <- Logger$new(db_table = db_table, timestamp = timestamp, warn = FALSE) - expect_null(logger$log_tbl) # log_table_id is NOT defined here, despite the option existing - # the logger does not have the connection, so cannot pull the table from conn - - # Clean up - rm(logger) - invisible(gc()) -}) - - -test_that("Logger: logging to database works", { - for (conn in get_test_conns()) { - - # Set options for the test - db_table <- "test.SCDB_logger" - timestamp <- "2022-04-01 09:00:00" - - # Create logger and test configuration - logger <- Logger$new(db_table = db_table, - timestamp = timestamp, - log_table_id = db_table, - log_conn = conn, - warn = FALSE) - - log_table_id <- dplyr::tbl(conn, id(db_table, conn)) - expect_identical(logger$log_tbl, log_table_id) - - - # Test Logger has pre-filled some information in the logs - db_table_id <- id(db_table, conn) - expect_identical(as.character(dplyr::pull(log_table_id, "date")), timestamp) - if ("catalog" %in% purrr::pluck(db_table_id, "name", names)) { - expect_identical(dplyr::pull(log_table_id, "catalog"), purrr::pluck(db_table_id, "name", "catalog")) - } - expect_identical(dplyr::pull(log_table_id, "schema"), purrr::pluck(db_table_id, "name", "schema")) - expect_identical(dplyr::pull(log_table_id, "table"), purrr::pluck(db_table_id, "name", "table")) - expect_identical( # Transferring start_time to database can have some loss of information that we need to match - format(as.POSIXct(dplyr::pull(log_table_id, "start_time")), "%F %R:%S"), - format(logger$start_time, "%F %R:%S") - ) - - - # Test logging to database writes to the correct fields - logger$log_to_db(n_insertions = 42) - expect_identical(nrow(log_table_id), 1L) - expect_identical(dplyr::pull(log_table_id, "n_insertions"), 42L) - - 
logger$log_to_db(n_deactivations = 60) - expect_identical(nrow(log_table_id), 1L) - expect_identical(dplyr::pull(log_table_id, "n_deactivations"), 60L) - - - # Clean up - rm(logger) - invisible(gc()) - - connection_clean_up(conn) - } -}) - - -test_that("Logger: all logging simultaneously works", { - for (conn in get_test_conns()) { - - # Set options for the test - log_path <- tempdir(check = TRUE) - db_table <- "test.SCDB_logger" - timestamp <- "2022-05-01 09:00:00" - - # Create logger and test configuration - logger <- Logger$new(db_table = db_table, timestamp = timestamp, log_path = log_path, - log_table_id = db_table, log_conn = conn, warn = FALSE) - - log_table_id <- dplyr::tbl(conn, id(db_table, conn)) - expect_identical(logger$log_path, log_path) - expect_identical(logger$log_tbl, log_table_id) - expect_identical( - logger$log_filename, - glue::glue("{format(logger$start_time, '%Y%m%d.%H%M')}.", - "{format(as.POSIXct(timestamp), '%Y_%m_%d')}.", - "{id(db_table, conn)}.log") - ) - - # Test logging to console has the right formatting and message type - ts_str <- format(logger$start_time, "%F %R:%OS3") - expect_message( - logger$log_info("test console and filewriting", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - INFO - test console and filewriting") - ) - expect_warning( - logger$log_warn("test console and filewriting", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - WARNING - test console and filewriting") - ) - expect_error( - logger$log_error("test console and filewriting", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - ERROR - test console and filewriting") - ) - - - # Test logging to file has the right formatting and message type - expect_true(logger$log_filename %in% dir(log_path)) - expect_identical( - readLines(logger$log_realpath), - c( - glue::glue("{ts_str} - {Sys.info()[['user']]} - INFO - test console and filewriting"), - glue::glue("{ts_str} - 
{Sys.info()[['user']]} - WARNING - test console and filewriting"), - glue::glue("{ts_str} - {Sys.info()[['user']]} - ERROR - test console and filewriting") - ) - ) - - - # Test logging to database writes to the correct fields - logger$log_to_db(n_insertions = 13) - expect_identical(nrow(log_table_id), 2L) - expect_identical(dplyr::pull(log_table_id, "n_insertions"), c(42L, 13L)) - - logger$log_to_db(n_deactivations = 37) - expect_identical(nrow(log_table_id), 2L) - expect_identical(dplyr::pull(log_table_id, "n_deactivations"), c(60L, 37L)) - - - # Clean up - file.remove(logger$log_realpath) - rm(logger) - invisible(gc()) - - connection_clean_up(conn) - } -}) - - -test_that("Logger: file logging stops if file exists", { - - # Set options for the test - log_path <- tempdir(check = TRUE) - db_table <- "test.SCDB_logger" - timestamp <- Sys.time() - - # Create logger1 and logger2 which uses the same file - # Since start_time is the same for both - logger1 <- Logger$new( - db_table = db_table, - timestamp = timestamp, - start_time = timestamp, - log_path = log_path, - output_to_console = FALSE - ) - - logger2 <- Logger$new( - db_table = db_table, - timestamp = timestamp, - start_time = timestamp, - log_path = log_path, - output_to_console = FALSE - ) - - - # logger1 should be able to successfully write - logger1$log_info("test message") - - - # whereas logger2 should fail since the log file now exists - expect_error( - logger2$log_info("test message"), - glue::glue("Log file '{logger1$log_filename}' already exists!") - ) - - # .. 
and it should do it persistently - expect_error( - logger2$log_info("test message"), - glue::glue("Log file '{logger1$log_filename}' already exists!") - ) - - # Clean up - file.remove(logger1$log_realpath) - rm(logger1, logger2) - invisible(gc()) -}) - - -test_that("Logger: console output may be disabled", { - - # First test cases with output_to_console == FALSE - # Here, only print when explicitly stated - expect_warning( - logger <- Logger$new(output_to_console = FALSE), # nolint: implicit_assignment_linter - regexp = "NO file or database logging will be done." - ) - - expect_no_message(logger$log_info("Whoops! This should not have been printed!")) - - expect_no_message(logger$log_info("Whoops! This should not have been printed either!", output_to_console = FALSE)) - - expect_message( - logger$log_info("This line should be printed", output_to_console = TRUE), - "This line should be printed" - ) - - rm(logger) - - # ...and now, only suppress printing when explicitly stated - expect_warning( - logger <- Logger$new(output_to_console = TRUE), # nolint: implicit_assignment_linter - regexp = "NO file or database logging will be done." - ) - - expect_message( - logger$log_info("This line should be printed"), - "This line should be printed" - ) - expect_message( - logger$log_info("This line should also be printed", output_to_console = TRUE), - "This line should also be printed" - ) - - expect_no_message(logger$log_info("Whoops! 
This should not have been printed at all!", output_to_console = FALSE)) -}) - - -test_that("Logger: log_file is NULL in database if not writing to file", { - for (conn in get_test_conns()) { - - # Set options for the test - db_table <- "test.SCDB_logger" - timestamp <- "2022-06-01 09:00:00" - - # Create logger and test configuration - logger <- Logger$new(db_table = db_table, timestamp = timestamp, log_conn = conn, log_table_id = "test.SCDB_logger") - - # While logger is active, log_file should be set as the random generated - db_log_file <- dplyr::pull(dplyr::filter(logger$log_tbl, log_file == !!logger$log_filename)) - expect_length(db_log_file, 1) - expect_match(db_log_file, "^.+$") - - # When finalising, log_file should be set to NULL - logger$.__enclos_env__$private$finalize() - - db_log_file <- dplyr::pull(dplyr::filter(logger$log_tbl, log_file == !!logger$log_filename)) - expect_length(db_log_file, 0) - - # Test that an error is thrown if the database record has been finalized - expect_error( - logger$log_to_db(message = "This should produce an error"), - "Logger has already been finalized\\. Cannot write to database log table\\." 
- ) - - - # Clean up - rm(logger) - invisible(gc()) - - connection_clean_up(conn) - } -}) - - -test_that("Logger: $finalize() handles log table is at some point deleted", { - for (conn in get_test_conns()) { - - # Set options for the test - db_table <- "test.SCDB_logger" - timestamp <- "2022-06-01 09:00:00" - - log_table_id <- "expendable_log_table" - logger <- Logger$new(db_table = db_table, timestamp = timestamp, log_conn = conn, log_table_id = log_table_id) - - DBI::dbRemoveTable(conn, id(log_table_id, conn)) - - expect_no_error(logger$log_to_db(n_insertions = 42)) - - expect_no_error(logger$.__enclos_env__$private$finalize()) - - # Clean up - rm(logger) - invisible(gc()) - - connection_clean_up(conn) - } -}) - -test_that("Logger: custom timestamp_format works", { - - # Create logger and test configuration - expect_warning( - logger <- Logger$new(), # nolint: implicit_assignment_linter - regexp = "NO file or database logging will be done." - ) - - # Test logging to console has the right formatting and message type - ts_str <- format(logger$start_time, "%F %R:%OS3") - expect_message( - logger$log_info("test console", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - INFO - test console") - ) - - ts_str <- format(logger$start_time, "%F %R") - expect_message( - logger$log_info("test console", tic = logger$start_time, timestamp_format = "%F %R"), - glue::glue("{ts_str} - {Sys.info()[['user']]} - INFO - test console") - ) - - ts_str <- format(logger$start_time, "%F") - withr::local_options("SCDB.log_timestamp_format" = "%F") - expect_message( - logger$log_info("test console", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - INFO - test console") - ) - - # Clean up - rm(logger) - invisible(gc()) -}) - - -test_that("LoggerNull: no console logging occurs", { - - # Create logger and test configuration - logger <- expect_no_message(LoggerNull$new()) - - ts_str <- format(logger$start_time, "%F %R:%OS3") - 
expect_no_message(logger$log_info("test console", tic = logger$start_time)) - - # Test logging to console has the right formatting and message type - ts_str <- format(logger$start_time, "%F %R:%OS3") - expect_no_message( - logger$log_info("test console", tic = logger$start_time) - ) - expect_warning( - logger$log_warn("test console", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - WARNING - test console") - ) - expect_error( - logger$log_error("test console", tic = logger$start_time), - glue::glue("{ts_str} - {Sys.info()[['user']]} - ERROR - test console") - ) - - # Clean up - rm(logger) - invisible(gc()) -}) - - -test_that("LoggerNull: no file logging occurs", { - withr::local_options("SCDB.log_path" = tempdir()) - - # Create logger and test configuration - logger <- expect_no_message(LoggerNull$new()) - - expect_no_message(logger$log_info("test filewriting", tic = logger$start_time)) - expect_false(logger$log_filename %in% dir(getOption("SCDB.log_path"))) - - # Clean up - rm(logger) - invisible(gc()) -}) - - -test_that("LoggerNull: no database logging occurs", { - for (conn in get_test_conns()) { - - # Set options for the test - db_table <- "test.SCDB_logger" - timestamp <- "2022-06-01 09:00:00" - - # Count entries in log - n_log_entries <- nrow(dplyr::tbl(conn, id("test.SCDB_logger", conn))) - - # Create LoggerNull and test configuration - logger <- LoggerNull$new( - db_table = db_table, timestamp = timestamp, - log_conn = conn, log_table_id = "test.SCDB_logger" - ) - - expect_no_message(logger$log_to_db(n_insertions = 42)) - expect_no_message(logger$finalize_db_entry()) - expect_identical(nrow(dplyr::tbl(conn, id("test.SCDB_logger", conn))), n_log_entries) - - # Clean up - rm(logger) - invisible(gc()) - - close_connection(conn) - } -}) diff --git a/tests/testthat/test-connection.R b/tests/testthat/test-connection.R deleted file mode 100644 index 4fc2094c..00000000 --- a/tests/testthat/test-connection.R +++ /dev/null @@ -1,34 +0,0 @@ 
-test_that("get_connection() works", { - for (conn in get_test_conns()) { - expect_true(DBI::dbIsValid(conn)) - - connection_clean_up(conn) - } -}) - - -test_that("get_connection() notifies if connection fails", { - skip_if_not_installed("RSQLite") - - for (i in 1:100) { - random_string <- paste(sample(letters, size = 32, replace = TRUE), collapse = "") - - if (dir.exists(random_string)) next - - expect_error( - get_connection(drv = RSQLite::SQLite(), dbname = file.path(random_string, "/invalid_path")), - regexp = "checkmate::check_path_for_output\\(dbname\\)" - ) - } -}) - - -test_that("close_connection() works", { - for (conn in get_test_conns()) { - - # Check that we can close the connection - expect_true(DBI::dbIsValid(conn)) - close_connection(conn) - expect_false(DBI::dbIsValid(conn)) - } -}) diff --git a/tests/testthat/test-create_logs_if_missing.R b/tests/testthat/test-create_logs_if_missing.R deleted file mode 100644 index 43d4a8af..00000000 --- a/tests/testthat/test-create_logs_if_missing.R +++ /dev/null @@ -1,66 +0,0 @@ -test_that("create_logs_if_missing() can create logs in default and test schema", { - for (conn in get_test_conns()) { - for (schema in list(NULL, "test")) { - - # Generate table in schema that does not exist - k <- 0 - while (k < 100) { - logs_id <- paste(c(schema, paste(sample(letters, size = 16, replace = TRUE), collapse = "")), collapse = ".") - k <- k + 1 - if (DBI::dbExistsTable(conn, id(logs_id, conn))) next - break - } - - if (k < 100) { - - # We know table does not exists - expect_false(table_exists(conn, logs_id)) - - # We create the missing log table - expect_no_error(create_logs_if_missing(conn, log_table = logs_id)) - - # And check it conforms with the requirements - expect_true(table_exists(conn, logs_id)) - expect_identical(nrow(dplyr::tbl(conn, id(logs_id, conn))), 0L) - - log_signature <- data.frame( - date = as.POSIXct(character(0)), - catalog = character(0), - schema = character(0), - table = character(0), - 
n_insertions = integer(0), - n_deactivations = integer(0), - start_time = as.POSIXct(character(0)), - end_time = as.POSIXct(character(0)), - duration = character(0), - success = logical(), - message = character(0), - log_file = character(0) - ) - - if (!checkmate::test_multi_class(conn, c("Microsoft SQL Server", "duckdb_connection"))) { - log_signature <- dplyr::select(log_signature, !"catalog") - } - - log_signature <- log_signature %>% - dplyr::copy_to(conn, df = ., unique_table_name()) %>% - dplyr::collect() - - expect_identical( - dplyr::collect(dplyr::tbl(conn, id(logs_id, conn))), - log_signature - ) - - # Attempting to recreate the logs table should not change anything - expect_no_error(create_logs_if_missing(conn, log_table = logs_id)) - expect_true(table_exists(conn, logs_id)) - expect_identical(nrow(dplyr::tbl(conn, id(logs_id, conn))), 0L) - - } else { - warning("Non-existing table in default schema could not be generated!", call. = FALSE) - } - - } - connection_clean_up(conn) - } -}) diff --git a/tests/testthat/test-create_table.R b/tests/testthat/test-create_table.R deleted file mode 100644 index 3471ada3..00000000 --- a/tests/testthat/test-create_table.R +++ /dev/null @@ -1,98 +0,0 @@ -test_that("create_table() refuses a historical table", { - expect_error( - cars %>% - dplyr::mutate(from_ts = NA) %>% - create_table(db_table = "fail.cars"), - "checksum/from_ts/until_ts column\\(s\\) already exist\\(s\\) in \\.data!" 
- ) -}) - - -test_that("create_table() can create temporary tables", { - for (conn in get_test_conns()) { - - table <- create_table(cars, db_table = unique_table_name(), conn = conn, temporary = TRUE) - - expect_identical(colnames(table), c(colnames(cars), "checksum", "from_ts", "until_ts")) - expect_identical( - dplyr::collect(dplyr::select(table, -tidyselect::all_of(c("checksum", "from_ts", "until_ts")))), - dplyr::collect(dplyr::copy_to(conn, cars, unique_table_name()) %>% utils::head(0)) - ) - - connection_clean_up(conn) - } -}) - - -test_that("create_table() can create tables in default schema", { - for (conn in get_test_conns()) { - - table <- create_table(cars, db_table = unique_table_name(), conn = conn, temporary = FALSE) - defer_db_cleanup(table) - - expect_identical(colnames(table), c(colnames(cars), "checksum", "from_ts", "until_ts")) - expect_identical( - dplyr::collect(dplyr::select(table, -tidyselect::all_of(c("checksum", "from_ts", "until_ts")))), - dplyr::collect(dplyr::copy_to(conn, cars, unique_table_name()) %>% utils::head(0)) - ) - - connection_clean_up(conn) - } -}) - - -test_that("create_table() can create tables in non default schema", { - for (conn in get_test_conns()) { - - table <- create_table( - cars, db_table = id(paste0("test.", unique_table_name()), conn), conn = conn, temporary = FALSE - ) - defer_db_cleanup(table) - - expect_identical(colnames(table), c(colnames(cars), "checksum", "from_ts", "until_ts")) - expect_identical( - dplyr::collect(dplyr::select(table, -tidyselect::all_of(c("checksum", "from_ts", "until_ts")))), - dplyr::collect(dplyr::copy_to(conn, cars, unique_table_name()) %>% utils::head(0)) - ) - - connection_clean_up(conn) - } -}) - - -test_that("create_table() works with no conn", { - table <- create_table(cars, db_table = unique_table_name(), conn = NULL) - - expect_identical(colnames(table), c(colnames(cars), "checksum", "from_ts", "until_ts")) - expect_identical( - dplyr::select(table, 
-tidyselect::all_of(c("checksum", "from_ts", "until_ts"))), - cars %>% utils::head(0) - ) -}) - - -test_that("create_table() does not overwrite tables", { - for (conn in get_test_conns()) { - - table_name <- unique_table_name() - table <- create_table(cars, db_table = table_name, conn = conn, temporary = TRUE) - - table_regex <- paste( - paste(c(get_catalog(conn, temporary = TRUE), get_schema(conn, temporary = TRUE)), collapse = "."), - paste0("#?", table_name), - sep = "." - ) - - expect_error( - create_table(iris, db_table = table_name, conn = conn, temporary = TRUE), - regexp = paste("Table", table_regex, "already exists!") - ) - - expect_identical( - dplyr::collect(dplyr::select(table, -tidyselect::all_of(c("checksum", "from_ts", "until_ts")))), - dplyr::collect(dplyr::copy_to(conn, cars, unique_table_name()) %>% utils::head(0)) - ) - - connection_clean_up(conn) - } -}) diff --git a/tests/testthat/test-db_functions.R b/tests/testthat/test-db_functions.R deleted file mode 100644 index afa393de..00000000 --- a/tests/testthat/test-db_functions.R +++ /dev/null @@ -1,9 +0,0 @@ -test_that("is.historical() works", { - for (conn in get_test_conns()) { - - expect_true(is.historical(dplyr::tbl(conn, id("__mtcars_historical", conn)))) - expect_false(is.historical(dplyr::tbl(conn, id("__mtcars", conn)))) - - connection_clean_up(conn) - } -}) diff --git a/tests/testthat/test-db_joins.R b/tests/testthat/test-db_joins.R deleted file mode 100644 index 8fac777d..00000000 --- a/tests/testthat/test-db_joins.R +++ /dev/null @@ -1,196 +0,0 @@ -test_that("*_join() works with character `by` and `na_by`", { - for (conn in get_test_conns()) { - - # Create two more synthetic test data set with NA data - - # First test case - x <- data.frame(number = c("1", "2", NA), - t = c("strA", NA, "strB")) - - y <- data.frame(letter = c("A", "B", "A", "B"), - number = c(NA, "2", "1", "1")) - - # Copy x and y to conn - x <- dplyr::copy_to(conn, x, name = id("test.SCDB_tmp1", conn), overwrite = TRUE, 
temporary = FALSE) - - y <- dplyr::copy_to(conn, y, name = id("test.SCDB_tmp2", conn), overwrite = TRUE, temporary = FALSE) - - - q <- dplyr::left_join(x, y, na_by = "number") %>% - dplyr::collect() %>% - dplyr::arrange(number, t, letter) - qr <- dplyr::left_join(dplyr::collect(x), dplyr::collect(y), by = "number", multiple = "all") %>% - dplyr::arrange(number, t, letter) - expect_mapequal(q, qr) - - q <- dplyr::right_join(x, y, na_by = "number") %>% - dplyr::collect() %>% - dplyr::arrange(number, t, letter) - qr <- dplyr::right_join(dplyr::collect(x), dplyr::collect(y), by = "number", multiple = "all") %>% - dplyr::arrange(number, t, letter) - expect_identical(q, qr) - - q <- dplyr::inner_join(x, y, na_by = "number") %>% - dplyr::collect() %>% - dplyr::arrange(number, t, letter) - qr <- dplyr::full_join(dplyr::collect(x), dplyr::collect(y), by = "number", multiple = "all") %>% - dplyr::arrange(number, t, letter) - expect_identical(q, qr) - - - # Second test case - x <- data.frame(date = as.Date(c("2022-05-01", "2022-05-01", "2022-05-02", "2022-05-02")), - region_id = c("1", NA, NA, "1"), - n_start = c(3, NA, NA, NA)) - - y <- data.frame(date = as.Date("2022-05-02"), - region_id = "1", - n_add = 4) - - # Copy x and y to conn - x <- dplyr::copy_to(conn, x, name = id("test.SCDB_tmp1", conn), overwrite = TRUE, temporary = FALSE) - - y <- dplyr::copy_to(conn, y, name = id("test.SCDB_tmp2", conn), overwrite = TRUE, temporary = FALSE) - - - q <- dplyr::full_join(x, y, by = "date", na_by = "region_id") %>% - dplyr::collect() %>% - dplyr::arrange(date, region_id) - qr <- dplyr::full_join(dplyr::collect(x), dplyr::collect(y), by = c("date", "region_id")) %>% - dplyr::arrange(date, region_id) - expect_identical(q, qr) - - - - - # Some other test cases - x <- get_table(conn, "__mtcars") %>% - dplyr::select(name, mpg, cyl, hp, vs, am, gear, carb) - - y <- get_table(conn, "__mtcars") %>% - dplyr::select(name, drat, wt, qsec) - - xx <- x %>% dplyr::mutate(name = 
dplyr::if_else(dplyr::row_number() == 1, NA, name)) - yy <- y %>% dplyr::mutate(name = dplyr::if_else(dplyr::row_number() == 1, NA, name)) - - # Using by should give 1 mismatch - # Using na_by should give no mismatch - expect_identical( - dplyr::left_join(xx, xx, by = "name") %>% - dplyr::summarize(n = sum(dplyr::if_else(is.na(cyl.y), 1, 0), na.rm = TRUE)) %>% # nolint: redundant_ifelse_linter - dplyr::pull(n), - 1 - ) - expect_identical( - dplyr::left_join(xx, xx, na_by = "name") %>% - dplyr::summarize(n = sum(dplyr::if_else(is.na(cyl.y), 1, 0), na.rm = TRUE)) %>% # nolint: redundant_ifelse_linter - dplyr::pull(n), - 0 - ) - - # And they should be identical with the simple case - expect_identical( - dplyr::left_join(xx, xx, na_by = "name") %>% - dplyr::select(!"name") %>% - dplyr::collect(), - dplyr::left_join(x, x, na_by = "name") %>% - dplyr::select(!"name") %>% - dplyr::collect() - ) - - connection_clean_up(conn) - } -}) - - -test_that("*_join() works with `dplyr::join_by()`", { - for (conn in get_test_conns()) { - - # Define two test datasets - x <- get_table(conn, "__mtcars") %>% - dplyr::select(name, mpg, cyl, hp, vs, am, gear, carb) - - y <- get_table(conn, "__mtcars") %>% - dplyr::select(name, drat, wt, qsec) - - - # Test the implemented joins - q <- dplyr::left_join(x, y, by = dplyr::join_by(x$name == y$name)) %>% dplyr::collect() - qr <- dplyr::left_join(dplyr::collect(x), dplyr::collect(y), by = dplyr::join_by(x$name == y$name)) - expect_identical(q, qr) - - q <- dplyr::right_join(x, y, by = dplyr::join_by(x$name == y$name)) %>% dplyr::collect() - qr <- dplyr::right_join(dplyr::collect(x), dplyr::collect(y), by = dplyr::join_by(x$name == y$name)) - expect_identical(q, qr) - - q <- dplyr::inner_join(x, y, by = dplyr::join_by(x$name == y$name)) %>% dplyr::collect() - qr <- dplyr::inner_join(dplyr::collect(x), dplyr::collect(y), by = dplyr::join_by(x$name == y$name)) - expect_identical(q, qr) - - connection_clean_up(conn) - } -}) - - -test_that("*_join() 
does not break any dplyr joins", { - for (conn in get_test_conns()) { - - # Define two test datasets - x <- get_table(conn, "__mtcars") %>% - dplyr::select(name, mpg, cyl, hp, vs, am, gear, carb) - - y <- get_table(conn, "__mtcars") %>% - dplyr::select(name, drat, wt, qsec) - - # Test the standard joins - # left_join - qr <- dplyr::left_join(dplyr::collect(x), dplyr::collect(y), by = "name") - q <- dplyr::left_join(x, y, by = "name") %>% dplyr::collect() - expect_identical(q, qr) - - q <- dplyr::left_join(x, y, by = dplyr::join_by(x$name == y$name)) %>% dplyr::collect() - expect_identical(q, qr) - - # right_join - qr <- dplyr::right_join(dplyr::collect(x), dplyr::collect(y), by = "name") - q <- dplyr::right_join(x, y, by = "name") %>% dplyr::collect() - expect_identical(q, qr) - - q <- dplyr::right_join(x, y, by = dplyr::join_by(x$name == y$name)) %>% dplyr::collect() - expect_identical(q, qr) - - # inner_join - qr <- dplyr::inner_join(dplyr::collect(x), dplyr::collect(y), by = "name") - q <- dplyr::inner_join(x, y, by = "name") %>% dplyr::collect() - expect_identical(q, qr) - - q <- dplyr::inner_join(x, y, by = dplyr::join_by(x$name == y$name)) %>% dplyr::collect() - expect_identical(q, qr) - - # full_join - qr <- dplyr::full_join(dplyr::collect(x), dplyr::collect(y), by = "name") - q <- dplyr::full_join(x, y, by = "name") %>% dplyr::collect() - expect_identical(q, qr) - - q <- dplyr::full_join(x, y, by = dplyr::join_by(x$name == y$name)) %>% dplyr::collect() - expect_identical(q, qr) - - # semi_join - qr <- dplyr::semi_join(dplyr::collect(x), dplyr::collect(y), by = "name") - q <- dplyr::semi_join(x, y, by = "name") %>% dplyr::collect() - expect_identical(q, qr) - - q <- dplyr::semi_join(x, y, by = dplyr::join_by(x$name == y$name)) %>% dplyr::collect() - expect_identical(q, qr) - - # anti_join - qr <- dplyr::anti_join(dplyr::collect(x), dplyr::collect(y), by = "name") - q <- dplyr::anti_join(x, y, by = "name") %>% dplyr::collect() - expect_identical(q, qr) - - q 
<- dplyr::anti_join(x, y, by = dplyr::join_by(x$name == y$name)) %>% dplyr::collect()
-    expect_identical(q, qr)
-
-    connection_clean_up(conn)
-  }
-})
diff --git a/tests/testthat/test-defer_db_cleanup.R b/tests/testthat/test-defer_db_cleanup.R
new file mode 100644
index 00000000..6797bb60
--- /dev/null
+++ b/tests/testthat/test-defer_db_cleanup.R
@@ -0,0 +1,20 @@
+test_that("`defer_db_cleanup()` deletes tables", {
+  for (conn in get_test_conns()) {
+    table <- dplyr::copy_to(conn, mtcars, "__mtcars_defer_db_cleanup")
+    table_id <- id(table, conn)
+
+    # Table exists
+    expect_true(DBI::dbExistsTable(conn, "__mtcars_defer_db_cleanup"))
+    expect_true(DBI::dbExistsTable(conn, table_id))
+
+    # Marking for deletion does not delete the table
+    defer_db_cleanup(table_id)
+    expect_true(DBI::dbExistsTable(conn, table_id))
+
+    # Manually triggering deletion deletes the table
+    withr::deferred_run()
+    expect_false(DBI::dbExistsTable(conn, table_id))
+
+    connection_clean_up(conn)
+  }
+})
diff --git a/tests/testthat/test-digest_to_checksum.R b/tests/testthat/test-digest_to_checksum.R
deleted file mode 100644
index 358785c5..00000000
--- a/tests/testthat/test-digest_to_checksum.R
+++ /dev/null
@@ -1,59 +0,0 @@
-test_that("digest_to_checksum() works", {
-  for (conn in get_test_conns()) {
-
-    expect_s3_class(mtcars %>% digest_to_checksum(), "data.frame")
-    expect_s3_class(mtcars %>% tibble::as_tibble() %>% digest_to_checksum(), "tbl_df")
-    expect_s3_class(get_table(conn, "__mtcars") %>% digest_to_checksum(), "tbl_dbi")
-
-    # Check that col argument works
-    expect_identical(
-      mtcars %>% digest_to_checksum(col = "checky") %>% dplyr::pull("checky"),
-      mtcars %>% digest_to_checksum() %>% dplyr::pull("checksum")
-    )
-
-
-    expect_identical(
-      mtcars %>% dplyr::mutate(name = rownames(mtcars)) %>% digest_to_checksum() %>% colnames(),
-      get_table(conn, "__mtcars") %>% digest_to_checksum() %>% colnames()
-    )
-
-
-    # Check that NA's generate unique checksums
-    x <- 
data.frame(col1 = c("A", NA), - col2 = c(NA, "A")) - - # .. locally - checksums <- x %>% digest_to_checksum() %>% dplyr::pull("checksum") - expect_false(checksums[1] == checksums[2]) - - # .. and on the remote - x <- dplyr::copy_to(conn, x, name = id("test.SCDB_tmp1", conn), overwrite = TRUE, temporary = FALSE) - - checksums <- x %>% digest_to_checksum() %>% dplyr::pull("checksum") - expect_false(checksums[1] == checksums[2]) - - connection_clean_up(conn) - } -}) - - -test_that("digest_to_checksum() warns works correctly when overwriting", { - for (conn in get_test_conns()) { - - checksum_vector_1 <- mtcars %>% - digest_to_checksum() %>% - dplyr::pull(checksum) - - expect_warning( - checksum_vector_2 <- mtcars %>% # nolint: implicit_assignment_linter - digest_to_checksum(col = "checksum") %>% - digest_to_checksum(col = "checksum") %>% - dplyr::pull(checksum), - "Column checksum already exists in data and will be overwritten!" - ) - - expect_identical(checksum_vector_1, checksum_vector_2) - - connection_clean_up(conn) - } -}) diff --git a/tests/testthat/test-filter_keys.R b/tests/testthat/test-filter_keys.R deleted file mode 100644 index 31f8cab3..00000000 --- a/tests/testthat/test-filter_keys.R +++ /dev/null @@ -1,77 +0,0 @@ -test_that("filter_keys() works", { - for (conn in get_test_conns()) { - - x <- get_table(conn, "__mtcars") - - expect_identical( - x, - x %>% filter_keys(NULL) - ) - - filter <- x %>% utils::head(10) %>% dplyr::select("name") - expect_identical( - x %>% - dplyr::filter(name %in% !!dplyr::pull(filter, "name")) %>% - dplyr::collect(), - x %>% - filter_keys(filter) %>% - dplyr::collect() - ) - - filter <- x %>% utils::head(10) %>% dplyr::select("vs", "am") %>% dplyr::distinct() - expect_identical( - x %>% - dplyr::inner_join(filter, by = c("vs", "am")) %>% - dplyr::collect(), - x %>% - filter_keys(filter) %>% - dplyr::collect() - ) - - # Filtering with null means no filtering is done - m <- mtcars - row.names(m) <- NULL - filter <- NULL - 
expect_identical(filter_keys(m, filter), m) - - # Filtering by vs = 0 - filter <- data.frame(vs = 0) - expect_mapequal(filter_keys(m, filter), dplyr::filter(m, .data$vs == 0)) - - # Empty filter should result in no rows - expect_identical( - utils::head(x, 0), - x %>% filter_keys(data.frame(vs = numeric(0), am = numeric(0))) - ) - - connection_clean_up(conn) - } -}) - - -test_that("filter_keys() works with copy = TRUE", { - for (conn in get_test_conns()) { - - x <- get_table(conn, "__mtcars") - - filter <- x %>% - utils::head(10) %>% - dplyr::select("name") %>% - dplyr::collect() - - expect_identical( - x %>% - dplyr::filter(.data$name %in% !!dplyr::pull(filter, "name")) %>% - dplyr::collect(), - x %>% - filter_keys(filter, copy = TRUE) %>% - dplyr::collect() - ) - - # The above filter_keys with `copy = TRUE` generates a dbplyr_### table. - # We manually remove this since we expect it. If more arise, we will get an error. - DBI::dbRemoveTable(conn, id(utils::head(get_tables(conn, "dbplyr_"), 1))) - - connection_clean_up(conn) - } -}) diff --git a/tests/testthat/test-getTableSignature.R b/tests/testthat/test-getTableSignature.R deleted file mode 100644 index f5859a07..00000000 --- a/tests/testthat/test-getTableSignature.R +++ /dev/null @@ -1,369 +0,0 @@ -withr::local_options("stringsAsFactors" = FALSE) # Compatibility with R < 4.0.0 - -# Generate test datasets with different data types - -# One that follows the structure in update_snapshot() -data_update_snapsnot <- data.frame( - "Date" = Sys.Date(), - "POSIXct" = Sys.time(), - "character" = "test", - "integer" = 1L, - "numeric" = 1, - "logical" = TRUE, - # .. and our special columns - "checksum" = "test", - "from_ts" = Sys.time(), - "until_ts" = Sys.time() -) - -# One that has the special columns of update_snapshot(), but not at the end -data_random <- data.frame( - "Date" = Sys.Date(), - "POSIXct" = Sys.time(), - "character" = "test", - # .. 
Our special columns, but not at the end - "checksum" = "test", - "from_ts" = Sys.time(), - "until_ts" = Sys.time(), - # .. - "integer" = 1L, - "numeric" = 1, - "logical" = TRUE -) - -for (conn in c(list(NULL), get_test_conns())) { - - if (is.null(conn)) { - test_that("getTableSignature() generates signature for update_snapshot() (conn == NULL)", { - expect_identical( - getTableSignature(data_update_snapsnot, conn), - c( - "Date" = "Date", - "POSIXct" = "POSIXct", - "character" = "character", - "integer" = "integer", - "numeric" = "numeric", - "logical" = "logical", - # .. - "checksum" = "character", - "from_ts" = "POSIXct", - "until_ts" = "POSIXct" - ) - ) - }) - } - - if (inherits(conn, "SQLiteConnection")) { - test_that("getTableSignature() generates signature for update_snapshot() (SQLiteConnection)", { - expect_identical( - getTableSignature(data_update_snapsnot, conn), - c( - "Date" = "DATE", - "POSIXct" = "TIMESTAMP", - "character" = "TEXT", - "integer" = "INT", - "numeric" = "DOUBLE", - "logical" = "SMALLINT", - # .. - "checksum" = "TEXT", - "from_ts" = "TIMESTAMP", - "until_ts" = "TIMESTAMP" - ) - ) - }) - } - - if (inherits(conn, "PqConnection")) { - test_that("getTableSignature() generates signature for update_snapshot() (PqConnection)", { - expect_identical( - getTableSignature(data_update_snapsnot, conn), - c( - "Date" = "DATE", - "POSIXct" = "TIMESTAMPTZ", - "character" = "TEXT", - "integer" = "INTEGER", - "numeric" = "DOUBLE PRECISION", - "logical" = "BOOLEAN", - # .. - "checksum" = "CHAR(32)", - "from_ts" = "TIMESTAMP", - "until_ts" = "TIMESTAMP" - ) - ) - }) - } - - if (inherits(conn, "Microsoft SQL Server")) { - test_that("getTableSignature() generates signature for update_snapshot() (Microsoft SQL Server)", { - expect_identical( - getTableSignature(data_update_snapsnot, conn), - c( - "Date" = "DATE", - "POSIXct" = "DATETIME", - "character" = "varchar(255)", - "integer" = "INT", - "numeric" = "FLOAT", - "logical" = "BIT", - # .. 
- "checksum" = "CHAR(64)", - "from_ts" = "DATETIME", - "until_ts" = "DATETIME" - ) - ) - }) - } - - if (inherits(conn, "duckdb_connection")) { - test_that("getTableSignature() generates signature for update_snapshot() (duckdb_connection)", { - expect_identical( - getTableSignature(data_update_snapsnot, conn), - c( - "Date" = "DATE", - "POSIXct" = "TIMESTAMP", - "character" = "STRING", - "integer" = "INTEGER", - "numeric" = "DOUBLE", - "logical" = "BOOLEAN", - # .. - "checksum" = "char(32)", - "from_ts" = "TIMESTAMP", - "until_ts" = "TIMESTAMP" - ) - ) - }) - } - - - if (is.null(conn)) { - test_that("getTableSignature() generates signature for random data (conn == NULL)", { - expect_identical( - getTableSignature(data_random, conn), - c( - "Date" = "Date", - "POSIXct" = "POSIXct", - "character" = "character", - # .. - "checksum" = "character", - "from_ts" = "POSIXct", - "until_ts" = "POSIXct", - # .. - "integer" = "integer", - "numeric" = "numeric", - "logical" = "logical" - ) - ) - }) - } - - if (inherits(conn, "SQLiteConnection")) { - test_that("getTableSignature() generates signature for random data (SQLiteConnection)", { - expect_identical( - getTableSignature(data_random, conn), - c( - "Date" = "DATE", - "POSIXct" = "TIMESTAMP", - "character" = "TEXT", - # .. - "checksum" = "TEXT", - "from_ts" = "TIMESTAMP", - "until_ts" = "TIMESTAMP", - # .. - "integer" = "INT", - "numeric" = "DOUBLE", - "logical" = "SMALLINT" - ) - ) - }) - } - - if (inherits(conn, "PqConnection")) { - test_that("getTableSignature() generates signature for random data (PqConnection)", { - expect_identical( - getTableSignature(data_random, conn), - c( - "Date" = "DATE", - "POSIXct" = "TIMESTAMPTZ", - "character" = "TEXT", - # .. - "checksum" = "TEXT", - "from_ts" = "TIMESTAMPTZ", - "until_ts" = "TIMESTAMPTZ", - # .. 
- "integer" = "INTEGER", - "numeric" = "DOUBLE PRECISION", - "logical" = "BOOLEAN" - ) - ) - }) - } - - if (inherits(conn, "Microsoft SQL Server")) { - test_that("getTableSignature() generates signature for random data (Microsoft SQL Server)", { - expect_identical( - getTableSignature(data_random, conn), - c( - "Date" = "DATE", - "POSIXct" = "DATETIME", - "character" = "varchar(255)", - # .. - "checksum" = "varchar(255)", - "from_ts" = "DATETIME", - "until_ts" = "DATETIME", - # .. - "integer" = "INT", - "numeric" = "FLOAT", - "logical" = "BIT" - ) - ) - }) - } - - if (inherits(conn, "duckdb_connection")) { - test_that("getTableSignature() generates signature for random data (duckdb_connection)", { - expect_identical( - getTableSignature(data_random, conn), - c( - "Date" = "DATE", - "POSIXct" = "TIMESTAMP", - "character" = "STRING", - # .. - "checksum" = "STRING", - "from_ts" = "TIMESTAMP", - "until_ts" = "TIMESTAMP", - # .. - "integer" = "INTEGER", - "numeric" = "DOUBLE", - "logical" = "BOOLEAN" - ) - ) - }) - } - - - if (inherits(conn, "SQLiteConnection")) { - test_that("getTableSignature() generates signature for random data on remote (SQLiteConnection)", { - expect_identical( - getTableSignature(dplyr::copy_to(conn, data_random), conn), - c( - "Date" = "DOUBLE", # By copying to SQLite and back, information is changed by - "POSIXct" = "DOUBLE", # dbplyr / DBI so data types are now similar, but different. - "character" = "TEXT", # Dates and timestamps which are normally stored in SQLite - # .. # as internally TEXT are now converted to DOUBLE - "checksum" = "TEXT", # Logical, which have the "SMALLINT" type are now "INT" - "from_ts" = "DOUBLE", # In the next test, we check that this conversion is consistent - "until_ts" = "DOUBLE", # for the user on the local R side. - # .. 
- "integer" = "INT", - "numeric" = "DOUBLE", - "logical" = "INT" - ) - ) - }) - } - - if (inherits(conn, "PqConnection")) { - test_that("getTableSignature() generates signature for random data on remote (PqConnection)", { - expect_identical( - getTableSignature(dplyr::copy_to(conn, data_random), conn), - c( - "Date" = "DATE", - "POSIXct" = "TIMESTAMPTZ", - "character" = "TEXT", - # .. - "checksum" = "TEXT", - "from_ts" = "TIMESTAMPTZ", - "until_ts" = "TIMESTAMPTZ", - # .. - "integer" = "INTEGER", - "numeric" = "DOUBLE PRECISION", - "logical" = "BOOLEAN" - ) - ) - }) - } - - if (inherits(conn, "Microsoft SQL Server")) { - test_that("getTableSignature() generates signature for random data on remote (Microsoft SQL Server)", { - expect_identical( - getTableSignature(dplyr::copy_to(conn, data_random), conn), - c( - "Date" = "DATE", - "POSIXct" = "DATETIME", - "character" = "varchar(255)", - # .. - "checksum" = "varchar(255)", - "from_ts" = "DATETIME", - "until_ts" = "DATETIME", - # .. - "integer" = "INT", - "numeric" = "FLOAT", - "logical" = "BIT" - ) - ) - }) - } - - if (inherits(conn, "duckdb_connection")) { - test_that("getTableSignature() generates signature for random data on remote (duckdb_connection)", { - expect_identical( - getTableSignature(dplyr::copy_to(conn, data_random), conn), - c( - "Date" = "DATE", - "POSIXct" = "TIMESTAMP", - "character" = "STRING", - # .. - "checksum" = "STRING", - "from_ts" = "TIMESTAMP", - "until_ts" = "TIMESTAMP", - # .. - "integer" = "INTEGER", - "numeric" = "DOUBLE", - "logical" = "BOOLEAN" - ) - ) - }) - } - - - if (!is.null(conn)) { - test_that(glue::glue("getTableSignature() generates consistent data types ({class(conn)})"), { - # This tests that the data types are consistent when copying to a remote table with getTableSignature(). - # We first copy the data to a remote table, then copy that table to another remote table on the same connection. 
- # The - remote_data_1 <- dplyr::copy_to( - conn, - data_random, - name = "remote_data_1", - types = getTableSignature(data_random, conn) - ) - remote_data_2 <- dplyr::copy_to( - conn, - remote_data_1, - name = "remote_data_2", - types = getTableSignature(remote_data_1, conn) - ) - - # The table signatures are not always the same (eg. SQLiteConnection). - if (inherits(conn, "SQLiteConnection")) { - expect_false(identical( # In lieu of expect_not_identical - getTableSignature(data_random, conn), - getTableSignature(remote_data_1, conn) - )) - expect_identical( # nolint: expect_named_linter - names(getTableSignature(data_random, conn)), - names(getTableSignature(remote_data_1, conn)) - ) - } else { - expect_identical( - getTableSignature(data_random, conn), - getTableSignature(remote_data_1, conn) - ) - } - - # But the data, when transfered locally, should be the same - expect_identical(dplyr::collect(remote_data_2), dplyr::collect(remote_data_1)) - }) - } - - if (!is.null(conn)) connection_clean_up(conn) -} diff --git a/tests/testthat/test-get_schema.R b/tests/testthat/test-get_schema.R deleted file mode 100644 index fa99269c..00000000 --- a/tests/testthat/test-get_schema.R +++ /dev/null @@ -1,130 +0,0 @@ -test_that("get_schema() and get_catalog() works for tbl_dbi", { - for (conn in get_test_conns()) { - - # Check for permanent tables in default schema - table <- dplyr::tbl(conn, "__mtcars") - expect_identical(get_schema(table), get_schema(conn)) - expect_identical(get_catalog(table), get_catalog(conn)) - - table_id_inferred <- DBI::Id( - catalog = get_catalog(table), - schema = get_schema(table), - table = "__mtcars" - ) - - expect_identical( - dplyr::collect(dplyr::tbl(conn, table_id_inferred)), - dplyr::collect(table) - ) - - - # Check for temporary tables - table_name <- unique_table_name() - table <- dplyr::copy_to(conn, mtcars, table_name, temporary = TRUE) - expect_identical(get_schema(table), get_schema(conn, temporary = TRUE)) - 
expect_identical(get_catalog(table), get_catalog(conn, temporary = TRUE)) - - table_id_inferred <- DBI::Id( - catalog = get_catalog(table), - schema = get_schema(table), - table = paste0(ifelse(inherits(conn, "Microsoft SQL Server"), "#", ""), table_name) - ) - - expect_identical( - dplyr::collect(dplyr::tbl(conn, table_id_inferred)), - dplyr::collect(table) - ) - - connection_clean_up(conn) - } -}) - - -test_that("get_schema() works for Id", { - expect_null(get_schema(DBI::Id(table = "table"))) - expect_identical(get_schema(DBI::Id(schema = "schema", table = "table")), "schema") -}) - - -test_that("get_catalog() works for Id", { - expect_null(get_catalog(DBI::Id(table = "table"))) - expect_identical(get_catalog(DBI::Id(catalog = "catalog", table = "table")), "catalog") -}) - - -test_that("get_schema() works for NULL", { - expect_null(get_schema(NULL)) -}) - - -test_that("get_catalog() works for NULL", { - expect_null(get_catalog(NULL)) -}) - - -for (conn in c(list(NULL), get_test_conns())) { - - if (is.null(conn)) { - test_that("get_schema() works for NULL connection", { - expect_null(get_schema(conn)) - expect_null(get_schema(conn, temporary = TRUE)) - }) - } - - if (inherits(conn, "SQLiteConnection")) { - test_that("get_schema() works for SQLiteConnection", { - expect_identical(get_schema(conn), "main") - expect_identical(get_schema(conn, temporary = TRUE), "temp") - }) - } - - if (inherits(conn, "PqConnection")) { - test_that("get_schema() works for PqConnection", { - expect_identical(get_schema(conn), "public") - checkmate::expect_character(get_schema(conn, temporary = TRUE), pattern = "^pg_temp_.*", len = 1) - }) - } - - if (inherits(conn, "Microsoft SQL Server")) { - test_that("get_schema() works for Microsoft SQL Server", { - expect_identical(get_schema(conn), "dbo") - expect_identical(get_schema(conn, temporary = TRUE), "dbo") - }) - } - - if (!is.null(conn)) connection_clean_up(conn) -} - - -for (conn in c(list(NULL), get_test_conns())) { - - if 
(is.null(conn)) { - test_that("get_catalog() works for NULL connection", { - expect_null(get_catalog(conn)) - expect_null(get_catalog(conn, temporary = TRUE)) - }) - } - - if (inherits(conn, "SQLiteConnection")) { - test_that("get_catalog() works for SQLiteConnection", { - expect_null(get_catalog(conn)) - expect_null(get_catalog(conn, temporary = TRUE)) - }) - } - - if (inherits(conn, "PqConnection")) { - test_that("get_catalog() works for PqConnection", { - expect_null(get_catalog(conn)) - expect_null(get_catalog(conn, temporary = TRUE)) - }) - } - - if (inherits(conn, "Microsoft SQL Server")) { - test_that("get_catalog() works for Microsoft SQL Server", { - expect_identical(get_catalog(conn), "master") - expect_identical(get_catalog(conn, temporary = TRUE), "tempdb") - }) - } - - if (!is.null(conn)) connection_clean_up(conn) -} diff --git a/tests/testthat/test-get_tables.R b/tests/testthat/test-get_tables.R index 2b09c446..633cd433 100644 --- a/tests/testthat/test-get_tables.R +++ b/tests/testthat/test-get_tables.R @@ -80,6 +80,7 @@ test_that("get_tables() works with temporary tables", { # Create temporary table tmp <- dplyr::copy_to(conn, mtcars, "__mtcars_2", temporary = TRUE) + defer_db_cleanup(tmp) tmp_id <- id(tmp) tmp_name <- paste(tmp_id@name["schema"], tmp_id@name["table"], sep = ".") @@ -110,6 +111,7 @@ test_that("get_tables() works without temporary tables", { # Create temporary table tmp <- dplyr::copy_to(conn, mtcars, "__mtcars_2", temporary = TRUE) + defer_db_cleanup(tmp) tmp_id <- id(tmp) tmp_name <- paste(tmp_id@name["schema"], tmp_id@name["table"], sep = ".") diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R deleted file mode 100644 index b9d538b3..00000000 --- a/tests/testthat/test-helpers.R +++ /dev/null @@ -1,63 +0,0 @@ -test_that("nrow() works", { - for (conn in get_test_conns()) { - x <- get_table(conn, "__mtcars") - - expect_identical(nrow(x), as.integer(dplyr::pull(dplyr::count(x)))) - expect_identical(nrow(x), 
nrow(mtcars)) - - connection_clean_up(conn) - } -}) - - -test_that("defer_db_cleanup() works in function call", { - for (conn in get_test_conns()) { - name <- unique_table_name() - - test <- function() { - mt <- dplyr::copy_to(conn, mtcars, name, temporary = FALSE) - expect_true(DBI::dbExistsTable(conn, id(name, conn))) - - defer_db_cleanup(mt) - expect_true(DBI::dbExistsTable(conn, id(name, conn))) - } - - test() - - expect_false(DBI::dbExistsTable(conn, id(name, conn))) - - connection_clean_up(conn) - } -}) - - -test_that("defer_db_cleanup() works with withr::deferred_run", { - for (conn in get_test_conns()) { - mt <- dplyr::copy_to(conn, mtcars, unique_table_name()) - mt_id <- id(mt) - defer_db_cleanup(mt) - - expect_true(DBI::dbExistsTable(conn, mt_id)) - - expect_message(withr::deferred_run(), "Ran 1/1 deferred expressions") - - expect_false(DBI::dbExistsTable(conn, mt_id)) - - connection_clean_up(conn) - } -}) - - -test_that("unique_table_name() works", { - table_1 <- unique_table_name() - table_2 <- unique_table_name() - checkmate::expect_character(table_1, pattern = "SCDB_[a-zA-Z0-9]{10}") - checkmate::expect_character(table_2, pattern = "SCDB_[a-zA-Z0-9]{10}") - checkmate::expect_disjunct(table_1, table_2) - - table_1 <- unique_table_name("test") - table_2 <- unique_table_name("test") - checkmate::expect_character(table_1, pattern = "test_[a-zA-Z0-9]{10}") - checkmate::expect_character(table_2, pattern = "test_[a-zA-Z0-9]{10}") - checkmate::expect_disjunct(table_1, table_2) -}) diff --git a/tests/testthat/test-id.R b/tests/testthat/test-id.R deleted file mode 100644 index 4620acac..00000000 --- a/tests/testthat/test-id.R +++ /dev/null @@ -1,165 +0,0 @@ -test_that("id() works for character input without implied schema", { - for (conn in get_test_conns()) { - - # Without schema, we expect: - - # ... no change of no conn is given - expect_identical(id("test_mtcars"), DBI::Id(table = "test_mtcars")) - - # .. 
the defaults schema if conn is given - expect_identical( - id("test_mtcars", conn), - DBI::Id(catalog = get_catalog(conn), schema = SCDB::get_schema(conn), table = "test_mtcars") - ) - - connection_clean_up(conn) - } -}) - - -test_that("id() works for character input with implied schema", { - # With schema we expect the implied schema.table to be resolved: - - # ... when no conn is given, we naively assume schema.table holds true - expect_identical(id("test.mtcars"), DBI::Id(schema = "test", table = "mtcars")) - - - for (conn in get_test_conns()) { - - # ... when conn is given, we check if implied schema exists. - # NOTE: All testing connections should have the schema "test" (except SQLite without attached schemas) - # therefore, in almost all cases, we should resolve the schema correctly (except the SQLite case above) - if (inherits(conn, "SQLiteConnection") && !schema_exists(conn, "test")) { - expect_identical(id("test.mtcars", conn), DBI::Id(schema = "main", table = "test.mtcars")) - } else { - expect_identical(id("test.mtcars", conn), DBI::Id(catalog = get_catalog(conn), schema = "test", table = "mtcars")) - } - - connection_clean_up(conn) - } -}) - - -test_that("id() works for character input with implied schema when schema does not exist", { - for (conn in get_test_conns()) { - - # Generate schema that does not exist - k <- 0 - while (k < 100) { - invalid_schema_name <- paste(sample(letters, size = 16, replace = TRUE), collapse = "") - k <- k + 1 - if (schema_exists(conn, invalid_schema_name)) next - break - } - - if (k < 100) { - - table_name <- paste(invalid_schema_name, "mtcars", sep = ".") - - # When schema does not exist and allow_table_only is TRUE, the schema should be the default schema - expect_identical( - id(table_name, conn = conn, allow_table_only = TRUE), - DBI::Id(catalog = get_catalog(conn), schema = get_schema(conn), table = table_name) - ) - - # When schema does not exist and allow_table_only is FALSE, the schema should be as implied - 
expect_identical( - id(table_name, conn = conn, allow_table_only = FALSE), - DBI::Id(catalog = get_catalog(conn), schema = invalid_schema_name, table = "mtcars") - ) - - } else { - warning("Non-existing schema could not be generated!", call. = FALSE) - } - - connection_clean_up(conn) - - # When connection is closed, the existence of the schema cannot be validated and an error should be given - expect_error(id(table_name, conn = conn), "DBI::dbIsValid\\(conn\\): FALSE") - } -}) - - -test_that("id() works for DBI::Id inputs", { - for (conn in get_test_conns()) { - - # When passing an Id without a schema, id should enrich the Id with the default schema - expect_identical( - id(DBI::Id(table = "mtcars"), conn), - DBI::Id(catalog = get_catalog(conn), schema = get_schema(conn), table = "mtcars") - ) - - connection_clean_up(conn) - } -}) - - -test_that("id() is consistent for tbl_dbi inputs", { - for (conn in get_test_conns()) { - - expectation <- id(dplyr::tbl(conn, id("test.mtcars", conn))) - - expect_identical( - expectation, - id.tbl_dbi(dplyr::tbl(conn, id("test.mtcars", conn))) - ) - - connection_clean_up(conn) - } -}) - - -test_that("id() is gives informative error for manipulated tbl_dbi inputs", { - for (conn in get_test_conns()) { - - expect_error( - id(dplyr::mutate(dplyr::tbl(conn, "__mtcars"), a = 2)), - "Table identification can only be determined if the lazy query is unmodified" - ) - - connection_clean_up(conn) - } -}) - - -test_that("id() works for data.frame inputs", { - for (conn in get_test_conns()) { - - # Output of get_tables should be parsable by id - db_table <- utils::head(get_tables(conn), 1) - - db_table_id <- expect_no_error(id(db_table)) - - # And it should have the corresponding fields, which we here check by comparing the string representations - expect_identical( - as.character(db_table_id), - paste(unlist(db_table), collapse = ".") - ) - - connection_clean_up(conn) - } -}) - - -test_that("as.character.id() works with implicit output", { - 
expect_identical(as.character(DBI::Id(table = "table")), "table") - expect_identical(as.character(DBI::Id(schema = "schema", table = "table")), "schema.table") - expect_identical(as.character(DBI::Id(catalog = "catalog", schema = "schema", table = "table")), - "catalog.schema.table") - - expect_identical(as.character(DBI::Id(table = "table", schema = "schema")), "schema.table") - expect_identical(as.character(DBI::Id(table = "table", schema = "schema", catalog = "catalog")), - "catalog.schema.table") -}) - - -test_that("as.character.id() works with explicit output", { - expect_identical(as.character(DBI::Id(table = "table"), explicit = TRUE), "\"table\"") - expect_identical(as.character(DBI::Id(schema = "schema", table = "table"), explicit = TRUE), "\"schema\".\"table\"") - expect_identical(as.character(DBI::Id(catalog = "catalog", schema = "schema", table = "table"), explicit = TRUE), - "\"catalog\".\"schema\".\"table\"") - - expect_identical(as.character(DBI::Id(table = "table", schema = "schema"), explicit = TRUE), "\"schema\".\"table\"") - expect_identical(as.character(DBI::Id(table = "table", schema = "schema", catalog = "catalog"), explicit = TRUE), - "\"catalog\".\"schema\".\"table\"") -}) diff --git a/tests/testthat/test-interlace.R b/tests/testthat/test-interlace.R deleted file mode 100644 index a85f1787..00000000 --- a/tests/testthat/test-interlace.R +++ /dev/null @@ -1,53 +0,0 @@ -test_that("interlace.tbl_sql() works", { - for (conn in get_test_conns()) { - - t1 <- data.frame(key = c("A", "A", "B"), - obs_1 = c(1, 2, 2), - valid_from = as.Date(c("2021-01-01", "2021-02-01", "2021-01-01")), - valid_until = as.Date(c("2021-02-01", "2021-03-01", NA))) - - - t2 <- data.frame(key = c("A", "B"), - obs_2 = c("a", "b"), - valid_from = as.Date(c("2021-01-01", "2021-01-01")), - valid_until = as.Date(c("2021-04-01", NA))) - - - t_ref <- data.frame(key = c("A", "A", "A", "B"), - obs_1 = c(1, 2, NA, 2), - obs_2 = c("a", "a", "a", "b"), - valid_from = 
as.Date(c("2021-01-01", "2021-02-01", "2021-03-01", "2021-01-01")), - valid_until = as.Date(c("2021-02-01", "2021-03-01", "2021-04-01", NA))) - - - # Copy t1, t2 and t_ref to conn - t1 <- dplyr::copy_to(conn, t1, name = id("test.SCDB_tmp1", conn), overwrite = TRUE, temporary = FALSE) - t2 <- dplyr::copy_to(conn, t2, name = id("test.SCDB_tmp2", conn), overwrite = TRUE, temporary = FALSE) - t_ref <- dplyr::copy_to(conn, t_ref, name = id("test.SCDB_tmp3", conn), overwrite = TRUE, temporary = FALSE) - - - # Order of records may be different, so we arrange then check if they are identical - expect_identical(interlace(list(t1, t2), by = "key") %>% - dplyr::collect() %>% - dplyr::arrange(.data$key, .data$valid_from), - t_ref %>% - dplyr::collect() %>% - dplyr::arrange(.data$key, .data$valid_from)) - - # Order of columns will be different, so we only require a mapequal - # .. but order of records can still be different - expect_mapequal(interlace(list(t1, t2), by = "key") %>% - dplyr::collect() %>% - dplyr::arrange(.data$key, .data$valid_from), - interlace(list(t2, t1), by = "key") %>% - dplyr::collect() %>% - dplyr::arrange(.data$key, .data$valid_from)) - - connection_clean_up(conn) - } -}) - - -test_that("interlace returns early if length(table) == 1", { - expect_identical(mtcars["mpg"], interlace(list(mtcars["mpg"]), by = "mpg")) -}) diff --git a/tests/testthat/test-schema_exists.R b/tests/testthat/test-schema_exists.R deleted file mode 100644 index e05102bf..00000000 --- a/tests/testthat/test-schema_exists.R +++ /dev/null @@ -1,19 +0,0 @@ -test_that("schema_exists() works", { - conns <- get_test_conns() - for (conn_id in seq_along(conns)) { - - conn <- conns[[conn_id]] - - # Not all data bases support schemas. 
- # Here we filter out the data bases that do not support schema - # NOTE: SQLite does support schema, but we test both with and without attaching schemas - if (names(conns)[[conn_id]] != "SQLite") { - expect_true(schema_exists(conn, "test")) - - random_string <- paste(sample(c(letters, LETTERS), size = 16, replace = TRUE), collapse = "") - expect_false(schema_exists(conn, random_string)) - } - - connection_clean_up(conn) - } -}) diff --git a/tests/testthat/test-slice_time.R b/tests/testthat/test-slice_time.R deleted file mode 100644 index 10de7437..00000000 --- a/tests/testthat/test-slice_time.R +++ /dev/null @@ -1,34 +0,0 @@ -test_that("slice_time() works", { - for (conn in get_test_conns()) { - - # SQLite does not work with dates. But since we use ISO 8601 for dates, we can compare lexicographically - xx <- get_table(conn, "__mtcars") %>% - dplyr::mutate(checksum = dplyr::row_number(), - from_ts = dplyr::if_else(checksum <= 20, "2022-06-01", "2022-06-15"), - until_ts = NA_character_) - - expect_identical(nrow(slice_time(xx, "2022-05-01")), 0L) - expect_identical(nrow(slice_time(xx, "2022-06-01")), 20L) - expect_identical(nrow(slice_time(xx, "2022-06-15")), nrow(mtcars)) - - connection_clean_up(conn) - } -}) - - -test_that("slice_time() works with non-standard columns", { - for (conn in get_test_conns()) { - - # SQLite does not work with dates. 
But since we use ISO 8601 for dates, we can compare lexicographically - xx <- get_table(conn, "__mtcars") %>% - dplyr::mutate(checksum = dplyr::row_number(), - valid_from = dplyr::if_else(checksum <= 20, "2022-06-01", "2022-06-15"), - valid_until = NA_character_) - - expect_identical(nrow(slice_time(xx, "2022-05-01", from_ts = "valid_from", until_ts = "valid_until")), 0L) - expect_identical(nrow(slice_time(xx, "2022-06-01", from_ts = "valid_from", until_ts = "valid_until")), 20L) - expect_identical(nrow(slice_time(xx, "2022-06-15", from_ts = "valid_from", until_ts = "valid_until")), nrow(mtcars)) - - connection_clean_up(conn) - } -}) diff --git a/tests/testthat/test-table_exists.R b/tests/testthat/test-table_exists.R deleted file mode 100644 index 3c835e47..00000000 --- a/tests/testthat/test-table_exists.R +++ /dev/null @@ -1,111 +0,0 @@ -test_that("table_exists() works for default schema", { - for (conn in get_test_conns()) { - - # Generate table in default schema that does not exist - k <- 0 - while (k < 100) { - invalid_table_name <- paste(sample(letters, size = 16, replace = TRUE), collapse = "") - k <- k + 1 - if (DBI::dbExistsTable(conn, id(invalid_table_name, conn))) next - break - } - - if (k < 100) { - - # Without explicit schema, table_exists assumes default schema - expect_true(table_exists(conn, "__mtcars")) - expect_false(table_exists(conn, invalid_table_name)) - - expect_true(table_exists(conn, DBI::Id(table = "__mtcars"))) - expect_false(table_exists(conn, DBI::Id(table = invalid_table_name))) - - # Using the default schema should therefore yield the same results - expect_true(table_exists(conn, paste(get_schema(conn), "__mtcars", sep = "."))) - expect_false(table_exists(conn, paste(get_schema(conn), invalid_table_name, sep = "."))) - - expect_true(table_exists(conn, DBI::Id(schema = get_schema(conn), table = "__mtcars"))) - expect_false(table_exists(conn, DBI::Id(schema = get_schema(conn), table = invalid_table_name))) - - } else { - 
warning("Non-existing table in default schema could not be generated!", call. = FALSE) - } - - connection_clean_up(conn) - } -}) - - -test_that("table_exists() works for non-default schema", { - for (conn in get_test_conns()) { - - # Generate schema that does not exist - k <- 0 - while (k < 100) { - invalid_schema_name <- paste(sample(letters, size = 16, replace = TRUE), collapse = "") - k <- k + 1 - if (schema_exists(conn, invalid_schema_name)) next - break - } - - if (k < 100) { - - # With an implied schema, table_exists should still determine existence correctly - - # Character inputs - expect_true(table_exists(conn, "test.mtcars")) - expect_false(table_exists(conn, paste(invalid_schema_name, "mtcars", sep = "."))) - - - # DBI::Id inputs - if (schema_exists(conn, "test")) { - expect_true(table_exists(conn, DBI::Id(schema = "test", table = "mtcars"))) - } else { - expect_false(table_exists(conn, DBI::Id(schema = "test", table = "mtcars"))) - } - expect_false(table_exists(conn, DBI::Id(schema = invalid_schema_name, table = "mtcars"))) - - } else { - warning("Non-existing schema could not be generated!", call. = FALSE) - } - - connection_clean_up(conn) - } -}) - - -test_that("table_exists() fails when multiple matches are found", { - for (conn in get_test_conns()) { - - # Not all data bases support schemas. - # Here we filter out the data bases that do not support schema - # NOTE: SQLite does support schema, but we test both with and without attaching schemas - if (schema_exists(conn, "test") && schema_exists(conn, "test.one")) { - - DBI::dbExecute(conn, 'CREATE TABLE "test"."one.two"(a TEXT)') - DBI::dbExecute(conn, 'CREATE TABLE "test.one"."two"(b TEXT)') - - expect_error( - table_exists(conn, "test.one.two"), - regex = "More than one table matching 'test.one.two' was found!" 
- ) - - } - - connection_clean_up(conn) - } -}) - - -test_that("table_exists() works when starting from empty", { - skip_if_not_installed("RSQLite") - - conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - - expect_false(table_exists(conn, "mtcars")) - - dplyr::copy_to(conn, mtcars, "mtcars", temporary = FALSE) - - expect_true(table_exists(conn, "mtcars")) - - connection_clean_up(conn) -}) diff --git a/tests/testthat/test-unite.tbl_dbi.R b/tests/testthat/test-unite.tbl_dbi.R deleted file mode 100644 index 9e524adf..00000000 --- a/tests/testthat/test-unite.tbl_dbi.R +++ /dev/null @@ -1,63 +0,0 @@ -test_that("unite.tbl_dbi() works", { - for (conn in get_test_conns()) { - - q <- get_table(conn, "__mtcars") %>% utils::head(1) - qu_remove <- tidyr::unite(dplyr::select(q, mpg, hp), "new_column", mpg, hp) %>% - dplyr::compute(name = unique_table_name()) - qu <- tidyr::unite(dplyr::select(q, mpg, hp), "new_column", mpg, hp, remove = FALSE) %>% - dplyr::compute(name = unique_table_name()) - qu_alt <- tidyr::unite(dplyr::select(q, mpg, hp), "new_column", "mpg", "hp", remove = FALSE) %>% - dplyr::compute(name = unique_table_name()) - - expect_s3_class(qu_remove, "tbl_dbi") - expect_s3_class(qu, "tbl_dbi") - expect_s3_class(qu_alt, "tbl_dbi") - - expect_identical(colnames(qu_remove), "new_column") - expect_identical(colnames(qu), c("new_column", "mpg", "hp")) - expect_identical(colnames(qu_alt), c("new_column", "mpg", "hp")) - - expect_identical(dplyr::collect(qu), dplyr::collect(qu_alt)) - - # tidyr::unite has some quirky (and FUN!!! behavior) that we are forced to match here - # specifically, the input "col" is converted to a symbol, so we have to do escape-bullshit - # NOTE: the line "dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) %>% " - # is to account for SQLite not having integer data-types. 
If we do not first convert to character, - # there will be differences between the objects that are trivial, so we remove these with this operation - # this way, the test should (hopefully) only fail if there are non-trivial differences - expect_mapequal(get_table(conn, "__mtcars") %>% - tidyr::unite("new_col", mpg, hp) %>% - dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) %>% - dplyr::collect(), - get_table(conn, "__mtcars") %>% - dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) %>% - dplyr::collect() %>% - tidyr::unite("new_col", mpg, hp)) - - col <- "new_col" - expect_mapequal(get_table(conn, "__mtcars") %>% - tidyr::unite(col, mpg, hp) %>% - dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) %>% - dplyr::collect(), - get_table(conn, "__mtcars") %>% - dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) %>% - dplyr::collect() %>% - tidyr::unite(col, mpg, hp)) - - expect_mapequal(get_table(conn, "__mtcars") %>% - tidyr::unite(!!col, mpg, hp) %>% - dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) %>% - dplyr::collect(), - get_table(conn, "__mtcars") %>% - dplyr::mutate(dplyr::across(tidyselect::everything(), as.character)) %>% - dplyr::collect() %>% - tidyr::unite(!!col, mpg, hp)) - - # Unite places cols in a particular way, lets be sure we match - qq <- dplyr::mutate(q, dplyr::across(tidyselect::everything(), as.character)) # we convert to character since SQLite - expect_identical(qq %>% tidyr::unite("test_col", vs, am) %>% dplyr::collect(), - qq %>% dplyr::collect() %>% tidyr::unite("test_col", vs, am)) - - connection_clean_up(conn) - } -}) diff --git a/tests/testthat/test-update_snapshot.R b/tests/testthat/test-update_snapshot.R deleted file mode 100644 index c0824805..00000000 --- a/tests/testthat/test-update_snapshot.R +++ /dev/null @@ -1,650 +0,0 @@ -test_that("update_snapshot() can handle first snapshot", { - for (conn in get_test_conns()) { - - if 
(DBI::dbExistsTable(conn, id("test.SCDB_tmp1", conn))) DBI::dbRemoveTable(conn, id("test.SCDB_tmp1", conn)) - if (DBI::dbExistsTable(conn, id("test.SCDB_logs", conn))) DBI::dbRemoveTable(conn, id("test.SCDB_logs", conn)) - expect_false(table_exists(conn, "test.SCDB_tmp1")) - expect_false(table_exists(conn, "test.SCDB_logs")) - - # Use unmodified mtcars as the initial snapshot - .data <- mtcars %>% - dplyr::copy_to(conn, df = ., name = unique_table_name()) - - # Configure the logger for this update - db_table <- "test.SCDB_tmp1" - timestamp <- "2022-10-01 09:00:00" - log_path <- tempdir() - - # Ensure all logs are removed - dir(log_path) %>% - purrr::keep(~ endsWith(., ".log")) %>% - purrr::walk(~ unlink(file.path(log_path, .))) - - logger <- Logger$new( - db_table = db_table, - timestamp = timestamp, - log_path = log_path, - log_table_id = "test.SCDB_logs", - log_conn = conn, - output_to_console = FALSE - ) - - # Update - update_snapshot(.data, conn, db_table, timestamp, logger = logger) - - # Confirm snapshot is transferred correctly - expect_identical( - get_table(conn, db_table) %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec), - .data %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec) - ) - - - ### For this test, we also check that the log output is correct ### - # Check file log outputs exists - log_pattern <- glue::glue("{stringr::str_replace_all(as.Date(timestamp), '-', '_')}.{id(db_table, conn)}.log") - log_file <- purrr::keep(dir(log_path), ~ stringr::str_detect(., log_pattern)) - expect_length(log_file, 1) - expect_gt(file.info(file.path(log_path, log_file))$size, 0) - expect_identical(nrow(get_table(conn, "test.SCDB_logs")), 1L) - - db_logs_with_log_file <- get_table(conn, "test.SCDB_logs") %>% - dplyr::filter(!is.na(.data$log_file)) - expect_identical(nrow(db_logs_with_log_file), 1L) - - # Check database log output - logs <- get_table(conn, "test.SCDB_logs") %>% dplyr::collect() - - # The logs should have specified data types - types <- c( - 
"date" = "POSIXct", - "catalog" = "character", - "schema" = "character", - "table" = "character", - "n_insertions" = "numeric", - "n_deactivations" = "numeric", - "start_time" = "POSIXct", - "end_time" = "POSIXct", - "duration" = "character", - "success" = "logical", - "message" = "character" - ) - - if (inherits(conn, "SQLiteConnection")) { - types <- types %>% - purrr::map_if(~ identical(., "POSIXct"), "character") %>% # SQLite does not support POSIXct - purrr::map_if(~ identical(., "logical"), "numeric") %>% # SQLite does not support logical - as.character() - } - - checkmate::expect_data_frame(logs, nrows = 1, types) - - # Check the content of the log table - expect_identical(as.character(logs$date), as.character(timestamp)) - - db_table_id <- id(db_table, conn) - if ("catalog" %in% colnames(logs)) expect_identical(logs$catalog, purrr::pluck(db_table_id, "name", "catalog")) - expect_identical(logs$schema, purrr::pluck(db_table_id, "name", "schema")) - expect_identical(logs$table, purrr::pluck(db_table_id, "name", "table")) - - expect_identical(logs$n_insertions, nrow(mtcars)) - expect_identical(logs$n_deactivations, 0L) - expect_true(as.logical(logs$success)) - expect_identical(logs$message, NA_character_) - - - # Clean up the logs - unlink(logger$log_realpath) - - close_connection(conn) - } -}) - -test_that("update_snapshot() can add a new snapshot", { - for (conn in get_test_conns()) { - - # Modify snapshot and run update step - .data <- mtcars %>% - dplyr::mutate(hp = dplyr::if_else(hp > 130, hp - 10, hp)) %>% - dplyr::copy_to(conn, df = ., name = unique_table_name()) - - # Configure the logger for this update - db_table <- "test.SCDB_tmp1" - timestamp <- "2022-10-03 09:00:00" - - logger <- Logger$new( - db_table = db_table, - timestamp = timestamp, - log_path = NULL, - log_table_id = "test.SCDB_logs", - log_conn = conn, - output_to_console = FALSE - ) - - - # Update - # This is a simple update where 15 rows are replaced with 15 new ones on the given date - 
update_snapshot(.data, conn, db_table, timestamp, logger = logger) - - # Check the snapshot has updated correctly - target <- dplyr::tbl(conn, id(db_table, conn)) - expect_identical( - slice_time(target, "2022-10-01 09:00:00") %>% - dplyr::select(!c("from_ts", "until_ts", "checksum")) %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec), - mtcars %>% - dplyr::arrange(wt, qsec) %>% - tibble::as_tibble() - ) - expect_identical( - nrow(slice_time(target, "2022-10-01 09:00:00")), - nrow(mtcars) - ) - - expect_identical( - slice_time(target, "2022-10-03 09:00:00") %>% - dplyr::select(!c("from_ts", "until_ts", "checksum")) %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec), - .data %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec) - ) - expect_identical( - nrow(slice_time(target, "2022-10-03 09:00:00")), - nrow(mtcars) - ) - - - # Check database log output - logs <- get_table(conn, "test.SCDB_logs") %>% - dplyr::collect() %>% - utils::tail(1) - - expect_identical(logs$n_insertions, 15L) - expect_identical(logs$n_deactivations, 15L) - expect_true(as.logical(logs$success)) - - close_connection(conn) - } -}) - -test_that("update_snapshot() can update a snapshot on an existing date", { - for (conn in get_test_conns()) { - - # We now attempt to do another update on the same date - .data <- mtcars %>% - dplyr::mutate(hp = dplyr::if_else(hp > 100, hp - 10, hp)) %>% - dplyr::copy_to(conn, df = ., name = unique_table_name()) - - # Configure the logger for this update - db_table <- "test.SCDB_tmp1" - timestamp <- "2022-10-03 09:00:00" - - logger <- Logger$new( - db_table = db_table, - timestamp = timestamp, - log_path = NULL, - log_table_id = "test.SCDB_logs", - log_conn = conn, - output_to_console = FALSE - ) - - - # This is a more complicated update where a further 8 rows are replaced with 8 new ones on the same date as before - update_snapshot(.data, conn, db_table, "2022-10-03 09:00:00", logger = logger) - - # Even though we insert twice on the same date, we expect the 
data to be minimal (compacted) - target <- dplyr::tbl(conn, id(db_table, conn)) - expect_identical( - slice_time(target, "2022-10-01 09:00:00") %>% - dplyr::select(!c("from_ts", "until_ts", "checksum")) %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec), - mtcars %>% - dplyr::arrange(wt, qsec) %>% - tibble::tibble() - ) - expect_identical( - nrow(slice_time(target, "2022-10-01 09:00:00")), - nrow(mtcars) - ) - - expect_identical( - slice_time(target, "2022-10-03 09:00:00") %>% - dplyr::select(!c("from_ts", "until_ts", "checksum")) %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec), - .data %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec) - ) - expect_identical( - nrow(slice_time(target, "2022-10-03 09:00:00")), - nrow(mtcars) - ) - - - # Check database log output - logs <- get_table(conn, "test.SCDB_logs") %>% - dplyr::collect() %>% - utils::tail(1) - - expect_identical(logs$n_insertions, 8L) - expect_identical(logs$n_deactivations, 8L) - expect_true(as.logical(logs$success)) - - close_connection(conn) - } -}) - -test_that("update_snapshot() can insert a snapshot between existing dates", { - for (conn in get_test_conns()) { - - # We now attempt to an update between these two updates - .data <- mtcars %>% - dplyr::mutate(hp = dplyr::if_else(hp > 150, hp - 10, hp)) %>% - dplyr::copy_to(conn, df = ., name = unique_table_name()) - - # This should fail if we do not specify "enforce_chronological_order = FALSE" - expect_error( - update_snapshot(.data, conn, "test.SCDB_tmp1", "2022-10-02 09:00:00", logger = LoggerNull$new()), - regexp = "Given timestamp 2022-10-02 09:00:00 is earlier" - ) - - # But not with the variable set - update_snapshot(.data, conn, "test.SCDB_tmp1", "2022-10-02 09:00:00", - logger = LoggerNull$new(), enforce_chronological_order = FALSE) - - - target <- dplyr::tbl(conn, id("test.SCDB_tmp1", conn)) - expect_identical( - slice_time(target, "2022-10-01 09:00:00") %>% - dplyr::select(!c("from_ts", "until_ts", "checksum")) %>% - 
dplyr::collect() %>% - dplyr::arrange(wt, qsec), - mtcars %>% - dplyr::arrange(wt, qsec) %>% - tibble::tibble() - ) - expect_identical( - nrow(slice_time(target, "2022-10-01 09:00:00")), - nrow(mtcars) - ) - - expect_identical( - slice_time(target, "2022-10-02 09:00:00") %>% - dplyr::select(!c("from_ts", "until_ts", "checksum")) %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec), - .data %>% - dplyr::collect() %>% - dplyr::arrange(wt, qsec) - ) - expect_identical( - nrow(slice_time(target, "2022-10-02 09:00:00")), - nrow(mtcars) - ) - - close_connection(conn) - } -}) - - - -test_that("update_snapshot() works (holistic test 1)", { - for (conn in get_test_conns()) { - - if (DBI::dbExistsTable(conn, id("test.SCDB_tmp1", conn))) DBI::dbRemoveTable(conn, id("test.SCDB_tmp1", conn)) - expect_false(table_exists(conn, "test.SCDB_tmp1")) - - - # Create test data for the test - t0 <- data.frame(col1 = c("A", "B"), col2 = c(NA_real_, NA_real_)) - t1 <- data.frame(col1 = c("A", "B", "C"), col2 = c(1, NA_real_, NA_real_)) - t2 <- data.frame(col1 = c("A", "B", "C"), col2 = c(1, 2, 3)) - - # Copy t0, t1, and t2 to conn - t0 <- dplyr::copy_to(conn, t0, name = id("test.SCDB_t0", conn), overwrite = TRUE, temporary = FALSE) - t1 <- dplyr::copy_to(conn, t1, name = id("test.SCDB_t1", conn), overwrite = TRUE, temporary = FALSE) - t2 <- dplyr::copy_to(conn, t2, name = id("test.SCDB_t2", conn), overwrite = TRUE, temporary = FALSE) - - logger <- LoggerNull$new() - update_snapshot(t0, conn, "test.SCDB_tmp1", "2022-01-01 08:00:00", logger = logger) - expect_identical( - dplyr::collect(t0) %>% dplyr::arrange(col1), - dplyr::collect(get_table(conn, "test.SCDB_tmp1")) %>% dplyr::arrange(col1) - ) - - update_snapshot( - t1, - conn, - "test.SCDB_tmp1", - "2022-01-01 08:10:00", - logger = logger, - collapse_continuous_records = TRUE - ) - expect_identical( - dplyr::collect(t1) %>% dplyr::arrange(col1), - dplyr::collect(get_table(conn, "test.SCDB_tmp1")) %>% dplyr::arrange(col1) - ) - - 
update_snapshot( - t2, - conn, - "test.SCDB_tmp1", - "2022-01-01 08:10:00", - logger = logger, - collapse_continuous_records = TRUE - ) - expect_identical( - dplyr::collect(t2) %>% dplyr::arrange(col1), - dplyr::collect(get_table(conn, "test.SCDB_tmp1")) %>% dplyr::arrange(col1) - ) - - t <- list(t0, t1, t2) %>% - purrr::reduce(dplyr::union) %>% - dplyr::collect() %>% - dplyr::mutate(col2 = as.character(col2)) %>% - dplyr::arrange(col1, col2) %>% - utils::head(5) - - t_ref <- get_table(conn, "test.SCDB_tmp1", slice_ts = NULL) %>% - dplyr::select(!any_of(c("from_ts", "until_ts", "checksum"))) %>% - dplyr::collect() %>% - dplyr::mutate(col2 = as.character(col2)) %>% - dplyr::arrange(col1, col2) - - expect_identical(t, t_ref) - - close_connection(conn) - } -}) - -test_that("update_snapshot() works (holistic test 2)", { - for (conn in get_test_conns()) { - - if (DBI::dbExistsTable(conn, id("test.SCDB_tmp1", conn))) DBI::dbRemoveTable(conn, id("test.SCDB_tmp1", conn)) - expect_false(table_exists(conn, "test.SCDB_tmp1")) - - - # Create test data for the test - t0 <- data.frame(col1 = c("A", "B"), col2 = c(NA_real_, NA_real_)) - t1 <- data.frame(col1 = c("A", "B", "C"), col2 = c(1, NA_real_, NA_real_)) - t2 <- data.frame(col1 = c("A", "B", "C"), col2 = c(1, 2, 3)) - - # Copy t0, t1, and t2 to conn (and suppress check_from message) - t0 <- dplyr::copy_to(conn, t0, name = id("test.SCDB_t0", conn), overwrite = TRUE, temporary = FALSE) - t1 <- dplyr::copy_to(conn, t1, name = id("test.SCDB_t1", conn), overwrite = TRUE, temporary = FALSE) - t2 <- dplyr::copy_to(conn, t2, name = id("test.SCDB_t2", conn), overwrite = TRUE, temporary = FALSE) - - - # Check non-chronological insertion - logger <- LoggerNull$new() - update_snapshot(t0, conn, "test.SCDB_tmp1", "2022-01-01", logger = logger) - expect_identical(dplyr::collect(t0) %>% dplyr::arrange(col1), - dplyr::collect(get_table(conn, "test.SCDB_tmp1")) %>% dplyr::arrange(col1)) - - update_snapshot(t2, conn, "test.SCDB_tmp1", 
"2022-03-01", logger = logger) - expect_identical(dplyr::collect(t2) %>% dplyr::arrange(col1), - dplyr::collect(get_table(conn, "test.SCDB_tmp1")) %>% dplyr::arrange(col1)) - - update_snapshot(t1, conn, "test.SCDB_tmp1", "2022-02-01", logger = logger, enforce_chronological_order = FALSE) - expect_identical( - dplyr::collect(t1) %>% dplyr::arrange(col1), - dplyr::collect(get_table(conn, "test.SCDB_tmp1", slice_ts = "2022-02-01")) %>% dplyr::arrange(col1) - ) - - t_ref <- - tibble::tibble(col1 = c("A", "B", "A", "C", "B", "C"), - col2 = c(NA_real_, NA_real_, 1, NA_real_, 2, 3), - from_ts = c("2022-01-01", "2022-01-01", "2022-02-01", "2022-02-01", "2022-03-01", "2022-03-01"), - until_ts = c("2022-02-01", "2022-03-01", NA, "2022-03-01", NA, NA)) - - expect_identical( - get_table(conn, "test.SCDB_tmp1", slice_ts = NULL) %>% - dplyr::select(!"checksum") %>% - dplyr::collect() %>% - dplyr::mutate(from_ts = strftime(from_ts), - until_ts = strftime(until_ts)) %>% - dplyr::arrange(col1, from_ts), - t_ref %>% - dplyr::arrange(col1, from_ts) - ) - - if (DBI::dbExistsTable(conn, id("test.SCDB_tmp1", conn))) DBI::dbRemoveTable(conn, id("test.SCDB_tmp1", conn)) - - connection_clean_up(conn) - } -}) - - -test_that("update_snapshot() handles 'NULL' updates", { - for (conn in get_test_conns()) { - - if (DBI::dbExistsTable(conn, id("test.SCDB_tmp1", conn))) DBI::dbRemoveTable(conn, id("test.SCDB_tmp1", conn)) - if (DBI::dbExistsTable(conn, id("test.SCDB_logs", conn))) DBI::dbRemoveTable(conn, id("test.SCDB_logs", conn)) - - # Use mtcars as the test data set - .data <- mtcars %>% - dplyr::copy_to(conn, df = ., name = unique_table_name()) - defer_db_cleanup(.data) - - # This is a simple update where 23 rows are replaced with 23 new ones on the given date - db_table <- "test.SCDB_tmp1" - - create_logger <- function(timestamp) { - Logger$new( - db_table = db_table, - timestamp = timestamp, - log_path = NULL, - log_table_id = "test.SCDB_logs", - log_conn = conn, - output_to_console = 
FALSE - ) - } - - # Update the table with update_snapshot() and store the results - update_snapshot(.data, conn, db_table, "2022-10-03 09:00:00", logger = create_logger("2022-10-03 09:00:00")) - target_data_1 <- get_table(conn, db_table, slice_ts = NULL) %>% dplyr::collect() - - # Update the table with the same data again update_snapshot() and store the results - update_snapshot(.data, conn, db_table, "2022-10-04 09:00:00", logger = create_logger("2022-10-04 09:00:00")) - target_data_2 <- get_table(conn, db_table, slice_ts = NULL) %>% dplyr::collect() - - # Check that the two updates are identical - expect_identical(target_data_1, target_data_2) - - # Confirm with logs that no updates have been made - logs <- get_table(conn, id("test.SCDB_logs", conn)) %>% - dplyr::collect() %>% - dplyr::arrange(date) - - expect_identical(logs$n_insertions, c(nrow(mtcars), 0L)) - expect_identical(logs$n_deactivations, c(0L, 0L)) - - connection_clean_up(conn) - } -}) - - -test_that("update_snapshot() works with Id objects", { - withr::local_options("SCDB.log_path" = NULL) # No file logging - - for (conn in get_test_conns()) { - - target_table <- id("test.mtcars_modified", conn) - - logger <- Logger$new(output_to_console = FALSE, - timestamp = Sys.time(), - db_table = "test.mtcars_modified", - log_conn = NULL, - log_table_id = NULL, - warn = FALSE) - - expect_no_error( - mtcars %>% - dplyr::mutate(disp = sample(mtcars$disp, nrow(mtcars))) %>% - dplyr::copy_to(dest = conn, df = ., name = unique_table_name()) %>% - update_snapshot( - conn = conn, - db_table = target_table, - logger = logger, - timestamp = format(Sys.time()) - ) - ) - - connection_clean_up(conn) - } -}) - - -test_that("update_snapshot() checks table formats", { - - withr::local_options("SCDB.log_path" = tempdir()) - - for (conn in get_test_conns()) { - - mtcars_table <- dplyr::tbl(conn, id("__mtcars_historical", conn = conn)) - timestamp <- Sys.time() - - expect_warning( - logger <- Logger$new(log_path = NULL, 
log_table_id = NULL, output_to_console = FALSE), # nolint: implicit_assignment_linter - "NO file or database logging will be done." - ) - - # Test columns not matching - broken_table <- dplyr::copy_to(conn, dplyr::select(mtcars, !"mpg"), name = "mtcars_broken", overwrite = TRUE) - - expect_error( - update_snapshot( - .data = broken_table, - conn = conn, - db_table = mtcars_table, - timestamp = timestamp, - logger = logger - ), - "Columns do not match!" - ) - - file.remove(list.files(getOption("SCDB.log_path"), pattern = format(timestamp, "^%Y%m%d.%H%M"), full.names = TRUE)) - - # Test target table not being a historical table - expect_error( - update_snapshot( - dplyr::tbl(conn, id("__mtcars", conn = conn)), - conn, - id("__mtcars", conn = conn), - timestamp = timestamp, - logger = logger - ), - "Table does not seem like a historical table" - ) - - connection_clean_up(conn) - } -}) - - -test_that("update_snapshot() works with across connection", { - skip_if_not_installed("RSQLite") - - withr::local_options("SCDB.log_path" = NULL) # No file logging - - # Test a data transfer from a local SQLite to the test connection - source_conn <- DBI::dbConnect(RSQLite::SQLite()) - - # Create a table for the tests - mtcars_modified <- mtcars %>% - dplyr::mutate(name = rownames(mtcars)) - - # Copy table to the source - .data <- dplyr::copy_to(dest = source_conn, df = mtcars_modified, name = unique_table_name()) - - # For each conn, we test if update_snapshot preserves data types - for (target_conn in get_test_conns()) { - - target_table <- id("test.mtcars_modified", target_conn) - if (DBI::dbExistsTable(target_conn, target_table)) DBI::dbRemoveTable(target_conn, target_table) - - logger <- LoggerNull$new() - - # Check we can transfer without error - expect_no_error( - update_snapshot( - .data, - conn = target_conn, - db_table = target_table, - logger = logger, - timestamp = format(Sys.time()) - ) - ) - - # Check that if we collect the table, the signature will match the original 
- table_signature <- get_table(target_conn, target_table) %>% - dplyr::collect() %>% - dplyr::summarise(dplyr::across(tidyselect::everything(), ~ class(.)[1])) %>% - as.data.frame() - - expect_identical( - table_signature, - dplyr::summarise(mtcars_modified, dplyr::across(tidyselect::everything(), ~ class(.)[1])) - ) - - - DBI::dbRemoveTable(target_conn, target_table) - connection_clean_up(target_conn) - rm(logger) - invisible(gc()) - } - connection_clean_up(source_conn) - - - ## Now we test the reverse transfer - - # Test a data transfer from the test connection to a local SQLite - target_conn <- DBI::dbConnect(RSQLite::SQLite()) - - # For each conn, we test if update_snapshot preserves data types - for (source_conn in get_test_conns()) { - - .data <- dplyr::copy_to(dest = source_conn, df = mtcars_modified, name = unique_table_name()) - - target_table <- id("mtcars_modified", target_conn) - if (DBI::dbExistsTable(target_conn, target_table)) DBI::dbRemoveTable(target_conn, target_table) - - logger <- LoggerNull$new() - - # Check we can transfer without error - expect_no_error( - update_snapshot( - .data, - conn = target_conn, - db_table = target_table, - logger = logger, - timestamp = format(Sys.time()) - ) - ) - - # Check that if we collect the table, the signature will match the original - table_signature <- get_table(target_conn, target_table) %>% - dplyr::collect() %>% - dplyr::summarise(dplyr::across(tidyselect::everything(), ~ class(.)[1])) %>% - as.data.frame() - - expect_identical( - table_signature, - dplyr::summarise(mtcars_modified, dplyr::across(tidyselect::everything(), ~ class(.)[1])) - ) - - - connection_clean_up(source_conn) - rm(logger) - invisible(gc()) - } - connection_clean_up(target_conn) -})