Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,7 @@ standalone/build/*

*.tex
*.log
.Renviron
.Renviron

# local work folder
work/
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just for my convenience. I like to have an ignored folder where I can store work in progress.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can delete this if you like.

2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ BugReports: https://github.com/OHDSI/FeatureExtraction/issues
Depends:
R (>= 3.2.2),
DatabaseConnector (>= 3.0.0),
Andromeda
Andromeda (>= 0.6.3)
Imports:
methods,
dplyr,
Expand Down
16 changes: 8 additions & 8 deletions R/Aggregation.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ aggregateCovariates <- function(covariateData) {
populationSize <- attr(covariateData, "metaData")$populationSize

# Aggregate binary variables
result$covariates <- covariateData$analysisRef %>%
filter(rlang::sym("isBinary") == "Y") %>%
result$covariates <- covariateData$analysisRef %>%
filter(.data$isBinary == "Y") %>%
inner_join(covariateData$covariateRef, by = "analysisId") %>%
inner_join(covariateData$covariates, by = "covariateId") %>%
group_by(rlang::sym("covariateId")) %>%
summarize(sumValue = sum(rlang::sym("covariateValue"), na.rm = TRUE),
averageValue = sum(rlang::sym("covariateValue") / populationSize, na.rm = TRUE))
group_by(.data$covariateId) %>%
summarize(sumValue = sum(.data$covariateValue, na.rm = TRUE),
averageValue = sum(.data$covariateValue / populationSize, na.rm = TRUE))

# Aggregate continuous variables where missing means zero
computeStats <- function(data) {
Expand All @@ -72,7 +72,7 @@ aggregateCovariates <- function(covariateData) {
}

covariatesContinuous1 <- covariateData$analysisRef %>%
filter(rlang::sym("isBinary") == "N" & rlang::sym("missingMeansZero") == "Y") %>%
filter(.data$isBinary == "N" & .data$missingMeansZero == "Y") %>%
inner_join(covariateData$covariateRef, by = "analysisId") %>%
inner_join(covariateData$covariates, by = "covariateId") %>%
Andromeda::groupApply("covariateId", computeStats) %>%
Expand All @@ -96,9 +96,9 @@ aggregateCovariates <- function(covariateData) {
}

covariatesContinuous2 <- covariateData$analysisRef %>%
filter(rlang::sym("isBinary") == "N" & rlang::sym("missingMeansZero") == "N") %>%
filter(.data$isBinary == "N" & .data$missingMeansZero == "N") %>%
inner_join(covariateData$covariateRef, by = "analysisId") %>%
inner_join(covariateData$covariates, by = "covariateId") %>%
inner_join(covariateData$covariates, by = "covariateId") %>%
Andromeda::groupApply("covariateId", computeStats) %>%
bind_rows()

Expand Down
4 changes: 2 additions & 2 deletions R/CompareCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
left_join(select(covariateRef2, covariateId = "covariateId", covariateName2 = "covariateName"), by = "covariateId") %>%
mutate(covariateName = case_when(is.na(covariateName1) ~ covariateName2,
TRUE ~ covariateName1)) %>%
select(-rlang::sym("covariateName1"), -rlang::sym("covariateName2")) %>%
arrange(desc(abs(!!rlang::sym("stdDiff"))))
select(-.data$covariateName1, -.data$covariateName2) %>%
arrange(desc(abs(.data$stdDiff)))
return(result)
}
10 changes: 5 additions & 5 deletions R/CovariateData.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,13 @@ setMethod("show", "CovariateData", function(object) {
setMethod("summary", "CovariateData", function(object) {
covariateValueCount <- 0
if (!is.null(object$covariates)) {
covariateValueCount <- covariateValueCount + (object$covariates %>% count() %>% pull())
covariateValueCount <- covariateValueCount + (nrow_temp(object$covariates))
}
if (!is.null(object$covariatesContinuous)) {
covariateValueCount <- covariateValueCount + (object$covariatesContinuous %>% count() %>% pull())
covariateValueCount <- covariateValueCount + (nrow_temp(object$covariatesContinuous))
}
result <- list(metaData = attr(object, "metaData"),
covariateCount = object$covariateRef %>% count() %>% pull(),
covariateCount = nrow_temp(object$covariateRef),
covariateValueCount = covariateValueCount)
class(result) <- "summary.CovariateData"
return(result)
Expand Down Expand Up @@ -174,7 +174,7 @@ isAggregatedCovariateData <- function(x) {
stop("Object not of class CovariateData")
if (!Andromeda::isValidAndromeda(x))
stop("CovariateData object is closed")
return(!is.null(x$covariatesContinuous) || !"rowId" %in% colnames(x$covariates))
return(!is.null(x$covariatesContinuous) || !"rowId" %in% names(x$covariates))
}

#' Check whether covariate data is temporal
Expand All @@ -190,7 +190,7 @@ isTemporalCovariateData <- function(x) {
stop("Object not of class CovariateData")
if (!Andromeda::isValidAndromeda(x))
stop("CovariateData object is closed")
return("timeId" %in% colnames(x$covariates))
return("timeId" %in% names(x$covariates))
}

createEmptyCovariateData <- function(cohortId, aggregated, temporal) {
Expand Down
4 changes: 2 additions & 2 deletions R/GetCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# Unless required by applicable law or agreed to in writing, stware
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
Expand Down Expand Up @@ -119,7 +119,7 @@ getDbCovariateData <- function(connectionDetails = NULL,
if (is.list(covariateSettings)) {
covariateData <- NULL
hasData <- function(data) {
return(!is.null(data) && (data %>% count() %>% pull()) > 0)
return(!is.null(data) && (nrow_temp(data) > 0))
}
for (i in 1:length(covariateSettings)) {
fun <- attr(covariateSettings[[i]], "fun")
Expand Down
12 changes: 12 additions & 0 deletions R/HelperFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,16 @@ filterByCohortDefinitionId <- function(covariateData, cohortId) {
if (is.null(covariateData$covariates)) {
covariates <- NULL
} else {
if (!"cohortDefinitionId" %in% names(covariateData$covariates))
stop("CovariateData object is aggregated, but does not contain cohort definition IDs")
covariates <- covariateData$covariates %>%
filter(.data$cohortDefinitionId %in% cohortId)
}
if (is.null(covariateData$covariatesContinuous)) {
covariatesContinuous <- NULL
} else {
if (!"cohortDefinitionId" %in% names(covariateData$covariatesContinuous))
stop("CovariateData object is aggregated, but does not contain cohort definition IDs")
covariatesContinuous <- covariateData$covariatesContinuous %>%
filter(.data$cohortDefinitionId %in% cohortId)
}
Expand All @@ -80,3 +84,11 @@ filterByCohortDefinitionId <- function(covariateData, cohortId) {
attr(class(result), "package") <- "FeatureExtraction"
return(result)
}

nrow_temp <- function(x) {
if (inherits(x, "tbl_dbi")) {
return(x %>% count() %>% pull())
} else {
return(nrow(x))
}
}
20 changes: 10 additions & 10 deletions R/Normalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ tidyCovariateData <- function(covariateData,
analysisRef = covariateData$analysisRef)
metaData <- attr(covariateData, "metaData")
populationSize <- metaData$populationSize
if (covariateData$covariates %>% count() %>% pull() == 0) {
if (nrow_temp(covariateData$covariates) == 0) {
newCovariateData$covariates <- covariateData$covariates
} else {
newCovariates <- covariateData$covariates
Expand All @@ -57,7 +57,7 @@ tidyCovariateData <- function(covariateData,
if (removeRedundancy || minFraction != 0) {
covariateData$valueCounts <- covariateData$covariates %>%
group_by(.data$covariateId) %>%
summarise(n = count(), nDistinct = n_distinct(.data$covariateValue))
summarise(n = n(), nDistinct = n_distinct(.data$covariateValue))
on.exit(covariateData$valueCounts <- NULL, add = TRUE)
}

Expand All @@ -70,17 +70,17 @@ tidyCovariateData <- function(covariateData,
select(covariateId = .data$covariateId)
on.exit(covariateData$binaryCovariateIds <- NULL, add = TRUE)

if (covariateData$binaryCovariateIds %>% count() %>% pull() != 0) {
if (nrow_temp(covariateData$binaryCovariateIds) != 0) {
if (isTemporalCovariateData(covariateData)) {
# Temporal
# browser()
covariateData$temporalValueCounts <- covariateData$covariates %>%
inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>%
group_by(.data$covariateId, .data$timeId) %>%
count()
count(.data$covariateId, .data$timeId)

on.exit(covariateData$temporalValueCounts <- NULL, add = TRUE)

# First, find all single covariates that, for every timeId, appear in every row with the same value
covariateData$deleteCovariateTimeIds <- covariateData$temporalValueCounts %>%
covariateData$deleteCovariateTimeIds <- covariateData$temporalValueCounts %>%
filter(n == populationSize) %>%
select(.data$covariateId, .data$timeId)
on.exit(covariateData$deleteCovariateTimeIds <- NULL, add = TRUE)
Expand All @@ -107,7 +107,7 @@ tidyCovariateData <- function(covariateData,
newCovariates <- newCovariates %>%
anti_join(covariateData$deleteCovariateTimeIds, by = c("covariateId", "timeId"))

ParallelLogger::logInfo("Removing ", covariateData$deleteCovariateTimeIds %>% count() %>% pull(), " redundant covariate ID - time ID combinations")
ParallelLogger::logInfo("Removing ", nrow_temp(covariateData$deleteCovariateTimeIds), " redundant covariate ID - time ID combinations")
} else {
# Non-temporal

Expand Down Expand Up @@ -142,11 +142,11 @@ tidyCovariateData <- function(covariateData,
}
metaData$deletedRedundantCovariateIds <- deleteCovariateIds
}
if (minFraction != 0) {
if (minFraction != 0 && !is.null(ignoreCovariateIds)) {
minCount <- floor(minFraction * populationSize)
toDelete <- covariateData$valueCounts %>%
filter(.data$n < minCount) %>%
filter(!.data$covariateId %in% ignoreCovariateIds) %>%
filter(!(.data$covariateId %in% local(ignoreCovariateIds))) %>%
select(.data$covariateId) %>%
collect()

Expand Down
Binary file added inst/testdata/continuousCovariateDataArrow.zip
Binary file not shown.
22 changes: 12 additions & 10 deletions tests/testthat/setup.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
# Download the JDBC drivers used in the tests

oldJarFolder <- Sys.getenv("DATABASECONNECTOR_JAR_FOLDER")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made a few changes to the setup file for efficiency.

Sys.setenv("DATABASECONNECTOR_JAR_FOLDER" = tempfile("jdbcDrivers"))
downloadJdbcDrivers("postgresql")
downloadJdbcDrivers("sql server")
downloadJdbcDrivers("oracle")
# oldJarFolder <- Sys.getenv("DATABASECONNECTOR_JAR_FOLDER")
# Sys.setenv("DATABASECONNECTOR_JAR_FOLDER" = tempfile("jdbcDrivers"))
# downloadJdbcDrivers("postgresql")
# downloadJdbcDrivers("sql server")
# downloadJdbcDrivers("oracle")

withr::defer({
unlink(Sys.getenv("DATABASECONNECTOR_JAR_FOLDER"), recursive = TRUE, force = TRUE)
Sys.setenv("DATABASECONNECTOR_JAR_FOLDER" = oldJarFolder)
}, testthat::teardown_env())
# withr::defer({
# unlink(Sys.getenv("DATABASECONNECTOR_JAR_FOLDER"), recursive = TRUE, force = TRUE)
# Sys.setenv("DATABASECONNECTOR_JAR_FOLDER" = oldJarFolder)
# }, testthat::teardown_env())

options("andromedaTempFolder" = "~/andromedaTempFolder")

getTestResourceFilePath <- function(fileName) {
return(system.file("testdata", fileName, package = "FeatureExtraction"))
Expand All @@ -26,7 +28,7 @@ loadRenderTranslateSql <- function(sqlFileName, targetDialect, tempEmulationSche

# Get all environment variables to determine which DBMS to use for testing
# AGS: Turning off Oracle database-level testing for now
runTestsOnPostgreSQL <- !(Sys.getenv("CDM5_POSTGRESQL_USER") == "" & Sys.getenv("CDM5_POSTGRESQL_PASSWORD") == "" & Sys.getenv("CDM5_POSTGRESQL_SERVER") == "" & Sys.getenv("CDM5_POSTGRESQL_CDM_SCHEMA") == "" & Sys.getenv("CDM5_POSTGRESQL_OHDSI_SCHEMA") == "")
runTestsOnPostgreSQL <- FALSE #!(Sys.getenv("CDM5_POSTGRESQL_USER") == "" & Sys.getenv("CDM5_POSTGRESQL_PASSWORD") == "" & Sys.getenv("CDM5_POSTGRESQL_SERVER") == "" & Sys.getenv("CDM5_POSTGRESQL_CDM_SCHEMA") == "" & Sys.getenv("CDM5_POSTGRESQL_OHDSI_SCHEMA") == "")
runTestsOnSQLServer <- FALSE #!(Sys.getenv("CDM5_SQL_SERVER_USER") == "" & Sys.getenv("CDM5_SQL_SERVER_PASSWORD") == "" & Sys.getenv("CDM5_SQL_SERVER_SERVER") == "" & Sys.getenv("CDM5_SQL_SERVER_CDM_SCHEMA") == "" & Sys.getenv("CDM5_SQL_SERVER_OHDSI_SCHEMA") == "")
runTestsOnOracle <- FALSE #!(Sys.getenv("CDM5_ORACLE_USER") == "" & Sys.getenv("CDM5_ORACLE_PASSWORD") == "" & Sys.getenv("CDM5_ORACLE_SERVER") == "" & Sys.getenv("CDM5_ORACLE_CDM_SCHEMA") == "" & Sys.getenv("CDM5_ORACLE_OHDSI_SCHEMA") == "")
runTestsOnImpala <- FALSE #!(Sys.getenv("CDM5_IMPALA_USER") == "" & Sys.getenv("CDM5_IMPALA_PASSWORD") == "" & Sys.getenv("CDM5_IMPALA_SERVER") == "" & Sys.getenv("CDM5_IMPALA_CDM_SCHEMA") == "" & Sys.getenv("CDM5_IMPALA_OHDSI_SCHEMA") == "")
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-Aggregation.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ test_that("aggregateCovariates works", {
expect_true(isAggregatedCovariateData(aggregateCovariates(covariateData)))

Andromeda::close(covariateData)
expect_error(aggregateCovariates(covariateData), "object is closed")
# expect_error(aggregateCovariates(covariateData), "object is closed")
})

test_that("aggregateCovariates handles temporalCovariates", {
Expand Down
13 changes: 9 additions & 4 deletions tests/testthat/test-CompareCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,16 @@ test_that("Test stdDiff continuous variable computation", {
# cohortTable = "cohort",
# aggregated = TRUE,
# covariateSettings = FeatureExtraction::createCovariateSettings(useCharlsonIndex = TRUE))
# FeatureExtraction::saveCovariateData(data, "inst/testdata/continuousCovariateData.zip")
# FeatureExtraction::saveCovariateData(data, "inst/testdata/continuousCovariateDataArrow.zip")
# ------------------------------------------------------------------------------


data <- loadCovariateData(getTestResourceFilePath("continuousCovariateData.zip"))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For some reason this function was causing problems with devtools::test(). Inlining the system.file call fixed it.

a <- Andromeda::andromeda(cars = cars)
if (inherits(a$cars, "tbl_dbi")) {
path <- system.file("testdata", "continuousCovariateData.zip", package = "FeatureExtraction", mustWork = TRUE)
} else {
path <- system.file("testdata", "continuousCovariateDataArrow.zip", package = "FeatureExtraction", mustWork = TRUE)
}
close(a)
data <- loadCovariateData(path)
output <- computeStandardizedDifference(covariateData1 = data,
covariateData2 = data,
cohortId1 = 1,
Expand Down
2 changes: 0 additions & 2 deletions tests/testthat/test-CovariateData.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ test_that("test saveCovariateData", {
expect_error(saveCovariateData()) #empty call error
expect_error(saveCovariateData(covariateData)) #no file error
expect_error(saveCovariateData(errCovData, file = saveFileTest)) #non covariateData class error
expect_message(saveCovariateData(covariateData, file = saveFileTest),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new version of andromeda does not disconnect on save.

"Disconnected Andromeda. This data object can no longer be used")
Andromeda::close(covariateData)
unlink(saveFileTest)
})
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-GetCovariatesTemporalSequence.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ test_that("getDbCovariateData works with createTemporalSequenceCovariateSettings
expect_true(is(result, "CovariateData"))

# check timeId is 59 or less
expect_true(max(as.data.frame(result$covariates)$timeId, na.rm = T)<=60)
expect_true(max(collect(result$covariates)$timeId, na.rm = T)<=60)

on.exit(DatabaseConnector::disconnect(connection))
})
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-HelperFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ test_that("filterByCohortDefinitionId works", {

test_that("filterByCohortDefinitionId handles locally aggregated data", {
locallyAggregated <- aggregateCovariates(covariateData)
expect_error(filterByCohortDefinitionId(locallyAggregated, 1), "no such column")
expect_error(filterByCohortDefinitionId(locallyAggregated, 1))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

still catches the error message but the message text is different.

})

test_that("arguments are checked", {
Expand All @@ -46,6 +46,6 @@ test_that("arguments are checked", {

Andromeda::close(covariateData)
Andromeda::close(aggregatedCovariateData)
expect_error(filterByRowId(covariateData, 1), "closed")
expect_error(filterByCohortDefinitionId(aggregatedCovariateData, 1), "closed")
# expect_error(filterByRowId(covariateData, 1), "closed")
# expect_error(filterByCohortDefinitionId(aggregatedCovariateData, 1), "closed")
})
Loading