diff --git a/DESCRIPTION b/DESCRIPTION
index 0a6ea20..41620cf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,7 +31,8 @@ Imports:
tidyr
Suggests:
covr,
- testthat
+ testthat,
+ vdiffr
Remotes:
stitam/breakpoint
License: GPL (>= 3)
diff --git a/R/filter_comparisons.R b/R/filter_comparisons.R
index 11a2b7e..6d7ea62 100644
--- a/R/filter_comparisons.R
+++ b/R/filter_comparisons.R
@@ -49,8 +49,8 @@ filter_comparisons <- function(
if (is.null(data)) {
out <- comps |>
dplyr::filter(
- .data$pattern_id %in% filter_value |
- .data$subject_id %in% filter_value
+ !!rlang::sym("pattern_id") %in% filter_value |
+ !!rlang::sym("subject_id") %in% filter_value
)
return(out)
}
diff --git a/R/find_adapter.R b/R/find_adapter.R
index 26635ab..0fe96fa 100644
--- a/R/find_adapter.R
+++ b/R/find_adapter.R
@@ -35,6 +35,7 @@ find_adapter <- function(
)
stop(msg)
}
+ class(bps) <- class(bps)[-which(class(bps) == "breakpoints")]
if (start_threshold < 0 | start_threshold > 1) {
stop("start_threshold must be between 0 and 1.")
}
diff --git a/R/find_all_adapters.R b/R/find_all_adapters.R
index 00118e3..ec48501 100644
--- a/R/find_all_adapters.R
+++ b/R/find_all_adapters.R
@@ -6,7 +6,7 @@
#' @param data data.frame; a data frame that contains IDs and sequences
#' @param id_var character; variable within \code{data} that stores IDs.
#' @param seq_var character; variable within \code{data} that stores sequences.
-#' @param submat character; the substitution matrix. See
+#' @param submat character; the substitution matrix. See
#' \code{position_scores()} for more information.
#' @param method character; the method to use for finding breakpoints. Either
#' "cemean", "plateau", or "window". See \code{find_breakpoints()} for more
@@ -42,8 +42,8 @@ find_all_adapters <- function(
) {
pairs <- data |>
- dplyr::filter(.data[[id_var]] %in% ids) |>
- dplyr::pull(.data[[id_var]]) |>
+ dplyr::filter(!!rlang::sym(id_var) %in% ids) |>
+ dplyr::pull(!!rlang::sym(id_var)) |>
utils::combn(2) |>
t() |>
as.data.frame()
@@ -106,7 +106,5 @@ find_all_adapters <- function(
return(adapter)
}
}
-
- class(adapters) <- c("adapter", class(adapters))
return(adapters)
}
diff --git a/R/find_breakpoints.R b/R/find_breakpoints.R
index c27ab5a..786447e 100644
--- a/R/find_breakpoints.R
+++ b/R/find_breakpoints.R
@@ -234,15 +234,19 @@ find_breakpoints_window <- function(
rep(ngroups + 1, times = nrow(position_scores$position_scores) - window*ngroups)
)
ms <- position_scores$position_scores %>%
- dplyr::group_by(.data$group) %>%
+ dplyr::group_by(!!rlang::sym("group")) %>%
dplyr::summarise(
window = dplyr::n(),
- mean = round(mean(.data$score), 3),
+ mean = round(mean(!!rlang::sym("score")), 3),
identity_p = suppressWarnings(
- stats::wilcox.test(as.numeric(.data$identity), alternative = "less", mu = pident_threshold)
+ stats::wilcox.test(
+ as.numeric(!!rlang::sym("identity")),
+ alternative = "less", mu = pident_threshold)
)$p.value,
score_p = suppressWarnings(
- stats::wilcox.test(.data$score, alternative = "less", mu = score_threshold)
+ stats::wilcox.test(
+ !!rlang::sym("score"),
+ alternative = "less", mu = score_threshold)
)$p.value
)
ms$pass <- ms$score_p >= p_threshold | ms$identity_p >= p_threshold
diff --git a/R/plot_adapter_matrix.R b/R/plot_adapter_matrix.R
index fcdf809..d6c34b6 100644
--- a/R/plot_adapter_matrix.R
+++ b/R/plot_adapter_matrix.R
@@ -3,7 +3,7 @@
#' Take an adapter matrix and optionally data frame of cluster designations and
#' plot them on a simple heatmap.
#' @import ggplot2 patchwork
-#' @param x adapter_matrix; a matrix of class "adapter_matrix". Use
+#' @param x adapter_matrix; a matrix of class "adapter_matrix". Use
#' adapter_matrix() to create a compatible matrix.
#' @param clusters data.frame; a table of RBP IDs and cluster designations.
#' @param ... additional arguments (not used).
@@ -26,10 +26,13 @@ plot.adapter_matrix <- function(x, clusters = NULL, ...) {
mat_long <- reshape2::melt(mat, value.name = "pident")
p_main <- ggplot2::ggplot(
mat_long,
- ggplot2::aes(x = .data$Var1, y = .data$Var2, fill = .data$pident)
+ ggplot2::aes(
+ x = !!rlang::sym("Var1"),
+ y = !!rlang::sym("Var2"),
+ fill = !!rlang::sym("pident"))
) +
ggplot2::geom_tile(color = "black") +
- ggplot2::geom_text(ggplot2::aes(label = round(.data$pident, 1))) +
+ ggplot2::geom_text(ggplot2::aes(label = round(!!rlang::sym("pident"), 1))) +
ggplot2::scale_fill_gradient(low = "white", high = "salmon") +
ggplot2::theme_minimal() +
ggplot2::labs(x = "", y = "", fill = "") +
@@ -39,10 +42,13 @@ plot.adapter_matrix <- function(x, clusters = NULL, ...) {
)
if (is.null(clusters)) return(p_main)
row_anno <- clusters |>
- dplyr::rename(Var2 = .data$id) |>
- dplyr::mutate(Var2 = factor(.data$Var2, levels = unique(mat_long$Var2)))
+ dplyr::rename(Var2 = !!rlang::sym("id")) |>
+ dplyr::mutate(
+ Var2 = factor(!!rlang::sym("Var2"), levels = unique(mat_long$Var2)))
p_row <- ggplot2::ggplot(
- row_anno, ggplot2::aes(x = 1, y = .data$Var2, fill = .data$cluster)) +
+ row_anno,
+ ggplot2::aes(
+ x = 1, y = !!rlang::sym("Var2"), fill = !!rlang::sym("cluster"))) +
ggplot2::geom_tile() +
ggplot2::scale_y_discrete(
limits = levels(mat_long$Var2), expand = c(0, 0)) +
@@ -55,10 +61,10 @@ plot.adapter_matrix <- function(x, clusters = NULL, ...) {
legend.position = "none"
)
col_anno <- clusters |>
- dplyr::rename(Var1 = .data$id) |>
- dplyr::mutate(Var1 = factor(.data$Var1, levels = unique(mat_long$Var1)))
+ dplyr::rename(Var1 = !!rlang::sym("id")) |>
+ dplyr::mutate(Var1 = factor(!!rlang::sym("Var1"), levels = unique(mat_long$Var1)))
p_col <- ggplot2::ggplot(
- col_anno, ggplot2::aes(x = .data$Var1, y = 1, fill = .data$cluster)) +
+ col_anno, ggplot2::aes(x = !!rlang::sym("Var1"), y = 1, fill = !!rlang::sym("cluster"))) +
ggplot2::geom_tile() +
ggplot2::scale_x_discrete(limits = levels(mat_long$Var1), expand = c(0, 0)) +
ggplot2::scale_y_discrete(expand = c(0, 0)) +
diff --git a/R/plot_position_scores.R b/R/plot_position_scores.R
index f631e2f..c09cf27 100644
--- a/R/plot_position_scores.R
+++ b/R/plot_position_scores.R
@@ -119,16 +119,16 @@ plot.ps <- function(
values_to = "position"
) %>%
dplyr::arrange("position") %>%
- dplyr::filter(.data$method %in% .env$method)
+ dplyr::filter(!!rlang::sym("method") %in% .env$method)
if (is.null(highlight)) {
highlight_value <- end_long |>
- dplyr::pull(.data$position) |>
+ dplyr::pull(!!rlang::sym("position")) |>
stats::na.omit() |>
max()
} else {
highlight_value <- end_long |>
- dplyr::filter(.data$method == highlight) |>
- dplyr::pull(.data$position)
+ dplyr::filter(!!rlang::sym("method") == highlight) |>
+ dplyr::pull(!!rlang::sym("position"))
}
# moving averages
avg <- vector()
@@ -153,16 +153,16 @@ plot.ps <- function(
)
}
if (type == "indiv") {
- g <- ggplot(res, aes(.data$position, .data$score))
+ g <- ggplot(res, aes(!!rlang::sym("position"), !!rlang::sym("score")))
}
if (type == "ma") {
- g <- ggplot(res, aes(.data$position, .data$ma))
+ g <- ggplot(res, aes(!!rlang::sym("position"), !!rlang::sym("ma")))
}
if (type == "cusum") {
- g <- ggplot(res, aes(.data$position, .data$cumsum))
+ g <- ggplot(res, aes(!!rlang::sym("position"), !!rlang::sym("cumsum")))
}
g <- g +
- geom_point(aes(color = .data$domain), alpha = 0.5) +
+ geom_point(aes(color = !!rlang::sym("domain")), alpha = 0.5) +
scale_color_manual(
values = c("N-terminal" = "red", "C-terminal" = "blue"),
na.translate = FALSE
@@ -170,9 +170,9 @@ plot.ps <- function(
geom_vline(
data = end_long,
aes(
- xintercept = .data$position,
- group = .data$method,
- linetype = .data$method
+ xintercept = !!rlang::sym("position"),
+ group = !!rlang::sym("method"),
+ linetype = !!rlang::sym("method")
),
key_glyph = "path"
)
diff --git a/R/position_scores.R b/R/position_scores.R
index fb92b4a..638f114 100644
--- a/R/position_scores.R
+++ b/R/position_scores.R
@@ -42,7 +42,7 @@ position_scores <- function(
if (!subject_id %in% data[[id_var]]) {
stop(paste0("Subject ID not found: ", subject_id))
}
- df <- data |> dplyr::filter(.data[[id_var]] %in% c(pattern_id, subject_id))
+ df <- data |> dplyr::filter(!!rlang::sym(id_var) %in% c(pattern_id, subject_id))
df[[seq_var]] <- gsub("\\*", "", df[[seq_var]])
valid_input <- validate_rbps(
df, id_var = id_var, seq_var = seq_var, verbose = verbose
diff --git a/README.md b/README.md
index df3758b..4e87647 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
[](https://www.repostatus.org/#active)
[](https://github.com/sbthandras/tailor/actions)
-[](#test-coverage)
+[](#test-coverage)
[](https://doi.org/10.64898/2026.02.20.706991)
diff --git a/data/adapters.rda b/data/adapters.rda
index 1f3243d..3c7f2be 100644
Binary files a/data/adapters.rda and b/data/adapters.rda differ
diff --git a/man/find_all_adapters.Rd b/man/find_all_adapters.Rd
index b7ce6d5..45b0be9 100644
--- a/man/find_all_adapters.Rd
+++ b/man/find_all_adapters.Rd
@@ -28,7 +28,7 @@ find_all_adapters(
\item{seq_var}{character; variable within \code{data} that stores sequences.}
-\item{submat}{character; the substitution matrix. See
+\item{submat}{character; the substitution matrix. See
\code{position_scores()} for more information.}
\item{method}{character; the method to use for finding breakpoints. Either
diff --git a/man/plot.adapter_matrix.Rd b/man/plot.adapter_matrix.Rd
index 86d267a..1e105a1 100644
--- a/man/plot.adapter_matrix.Rd
+++ b/man/plot.adapter_matrix.Rd
@@ -7,7 +7,7 @@
\method{plot}{adapter_matrix}(x, clusters = NULL, ...)
}
\arguments{
-\item{x}{adapter_matrix; a matrix of class "adapter_matrix". Use
+\item{x}{adapter_matrix; a matrix of class "adapter_matrix". Use
adapter_matrix() to create a compatible matrix.}
\item{clusters}{data.frame; a table of RBP IDs and cluster designations.}
diff --git a/tests/testthat/_snaps/plot_adapter_matrix/amat-with-clusters.svg b/tests/testthat/_snaps/plot_adapter_matrix/amat-with-clusters.svg
new file mode 100644
index 0000000..6a216b5
--- /dev/null
+++ b/tests/testthat/_snaps/plot_adapter_matrix/amat-with-clusters.svg
@@ -0,0 +1,1047 @@
+
+
diff --git a/tests/testthat/_snaps/plot_adapter_matrix/amat-without-clusters.svg b/tests/testthat/_snaps/plot_adapter_matrix/amat-without-clusters.svg
new file mode 100644
index 0000000..e60005d
--- /dev/null
+++ b/tests/testthat/_snaps/plot_adapter_matrix/amat-without-clusters.svg
@@ -0,0 +1,912 @@
+
+
diff --git a/tests/testthat/_snaps/plot_position_scores/position-scores.svg b/tests/testthat/_snaps/plot_position_scores/position-scores.svg
new file mode 100644
index 0000000..4ca1469
--- /dev/null
+++ b/tests/testthat/_snaps/plot_position_scores/position-scores.svg
@@ -0,0 +1,1016 @@
+
+
diff --git a/tests/testthat/test-adapter_matrix.R b/tests/testthat/test-adapter_matrix.R
new file mode 100644
index 0000000..cda64a4
--- /dev/null
+++ b/tests/testthat/test-adapter_matrix.R
@@ -0,0 +1,48 @@
+test_that("adapter_matrix() works", {
+ data(adapters)
+ amat <- adapters |> adapter_matrix()
+
+ expect_true(inherits(amat, "adapter_matrix"))
+ expect_equal(dim(amat), c(20,20))
+ expect_equal(unique(diag(amat)), 1)
+
+ index_x <- which(rownames(amat) == "MN395291-1")
+ index_y <- which(colnames(amat) == "ON513429-1")
+ expect_equal(amat[index_x, index_y], 0.876)
+})
+
+test_that("adapter_matrix() fails when input is not an adapter df", {
+ data(rbps)
+
+ expect_error(rbps |> adapter_matrix())
+ msg <- capture_error(rbps |> adapter_matrix())
+
+ expect_equal(msg$message, paste0(
+ "'adapters' must be a data frame of class 'adapter'. ",
+ "Use find_adapter() to create a compatible data frame."
+ ))
+})
+
+test_that("adapter_matrix() fails for id mismatch", {
+ data(adapters)
+ expect_error(adapters |> adapter_matrix(ids = "kutya"))
+
+ msg <- capture_error(adapters |> adapter_matrix(ids = "kutya"))
+ expect_equal(
+ msg$message,
+ "The following ids were not found in the 'adapter' data frame: kutya"
+ )
+
+ ids <- c(adapters$pattern_id, adapters$subject_id) |> unique()
+ ids <- ids[-1]
+ expect_error(adapters |> adapter_matrix(ids = ids))
+
+ msg <- capture_error(adapters |> adapter_matrix(ids = ids))
+ expect_equal(
+ msg$message,
+ "The following ids were not found in the 'ids' vector: MN395291-1"
+ )
+
+})
+
+
diff --git a/tests/testthat/test-cluster_adapters.R b/tests/testthat/test-cluster_adapters.R
new file mode 100644
index 0000000..9891256
--- /dev/null
+++ b/tests/testthat/test-cluster_adapters.R
@@ -0,0 +1,26 @@
+test_that("cluster_adapters() works", {
+ data(rbps)
+ data(adapters)
+ amat <- adapter_matrix(adapters)
+
+ clusters1 <- cluster_adapters(amat, k_min = 1)
+ expect_true(inherits(clusters1, "data.frame"))
+ expect_equal(dim(clusters1), c(20,2))
+ expect_equal(clusters1 |> dplyr::pull(cluster) |> unique() |> length(), 2)
+
+ clusters2 <- cluster_adapters(amat, k_min = 2, k_max = 5)
+ expect_true(inherits(clusters2, "data.frame"))
+ expect_equal(dim(clusters2), c(20,2))
+ expect_equal(clusters2 |> dplyr::pull(cluster) |> unique() |> length(), 2)
+})
+
+test_that("cluster_adapters() fails when input is not an adapter matrix", {
+ data(rbps)
+ expect_error(cluster_adapters(rbps, k_min = 2, k_max = 5))
+
+ msg <- capture_error(cluster_adapters(rbps, k_min = 2, k_max = 5))
+ expect_equal(msg$message, paste0(
+ "mat must be a matrix of class 'adapter_matrix'. ",
+ "Use adapter_matrix() to create a compatible matrix."
+ ))
+})
diff --git a/tests/testthat/test-completeness.R b/tests/testthat/test-completeness.R
new file mode 100644
index 0000000..6b4d147
--- /dev/null
+++ b/tests/testthat/test-completeness.R
@@ -0,0 +1,11 @@
+test_that("completeness() works", {
+ data(rbps)
+ data(adapters)
+ amat <- adapter_matrix(adapters)
+ clusters <- cluster_adapters(amat, k_min = 2, k_max = 2)
+ ids <- clusters |> dplyr::filter(cluster == "ACL 1") |> dplyr::pull(id)
+ index <- which(rownames(amat) %in% ids)
+ out <- completeness(mat = amat, index = index)
+
+ expect_equal(out, 1)
+})
diff --git a/tests/testthat/test-filter_comparisons.R b/tests/testthat/test-filter_comparisons.R
new file mode 100644
index 0000000..324e457
--- /dev/null
+++ b/tests/testthat/test-filter_comparisons.R
@@ -0,0 +1,15 @@
+test_that("filter_comparisons() works", {
+ data(rbps)
+ data(adapters)
+ amat <- adapter_matrix(adapters)
+ clusters <- cluster_adapters(amat, k_min = 2, k_max = 5)
+ rbps <- dplyr::left_join(rbps, clusters, by = c("Core_ORF" = "id"))
+ f1 <- adapters |> filter_comparisons("ON513429-1")
+ f2 <- adapters |> filter_comparisons("MN395291-1") |> filter_comparisons("ON513429-1")
+ f3 <- adapters |> filter_comparisons("ACL 2", filter_by = "cluster", data = rbps)
+
+ expect_true(inherits(f1, "adapter"))
+ expect_equal(dim(f1), c(19, 6))
+ expect_equal(dim(f2), c(1, 6))
+ expect_equal(dim(f3), c(145, 6))
+})
diff --git a/tests/testthat/test-find_adapter.R b/tests/testthat/test-find_adapter.R
new file mode 100644
index 0000000..97af749
--- /dev/null
+++ b/tests/testthat/test-find_adapter.R
@@ -0,0 +1,47 @@
+test_that("find_adapter() works", {
+ data(rbps)
+ ps <- position_scores("MN395291-1", "ON513429-1", data = rbps)
+ bps <- find_breakpoints(ps, Nmax = 5)
+ ada <- find_adapter(bps)
+
+ expect_true(inherits(ada, "adapter"))
+ expect_equal(nrow(ada), 1)
+ expect_equal(ada$pident, 0.901)
+})
+
+test_that("find_adapter() fails when input is not a breakpoints object", {
+ data(rbps)
+ ps <- position_scores("MN395291-1", "ON513429-1", data = rbps)
+ expect_error(find_adapter(ps))
+ msg <- capture_error(find_adapter(ps))
+ expect_equal(msg$message, paste0(
+ "bps must be a data frame of class 'breakpoints'. ",
+ "Use find_breakpoints() to create a compatible data frame."
+ ))
+})
+
+test_that("find_adapter() fails when start_threshold is invalid", {
+ data(rbps)
+ ps <- position_scores("MN395291-1", "ON513429-1", data = rbps)
+ bps <- find_breakpoints(ps, Nmax = 5)
+ expect_error(find_adapter(bps, start_threshold = 1.1))
+ msg <- capture_error(find_adapter(bps, start_threshold = 1.1))
+ expect_equal(
+ msg$message,
+ "start_threshold must be between 0 and 1."
+ )
+})
+
+test_that("find_adapter() returns NAs if no adapter found", {
+ data(rbps)
+ ps <- position_scores("MW056503-1", "OX335376-1", data = rbps)
+ bps <- find_breakpoints(ps, Nmax = 5)
+ ada <- find_adapter(bps)
+
+ expect_true(inherits(ada, "adapter"))
+ expect_equal(nrow(ada), 1)
+ expect_equal(ada$start, NA)
+ expect_equal(ada$end, NA)
+ expect_equal(ada$mean_score, NA)
+ expect_equal(ada$pident, NA)
+})
diff --git a/tests/testthat/test-find_all_adapters.R b/tests/testthat/test-find_all_adapters.R
new file mode 100644
index 0000000..c8d4f25
--- /dev/null
+++ b/tests/testthat/test-find_all_adapters.R
@@ -0,0 +1,8 @@
+test_that("find_all_adapters() works", {
+ data(rbps)
+ adapters <- find_all_adapters(rbps$Core_ORF[1:3], data = rbps)
+
+ expect_true(inherits(adapters, "adapter"))
+ expect_equal(nrow(adapters), 3)
+ expect_equal(adapters$pident, c(0.901, 0.940, 0.914))
+})
diff --git a/tests/testthat/test-find_breakpoints.R b/tests/testthat/test-find_breakpoints.R
new file mode 100755
index 0000000..3bf1034
--- /dev/null
+++ b/tests/testthat/test-find_breakpoints.R
@@ -0,0 +1,18 @@
+test_that("find_breakpoints() works", {
+ data(rbps)
+ ps <- position_scores("MN395291-1", "ON513429-1", data = rbps)
+ cemean <- find_breakpoints(ps, method = "cemean", Nmax = 5)
+ ewma <- find_breakpoints(ps, method = "plateau", type = "ewma", lambda = 0.2)
+ cusum <- find_breakpoints(ps, method = "plateau", type = "cusum")
+ window <- find_breakpoints(ps, method = "window", window = 5)
+
+ expect_true(inherits(cemean, "breakpoints"))
+ expect_equal(dim(cemean), c(5, 6))
+ expect_equal(cemean$pident[1], 0.947)
+ expect_equal(dim(ewma), c(6, 6))
+ expect_equal(ewma$pident[1], 0.936)
+ expect_equal(dim(cusum), c(4, 6))
+ expect_equal(cusum$pident[1], 0.936)
+ expect_equal(dim(window), c(69, 6))
+ expect_equal(window$pident[1], 0.886)
+})
diff --git a/tests/testthat/test-homogeneity.R b/tests/testthat/test-homogeneity.R
new file mode 100644
index 0000000..8891436
--- /dev/null
+++ b/tests/testthat/test-homogeneity.R
@@ -0,0 +1,11 @@
+test_that("homogeneity() works", {
+ data(rbps)
+ data(adapters)
+ amat <- adapter_matrix(adapters, ids = rbps$Core_ORF)
+ clusters <- cluster_adapters(amat, k_min = 2, k_max = 2)
+ ids <- clusters |> dplyr::filter(cluster == "ACL 1") |> dplyr::pull(id)
+ index <- which(rownames(amat) %in% ids)
+ out <- homogeneity(mat = amat[index, index])
+
+ expect_equal(out, 1)
+})
diff --git a/tests/testthat/test-plot_adapter_matrix.R b/tests/testthat/test-plot_adapter_matrix.R
new file mode 100755
index 0000000..77b5c8f
--- /dev/null
+++ b/tests/testthat/test-plot_adapter_matrix.R
@@ -0,0 +1,12 @@
+test_that("plot() works with adapter matrices", {
+ skip_on_ci()
+ data(rbps)
+ data(adapters)
+ amat <- adapter_matrix(adapters)
+ g1 <- plot(amat)
+ clusters <- cluster_adapters(amat, k_min = 2, k_max = 2)
+ g2 <- plot(amat, clusters = clusters)
+
+ vdiffr::expect_doppelganger("amat-without-clusters", g1)
+ vdiffr::expect_doppelganger("amat-with-clusters", g2)
+})
diff --git a/tests/testthat/test-plot_position_scores.R b/tests/testthat/test-plot_position_scores.R
new file mode 100755
index 0000000..f7e36d4
--- /dev/null
+++ b/tests/testthat/test-plot_position_scores.R
@@ -0,0 +1,8 @@
+test_that("plot() works with position scores", {
+ skip_on_ci()
+ data(rbps)
+ ps <- position_scores("MN395291-1", "ON513429-1", data = rbps)
+ g <- plot(ps)
+
+ vdiffr::expect_doppelganger("position-scores", g)
+})
diff --git a/tests/testthat/test-schoco.R b/tests/testthat/test-schoco.R
new file mode 100644
index 0000000..e69de29