fmicompbio · mbstadler · Aug 8, 2025 · Jun 1, 2025 · Jun 1, 2025 · Jun 1, 2025
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -18,9 +18,9 @@ jobs:
       fail-fast: false
       matrix:
         config:
-        - { os: macOS-latest, bioc: 'release', curlConfigPath: '/usr/bin/'}
-        - { os: windows-latest, bioc: 'release'}
-        - { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_20", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
+        - { os: macOS-latest, bioc: 'devel', curlConfigPath: '/usr/bin/'}
+        - { os: windows-latest, bioc: 'devel'}
+        - { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:devel", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
 
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mutscan
 Title: Preprocessing and Analysis of Deep Mutational Scanning Data
-Version: 0.3.4
+Version: 0.99.0
 Authors@R: 
     c(person(given = "Charlotte",
              family = "Soneson",
@@ -20,7 +20,7 @@ Description: Provides functionality for processing and statistical analysis
     files to publication-ready visualizations. A broad range of library 
     designs can be processed with a single, unified interface. 
 Depends:
-    R (>= 3.5)
+    R (>= 4.5.0)
 Imports:
     BiocGenerics,
     S4Vectors,
@@ -45,18 +45,18 @@ Imports:
     DT,
     ggrepel,
     IRanges,
-    zlibbioc,
     utils,
     DelayedArray,
-    lifecycle
+    tools
 Suggests: 
     testthat (>= 3.0.0),
     BiocStyle,
     knitr,
     Biostrings,
     pwalign,
     plotly,
-    scattermore
+    scattermore,
+    BiocManager
 SystemRequirements: GNU make
 biocViews: GeneticVariability, GenomicVariation, Preprocessing
 License: MIT + file LICENSE
@@ -66,3 +66,5 @@ VignetteBuilder: knitr
 LinkingTo: 
     Rcpp
 Config/testthat/edition: 3
+URL: https://github.com/fmicompbio/mutscan
+BugReports: https://github.com/fmicompbio/mutscan/issues
diff --git a/NAMESPACE b/NAMESPACE
@@ -18,14 +18,12 @@ export(plotTotals)
 export(plotVolcano)
 export(relabelMutPositions)
 export(summarizeExperiment)
-import(zlibbioc)
-importFrom(BiocGenerics,paste)
+importFrom(BiocGenerics,"rownames<-")
 importFrom(BiocGenerics,rownames)
 importFrom(DT,datatable)
 importFrom(DelayedArray,rowsum)
 importFrom(GGally,eval_data_col)
 importFrom(GGally,ggpairs)
-importFrom(GGally,wrap)
 importFrom(IRanges,IntegerList)
 importFrom(Matrix,colSums)
 importFrom(Matrix,rowMeans)
@@ -34,14 +32,14 @@ importFrom(Rcpp,sourceCpp)
 importFrom(S4Vectors,DataFrame)
 importFrom(S4Vectors,metadata)
 importFrom(S4Vectors,unstrsplit)
+importFrom(SummarizedExperiment,"rowData<-")
 importFrom(SummarizedExperiment,SummarizedExperiment)
 importFrom(SummarizedExperiment,assay)
 importFrom(SummarizedExperiment,assayNames)
 importFrom(SummarizedExperiment,assays)
 importFrom(SummarizedExperiment,colData)
 importFrom(SummarizedExperiment,rowData)
 importFrom(csaw,normOffsets)
-importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,arrange)
 importFrom(dplyr,bind_rows)
@@ -72,6 +70,7 @@ importFrom(ggplot2,after_stat)
 importFrom(ggplot2,annotate)
 importFrom(ggplot2,coord_cartesian)
 importFrom(ggplot2,element_blank)
+importFrom(ggplot2,element_rect)
 importFrom(ggplot2,element_text)
 importFrom(ggplot2,facet_wrap)
 importFrom(ggplot2,geom_abline)
@@ -99,9 +98,6 @@ importFrom(ggrepel,geom_text_repel)
 importFrom(grDevices,colorRamp)
 importFrom(grDevices,hcl.colors)
 importFrom(grDevices,rgb)
-importFrom(lifecycle,deprecate_warn)
-importFrom(lifecycle,deprecated)
-importFrom(lifecycle,is_present)
 importFrom(limma,contrasts.fit)
 importFrom(limma,eBayes)
 importFrom(limma,lmFit)
@@ -118,9 +114,10 @@ importFrom(tibble,rownames_to_column)
 importFrom(tidyr,gather)
 importFrom(tidyr,separate)
 importFrom(tidyr,separate_rows)
-importFrom(tidyr,unite)
 importFrom(tidyselect,matches)
+importFrom(tools,file_ext)
 importFrom(utils,globalVariables)
+importFrom(utils,packageVersion)
 importFrom(utils,relist)
 importFrom(xfun,Rscript_call)
 useDynLib(mutscan, .registration = TRUE)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# mutscan 0.99.0
+
+* Prepare for Bioconductor submission
+* Remove deprecated arguments variableCollapseMaxDist, variableCollapseMinReads and variableCollapseMinRatio from digestFastqs (deprecated in mutscan 0.3.0)
+
 # mutscan 0.3.4
 
 * Allow use of scattermore/scattermost in plotPairs

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -24,6 +24,10 @@ calcNearestStringDist <- function(x, metric = "hamming", nThreads = 1L) {
     .Call(`_mutscan_calcNearestStringDist`, x, metric, nThreads)
 }
 
+complement <- function(n) {
+    .Call(`_mutscan_complement`, n)
+}
+
 compareCodonPositions <- function(a, b, mutNameDelimiter) {
     .Call(`_mutscan_compareCodonPositions`, a, b, mutNameDelimiter)
 }
@@ -40,6 +44,10 @@ test_makeAAHGVS <- function(mutationsSorted, mutNameDelimiter, wtSeq) {
     .Call(`_mutscan_test_makeAAHGVS`, mutationsSorted, mutNameDelimiter, wtSeq)
 }
 
+test_compareToWildtype <- function(varSeq, wtSeq, varIntQual, forbiddenCodons_vect, mutatedPhredMin = 0.0, nbrMutatedCodonsMax = -1L, codonPrefix = "c", nbrMutatedBasesMax = -1L, mutNameDelimiter = ".", collapseToWT = FALSE) {
+    .Call(`_mutscan_test_compareToWildtype`, varSeq, wtSeq, varIntQual, forbiddenCodons_vect, mutatedPhredMin, nbrMutatedCodonsMax, codonPrefix, nbrMutatedBasesMax, mutNameDelimiter, collapseToWT)
+}
+
 test_decomposeRead <- function(sseq, squal, elements, elementLengths, primerSeqs, umiSeq, varSeq, varQual, varLengths, constSeq, constQual, nNoPrimer, nReadWrongLength) {
     .Call(`_mutscan_test_decomposeRead`, sseq, squal, elements, elementLengths, primerSeqs, umiSeq, varSeq, varQual, varLengths, constSeq, constQual, nNoPrimer, nReadWrongLength)
 }
@@ -57,45 +65,45 @@ findClosestRefSeqEarlyStop <- function(varSeq, wtSeq, upperBoundMismatch, sim) {
 }
 
 #' Create a conversion table for collapsing similar sequences
-#' @param seqs Character vector with nucleotide sequences (or pairs of 
-#' sequences concatenated with "_") to be collapsed. The sequences must 
+#' @param seqs Character vector with nucleotide sequences (or pairs of
+#' sequences concatenated with "_") to be collapsed. The sequences must
 #' all be of the same length.
 #' @param scores Numeric vector of "scores" for the sequences. Typically
-#' the total read/UMI count. A higher score will be preferred when 
-#' deciding which sequence to use as the representative for a group of 
+#' the total read/UMI count. A higher score will be preferred when
+#' deciding which sequence to use as the representative for a group of
 #' collapsed sequences.
-#' @param collapseMaxDist Numeric scalar defining the tolerance for collapsing 
-#' similar sequences. If the value is in [0, 1), it defines the maximal 
+#' @param collapseMaxDist Numeric scalar defining the tolerance for collapsing
+#' similar sequences. If the value is in [0, 1), it defines the maximal
 #' Hamming distance in terms of a fraction of sequence length:
 #' (\code{round(collapseMaxDist * nchar(sequence))}).
 #' A value greater than or equal to 1 is rounded and directly used as the
 #' maximum allowed Hamming distance. Note that sequences can only be
-#' collapsed if they are all of the same length.
-#' @param collapseMinScore Numeric scalar, indicating the minimum score 
-#' required for a sequence to be considered as a representative for a 
-#' group of similar sequences (i.e., to allow other sequences to be 
-#' collapsed into it).
+#' collapsed if they are all of the same length. The default value is 0.
+#' @param collapseMinScore Numeric scalar, indicating the minimum score
+#' required for a sequence to be considered as a representative for a
+#' group of similar sequences (i.e., to allow other sequences to be
+#' collapsed into it). The default value is 0.
 #' @param collapseMinRatio Numeric scalar. During collapsing of
-#' similar sequences, a low-frequency sequence will be collapsed 
-#' with a higher-frequency sequence only if the ratio between the 
-#' high-frequency and the low-frequency scores is at least this 
+#' similar sequences, a low-frequency sequence will be collapsed
+#' with a higher-frequency sequence only if the ratio between the
+#' high-frequency and the low-frequency scores is at least this
 #' high. A value of 0 indicates that no such check is performed.
 #' @param verbose Logical scalar, whether to print progress messages.
-#' 
-#' @return A data.frame with two columns, containing the input sequences 
+#'
+#' @return A data.frame with two columns, containing the input sequences
 #' and the representatives for the groups resulting from grouping similar
 #' sequences, respectively.
-#' 
+#'
 #' @examples
 #' seqs <- c("AACGTAGCA", "ACCGTAGCA", "AACGGAGCA", "ATCGGAGCA", "TGAGGCATA")
 #' scores <- c(5, 1, 3, 1, 8)
-#' groupSimilarSequences(seqs = seqs, scores = scores, 
-#'                       collapseMaxDist = 1, collapseMinScore = 0, 
+#' groupSimilarSequences(seqs = seqs, scores = scores,
+#'                       collapseMaxDist = 1, collapseMinScore = 0,
 #'                       collapseMinRatio = 0, verbose = FALSE)
-#'                             
+#'
 #' @export
 #' @author Michael Stadler, Charlotte Soneson
-groupSimilarSequences <- function(seqs, scores, collapseMaxDist, collapseMinScore, collapseMinRatio, verbose) {
+groupSimilarSequences <- function(seqs, scores, collapseMaxDist = 0.0, collapseMinScore = 0.0, collapseMinRatio = 0.0, verbose = FALSE) {
     .Call(`_mutscan_groupSimilarSequences`, seqs, scores, collapseMaxDist, collapseMinScore, collapseMinRatio, verbose)
 }
 

diff --git a/R/calculateFitnessScore.R b/R/calculateFitnessScore.R
@@ -59,22 +59,23 @@ calculateFitnessScore <- function(se, pairingCol, ODCols, comparison, WTrows,
 
     ## pairingCol is in colData(se)
     .assertScalar(x = pairingCol, type = "character", 
-                  validValues = colnames(SummarizedExperiment::colData(se)))
+                  validValues = colnames(colData(se)))
 
     ## ODCols are all in colData(se) and contain numeric values
     .assertVector(x = ODCols, type = "character", rngLen = c(1, Inf),
-                  validValues = colnames(SummarizedExperiment::colData(se)))
+                  validValues = colnames(colData(se)))
     for (odc in ODCols) {
-        .assertVector(x = SummarizedExperiment::colData(se)[[odc]], 
+        .assertVector(x = colData(se)[[odc]], 
                       type = "numeric")
     }
 
     ## comparison is length(3)-character with column and values in colData(se)
     .assertVector(x = comparison, type = "character", len = 3)
     .assertScalar(x = comparison[1], type = "character", 
-                  validValues = colnames(SummarizedExperiment::colData(se)))
-    .assertVector(x = comparison[2:3], type = "character", 
-                  validValues = SummarizedExperiment::colData(se)[[comparison[1]]])
+                  validValues = colnames(colData(se)))
+    .assertVector(
+        x = comparison[2:3], type = "character", 
+        validValues = colData(se)[[comparison[1]]])
 
     ## there is exactly one observation per pairing and condition
     if (any(table(colData(se)[colData(se)[, comparison[1]] %in% 
@@ -100,25 +101,29 @@ calculateFitnessScore <- function(se, pairingCol, ODCols, comparison, WTrows,
                              colData(se_denominator)[, pairingCol])
     se_numerator <- se_numerator[, match(shared_repl, 
                                          colData(se_numerator)[, pairingCol])]
-    se_denominator <- se_denominator[, match(shared_repl, 
-                                             colData(se_denominator)[, pairingCol])]
+    se_denominator <- 
+        se_denominator[, match(shared_repl, 
+                               colData(se_denominator)[, pairingCol])]
 
     ## ------------------------------------------------------------------------
     ## calculate normalized counts (n_i)
     ## ------------------------------------------------------------------------
     norm_counts_numerator <- sweep(
         as.matrix(assay(se_numerator, selAssay)), MARGIN = 2, 
-        STATS = apply(colData(se_numerator)[, ODCols, drop = FALSE], MARGIN = 1, prod) /
-            Matrix::colSums(assay(se_numerator, selAssay)), 
+        STATS = apply(colData(se_numerator)[, ODCols, drop = FALSE], 
+                      MARGIN = 1, prod) /
+            colSums(assay(se_numerator, selAssay)), 
         FUN = "*")
     norm_counts_denominator <- sweep(
         as.matrix(assay(se_denominator, selAssay)), MARGIN = 2, 
-        STATS = apply(colData(se_denominator)[, ODCols, drop = FALSE], MARGIN = 1, prod) /
-            Matrix::colSums(assay(se_denominator, selAssay)), 
+        STATS = apply(colData(se_denominator)[, ODCols, drop = FALSE], 
+                      MARGIN = 1, prod) /
+            colSums(assay(se_denominator, selAssay)), 
         FUN = "*")
     n <- log2(norm_counts_numerator/norm_counts_denominator)
     n[!is.finite(n)] <- NA
-    colnames(n) <- paste0(comparison[2], "_vs_", comparison[3], "_repl", shared_repl)
+    colnames(n) <- paste0(comparison[2], "_vs_", comparison[3],
+                          "_repl", shared_repl)
 
 
     ## ------------------------------------------------------------------------
@@ -131,4 +136,4 @@ calculateFitnessScore <- function(se, pairingCol, ODCols, comparison, WTrows,
     }
     fitness <- sweep(n, MARGIN = 2, STATS = nWT, FUN = "/")
     return(fitness)
-}
+}