Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
c28f1c8
Remove dependency on deprecated zlibbioc
csoneson Jun 1, 2025
bf206bb
Add BiocManager to Suggests to address R CMD check warning
csoneson Jun 1, 2025
370e6ab
Escape braces in documentation
csoneson Jun 1, 2025
7e790a3
Test against Bioc devel on GHA
csoneson Jun 1, 2025
9d6d7c9
Depend on R>=4.5.0
csoneson Jun 1, 2025
dcf2136
Add URL and BugReports fields
csoneson Jun 1, 2025
03bab67
Cleanup, utils.R
csoneson Jun 1, 2025
7064338
Cleanup, vignette
csoneson Jun 1, 2025
ef8df50
Cleanup, generateQCReport.R
csoneson Jun 1, 2025
838bf64
Cleanup, plotTotals.R
csoneson Jun 1, 2025
bed42f6
Cleanup, relabelMutPositions.R
csoneson Jun 1, 2025
5e2ca1e
Cleanup, summarizeExperiment.R
csoneson Jun 1, 2025
cbd2cd5
Cleanup, calculateFitnessScore.R
csoneson Jun 1, 2025
f675a64
Cleanup, calculateRelativeFC.R
csoneson Jun 1, 2025
35e9172
Cleanup, plotResults.R
csoneson Jun 1, 2025
2a6d658
Cleanup, collapseMutantsByAA.R
csoneson Jun 2, 2025
82071a8
Cleanup, digestFastqs
csoneson Jun 2, 2025
76d024b
Cleanup, linkMultipleVariants.R
csoneson Jun 2, 2025
1c3a48a
Add import
csoneson Jun 2, 2025
1255ed0
Cleanup, plotDistributions.R
csoneson Jun 2, 2025
c5a9cb6
Cleanup, plotFiltering.R
csoneson Jun 2, 2025
d6c1453
Cleanup, plotPairs.R
csoneson Jun 2, 2025
e71468f
Swap %>% for |>
csoneson Jun 2, 2025
182bebc
Import rowData<-
csoneson Jun 2, 2025
f7950c2
Bump version, update NEWS
csoneson Jun 2, 2025
1560e1e
Use expect_true(is_ggplot(.)) instead of expect_s3_class(., "ggplot")
csoneson Jul 18, 2025
2d72661
Add chunk labels to vignette
csoneson Jul 26, 2025
3b1928b
Fix a couple of long lines
csoneson Jul 26, 2025
2975358
Remove deprecated arguments to digestFastqs
csoneson Jul 31, 2025
5579e3d
Set default values of collapse parameters
csoneson Jul 31, 2025
1a1cef3
Add example to linkMultipleVariants
csoneson Jul 31, 2025
96ec0b3
Harmonize findClosestRefSeq with findClosestRefSeqEarlyStop (only com…
csoneson Aug 4, 2025
80c6260
ignore test coverage for write failures
mbstadler Aug 7, 2025
ec1a439
export and test C `complement` function
mbstadler Aug 7, 2025
fb60010
ignore unit test coverage of hard/impossible to trigger failures in r…
mbstadler Aug 7, 2025
03e225c
ignore hard to trigger file open error for test coverage in openFastq
mbstadler Aug 7, 2025
20b42b3
Add some more tests, ignore lines that are not recognized by covr
csoneson Aug 7, 2025
24a66e7
Merge branch 'bioc' of github.com:fmicompbio/mutscan into bioc
mbstadler Aug 7, 2025
8219b74
add test_compareToWildtype and corresponding unit tests
mbstadler Aug 7, 2025
544a5a5
ignore coverage of lines that are unreachable through the R API
mbstadler Aug 7, 2025
d9c4704
ignore coverage for lines that are hard to trigger in tests in variou…
mbstadler Aug 7, 2025
a1525ce
add test coverage of digestFastqsCpp
mbstadler Aug 7, 2025
22fd02f
add unit test coverage for makeBaseHGVS, makeAAHGVS and groupSimilarS…
mbstadler Aug 7, 2025
ccb6a0c
Skip likely unreachable sections
csoneson Aug 7, 2025
cc199f8
Add unit tests
csoneson Aug 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
fail-fast: false
matrix:
config:
- { os: macOS-latest, bioc: 'release', curlConfigPath: '/usr/bin/'}
- { os: windows-latest, bioc: 'release'}
- { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:RELEASE_3_20", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
- { os: macOS-latest, bioc: 'devel', curlConfigPath: '/usr/bin/'}
- { os: windows-latest, bioc: 'devel'}
- { os: ubuntu-latest, image: "bioconductor/bioconductor_docker:devel", cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
Expand Down
12 changes: 7 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mutscan
Title: Preprocessing and Analysis of Deep Mutational Scanning Data
Version: 0.3.4
Version: 0.99.0
Authors@R:
c(person(given = "Charlotte",
family = "Soneson",
Expand All @@ -20,7 +20,7 @@ Description: Provides functionality for processing and statistical analysis
files to publication-ready visualizations. A broad range of library
designs can be processed with a single, unified interface.
Depends:
R (>= 3.5)
R (>= 4.5.0)
Imports:
BiocGenerics,
S4Vectors,
Expand All @@ -45,18 +45,18 @@ Imports:
DT,
ggrepel,
IRanges,
zlibbioc,
utils,
DelayedArray,
lifecycle
tools
Suggests:
testthat (>= 3.0.0),
BiocStyle,
knitr,
Biostrings,
pwalign,
plotly,
scattermore
scattermore,
BiocManager
SystemRequirements: GNU make
biocViews: GeneticVariability, GenomicVariation, Preprocessing
License: MIT + file LICENSE
Expand All @@ -66,3 +66,5 @@ VignetteBuilder: knitr
LinkingTo:
Rcpp
Config/testthat/edition: 3
URL: https://github.com/fmicompbio/mutscan
BugReports: https://github.com/fmicompbio/mutscan/issues
13 changes: 5 additions & 8 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,12 @@ export(plotTotals)
export(plotVolcano)
export(relabelMutPositions)
export(summarizeExperiment)
import(zlibbioc)
importFrom(BiocGenerics,paste)
importFrom(BiocGenerics,"rownames<-")
importFrom(BiocGenerics,rownames)
importFrom(DT,datatable)
importFrom(DelayedArray,rowsum)
importFrom(GGally,eval_data_col)
importFrom(GGally,ggpairs)
importFrom(GGally,wrap)
importFrom(IRanges,IntegerList)
importFrom(Matrix,colSums)
importFrom(Matrix,rowMeans)
Expand All @@ -34,14 +32,14 @@ importFrom(Rcpp,sourceCpp)
importFrom(S4Vectors,DataFrame)
importFrom(S4Vectors,metadata)
importFrom(S4Vectors,unstrsplit)
importFrom(SummarizedExperiment,"rowData<-")
importFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(SummarizedExperiment,assay)
importFrom(SummarizedExperiment,assayNames)
importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
importFrom(csaw,normOffsets)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
Expand Down Expand Up @@ -72,6 +70,7 @@ importFrom(ggplot2,after_stat)
importFrom(ggplot2,annotate)
importFrom(ggplot2,coord_cartesian)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_rect)
importFrom(ggplot2,element_text)
importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_abline)
Expand Down Expand Up @@ -99,9 +98,6 @@ importFrom(ggrepel,geom_text_repel)
importFrom(grDevices,colorRamp)
importFrom(grDevices,hcl.colors)
importFrom(grDevices,rgb)
importFrom(lifecycle,deprecate_warn)
importFrom(lifecycle,deprecated)
importFrom(lifecycle,is_present)
importFrom(limma,contrasts.fit)
importFrom(limma,eBayes)
importFrom(limma,lmFit)
Expand All @@ -118,9 +114,10 @@ importFrom(tibble,rownames_to_column)
importFrom(tidyr,gather)
importFrom(tidyr,separate)
importFrom(tidyr,separate_rows)
importFrom(tidyr,unite)
importFrom(tidyselect,matches)
importFrom(tools,file_ext)
importFrom(utils,globalVariables)
importFrom(utils,packageVersion)
importFrom(utils,relist)
importFrom(xfun,Rscript_call)
useDynLib(mutscan, .registration = TRUE)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# mutscan 0.99.0

* Prepare for Bioconductor submission
* Remove deprecated arguments variableCollapseMaxDist, variableCollapseMinReads and variableCollapseMinRatio from digestFastqs (deprecated in mutscan 0.3.0)

# mutscan 0.3.4

* Allow use of scattermore/scattermost in plotPairs
Expand Down
50 changes: 29 additions & 21 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ calcNearestStringDist <- function(x, metric = "hamming", nThreads = 1L) {
.Call(`_mutscan_calcNearestStringDist`, x, metric, nThreads)
}

# Thin R-level wrapper: passes `n` unchanged to the native routine
# `_mutscan_complement` via .Call() and returns whatever that routine returns.
# No argument checking or conversion happens on the R side.
# NOTE(review): presumably this yields the complement of a nucleotide; the
# expected type of `n` is not visible here -- confirm against the C++ source.
complement <- function(n) {
.Call(`_mutscan_complement`, n)
}

# Thin R-level wrapper: forwards `a`, `b` and `mutNameDelimiter`, in that
# order, to the native routine `_mutscan_compareCodonPositions` via .Call()
# and returns its result unchanged.
# NOTE(review): judging by the names, `a` and `b` look like mutation names
# whose codon positions are compared, with `mutNameDelimiter` separating the
# parts of each name -- confirm the types and return value in the C++ source.
compareCodonPositions <- function(a, b, mutNameDelimiter) {
.Call(`_mutscan_compareCodonPositions`, a, b, mutNameDelimiter)
}
Expand All @@ -40,6 +44,10 @@ test_makeAAHGVS <- function(mutationsSorted, mutNameDelimiter, wtSeq) {
.Call(`_mutscan_test_makeAAHGVS`, mutationsSorted, mutNameDelimiter, wtSeq)
}

# Thin R-level wrapper: forwards all ten arguments, in signature order, to the
# native routine `_mutscan_test_compareToWildtype` via .Call() and returns its
# result unchanged.
# The first four arguments (`varSeq`, `wtSeq`, `varIntQual`,
# `forbiddenCodons_vect`) have no default and must be supplied. The remaining
# ones default to: mutatedPhredMin = 0.0, nbrMutatedCodonsMax = -1L,
# codonPrefix = "c", nbrMutatedBasesMax = -1L, mutNameDelimiter = ".",
# collapseToWT = FALSE.
# NOTE(review): the `test_` prefix suggests this exists to reach the native
# wild-type comparison from unit tests; the meaning of -1L for the two
# `...Max` arguments is not visible here -- confirm in the C++ source.
test_compareToWildtype <- function(varSeq, wtSeq, varIntQual, forbiddenCodons_vect, mutatedPhredMin = 0.0, nbrMutatedCodonsMax = -1L, codonPrefix = "c", nbrMutatedBasesMax = -1L, mutNameDelimiter = ".", collapseToWT = FALSE) {
.Call(`_mutscan_test_compareToWildtype`, varSeq, wtSeq, varIntQual, forbiddenCodons_vect, mutatedPhredMin, nbrMutatedCodonsMax, codonPrefix, nbrMutatedBasesMax, mutNameDelimiter, collapseToWT)
}

# Thin R-level wrapper: forwards all thirteen arguments, in signature order,
# to the native routine `_mutscan_test_decomposeRead` via .Call() and returns
# its result unchanged. None of the arguments has a default, so every one must
# be supplied by the caller.
# NOTE(review): the `test_` prefix suggests this exists to reach the native
# read-decomposition code from unit tests. Which arguments are inputs
# (e.g. `sseq`, `squal`, `elements`) and which are initial values for outputs
# (e.g. `umiSeq`, `varSeq`, `nNoPrimer`) is not visible here -- confirm in the
# C++ source.
test_decomposeRead <- function(sseq, squal, elements, elementLengths, primerSeqs, umiSeq, varSeq, varQual, varLengths, constSeq, constQual, nNoPrimer, nReadWrongLength) {
.Call(`_mutscan_test_decomposeRead`, sseq, squal, elements, elementLengths, primerSeqs, umiSeq, varSeq, varQual, varLengths, constSeq, constQual, nNoPrimer, nReadWrongLength)
}
Expand All @@ -57,45 +65,45 @@ findClosestRefSeqEarlyStop <- function(varSeq, wtSeq, upperBoundMismatch, sim) {
}

#' Create a conversion table for collapsing similar sequences
#' @param seqs Character vector with nucleotide sequences (or pairs of
#' sequences concatenated with "_") to be collapsed. The sequences must
#' @param seqs Character vector with nucleotide sequences (or pairs of
#' sequences concatenated with "_") to be collapsed. The sequences must
#' all be of the same length.
#' @param scores Numeric vector of "scores" for the sequences. Typically
#' the total read/UMI count. A higher score will be preferred when
#' deciding which sequence to use as the representative for a group of
#' the total read/UMI count. A higher score will be preferred when
#' deciding which sequence to use as the representative for a group of
#' collapsed sequences.
#' @param collapseMaxDist Numeric scalar defining the tolerance for collapsing
#' similar sequences. If the value is in [0, 1), it defines the maximal
#' @param collapseMaxDist Numeric scalar defining the tolerance for collapsing
#' similar sequences. If the value is in [0, 1), it defines the maximal
#' Hamming distance in terms of a fraction of sequence length:
#' (\code{round(collapseMaxDist * nchar(sequence))}).
#' A value greater or equal to 1 is rounded and directly used as the maximum
#' allowed Hamming distance. Note that sequences can only be
#' collapsed if they are all of the same length.
#' @param collapseMinScore Numeric scalar, indicating the minimum score
#' required for a sequence to be considered as a representative for a
#' group of similar sequences (i.e., to allow other sequences to be
#' collapsed into it).
#' collapsed if they are all of the same length. The default value is 0.
#' @param collapseMinScore Numeric scalar, indicating the minimum score
#' required for a sequence to be considered as a representative for a
#' group of similar sequences (i.e., to allow other sequences to be
#' collapsed into it). The default value is 0.
#' @param collapseMinRatio Numeric scalar. During collapsing of
#' similar sequences, a low-frequency sequence will be collapsed
#' with a higher-frequency sequence only if the ratio between the
#' high-frequency and the low-frequency scores is at least this
#' similar sequences, a low-frequency sequence will be collapsed
#' with a higher-frequency sequence only if the ratio between the
#' high-frequency and the low-frequency scores is at least this
#' high. A value of 0 indicates that no such check is performed. The
#' default value is 0.
#' @param verbose Logical scalar, whether to print progress messages. The
#' default value is \code{FALSE}.
#'
#' @return A data.frame with two columns, containing the input sequences
#'
#' @return A data.frame with two columns, containing the input sequences
#' and the representatives for the groups resulting from grouping similar
#' sequences, respectively.
#'
#'
#' @examples
#' seqs <- c("AACGTAGCA", "ACCGTAGCA", "AACGGAGCA", "ATCGGAGCA", "TGAGGCATA")
#' scores <- c(5, 1, 3, 1, 8)
#' groupSimilarSequences(seqs = seqs, scores = scores,
#' collapseMaxDist = 1, collapseMinScore = 0,
#' groupSimilarSequences(seqs = seqs, scores = scores,
#' collapseMaxDist = 1, collapseMinScore = 0,
#' collapseMinRatio = 0, verbose = FALSE)
#'
#'
#' @export
#' @author Michael Stadler, Charlotte Soneson
groupSimilarSequences <- function(seqs, scores, collapseMaxDist, collapseMinScore, collapseMinRatio, verbose) {
groupSimilarSequences <- function(seqs, scores, collapseMaxDist = 0.0, collapseMinScore = 0.0, collapseMinRatio = 0.0, verbose = FALSE) {
.Call(`_mutscan_groupSimilarSequences`, seqs, scores, collapseMaxDist, collapseMinScore, collapseMinRatio, verbose)
}

Expand Down
33 changes: 19 additions & 14 deletions R/calculateFitnessScore.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,22 +59,23 @@ calculateFitnessScore <- function(se, pairingCol, ODCols, comparison, WTrows,

## pairingCol is in colData(se)
.assertScalar(x = pairingCol, type = "character",
validValues = colnames(SummarizedExperiment::colData(se)))
validValues = colnames(colData(se)))

## ODCols are all in colData(se) and contain numeric values
.assertVector(x = ODCols, type = "character", rngLen = c(1, Inf),
validValues = colnames(SummarizedExperiment::colData(se)))
validValues = colnames(colData(se)))
for (odc in ODCols) {
.assertVector(x = SummarizedExperiment::colData(se)[[odc]],
.assertVector(x = colData(se)[[odc]],
type = "numeric")
}

## comparison is length(3)-character with column and values in colData(se)
.assertVector(x = comparison, type = "character", len = 3)
.assertScalar(x = comparison[1], type = "character",
validValues = colnames(SummarizedExperiment::colData(se)))
.assertVector(x = comparison[2:3], type = "character",
validValues = SummarizedExperiment::colData(se)[[comparison[1]]])
validValues = colnames(colData(se)))
.assertVector(
x = comparison[2:3], type = "character",
validValues = colData(se)[[comparison[1]]])

## there is exactly one observation per pairing and condition
if (any(table(colData(se)[colData(se)[, comparison[1]] %in%
Expand All @@ -100,25 +101,29 @@ calculateFitnessScore <- function(se, pairingCol, ODCols, comparison, WTrows,
colData(se_denominator)[, pairingCol])
se_numerator <- se_numerator[, match(shared_repl,
colData(se_numerator)[, pairingCol])]
se_denominator <- se_denominator[, match(shared_repl,
colData(se_denominator)[, pairingCol])]
se_denominator <-
se_denominator[, match(shared_repl,
colData(se_denominator)[, pairingCol])]

## ------------------------------------------------------------------------
## calculate normalized counts (n_i)
## ------------------------------------------------------------------------
norm_counts_numerator <- sweep(
as.matrix(assay(se_numerator, selAssay)), MARGIN = 2,
STATS = apply(colData(se_numerator)[, ODCols, drop = FALSE], MARGIN = 1, prod) /
Matrix::colSums(assay(se_numerator, selAssay)),
STATS = apply(colData(se_numerator)[, ODCols, drop = FALSE],
MARGIN = 1, prod) /
colSums(assay(se_numerator, selAssay)),
FUN = "*")
norm_counts_denominator <- sweep(
as.matrix(assay(se_denominator, selAssay)), MARGIN = 2,
STATS = apply(colData(se_denominator)[, ODCols, drop = FALSE], MARGIN = 1, prod) /
Matrix::colSums(assay(se_denominator, selAssay)),
STATS = apply(colData(se_denominator)[, ODCols, drop = FALSE],
MARGIN = 1, prod) /
colSums(assay(se_denominator, selAssay)),
FUN = "*")
n <- log2(norm_counts_numerator/norm_counts_denominator)
n[!is.finite(n)] <- NA
colnames(n) <- paste0(comparison[2], "_vs_", comparison[3], "_repl", shared_repl)
colnames(n) <- paste0(comparison[2], "_vs_", comparison[3],
"_repl", shared_repl)


## ------------------------------------------------------------------------
Expand All @@ -131,4 +136,4 @@ calculateFitnessScore <- function(se, pairingCol, ODCols, comparison, WTrows,
}
fitness <- sweep(n, MARGIN = 2, STATS = nWT, FUN = "/")
return(fitness)
}
}
Loading
Loading