diff --git a/.github/workflows/check-standard-test.yaml b/.github/workflows/check-standard-test.yaml new file mode 100644 index 00000000..790066f3 --- /dev/null +++ b/.github/workflows/check-standard-test.yaml @@ -0,0 +1,107 @@ +# Runs R CMD check on the package in r/ across Linux, Windows and macOS for pull requests targeting main (and on manual dispatch). +name: R-CMD-check + +on: + pull_request: + branches: [main] + workflow_dispatch: + +jobs: + R-CMD-check: + runs-on: ${{ matrix.os }} + name: ${{ matrix.os }} + timeout-minutes: 120 + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: 'yes' + ENABLE_INSTALL_COUNTING: 'no' + _R_CHECK_BUILD_VIGNETTES_: 'false' + + steps: + - uses: actions/checkout@v4 + - name: Install binary dependencies on ubuntu + if: matrix.os == 'ubuntu-latest' + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y libhdf5-dev + # Highway is optional (the package falls back to a vendored build), so a missing distro package must not fail the job + sudo apt-get install -y libhwy-dev || true + + - name: Install binary dependencies on macOS + if: matrix.os == 'macos-latest' || matrix.os == 'macos-13' + shell: bash + run: | + brew update + brew install hdf5 pkg-config highway + + - name: Configure compilation flags + shell: bash + run: | + cat <<'MAKEVARS' > "$GITHUB_WORKSPACE/Makevars.user" + MAKEFLAGS=--jobs=3 + MAKEVARS + echo "R_MAKEVARS_USER=$GITHUB_WORKSPACE/Makevars.user" >> "$GITHUB_ENV" + + - name: Install Highway on Windows (MSYS2 UCRT64) + if: matrix.os == 'windows-latest' + uses: msys2/setup-msys2@v2 + with: + release: false + update: true + cache: true + install: >- + base-devel + git + mingw-w64-ucrt-x86_64-toolchain + mingw-w64-ucrt-x86_64-pkgconf + mingw-w64-ucrt-x86_64-highway + msystem: UCRT64 + path-type: minimal + + - name: Expose UCRT64 pkg-config to PATH (Windows) + if: matrix.os == 'windows-latest' + shell: bash + run: | + echo "C:/msys64/ucrt64/bin" >> "$GITHUB_PATH" + # Ensure pkg-config finds the .pc files for UCRT64 + echo "PKG_CONFIG_PATH=C:/msys64/ucrt64/lib/pkgconfig" >> "$GITHUB_ENV" + echo
"PKG_CONFIG=C:/msys64/ucrt64/bin/pkg-config.exe" >> "$GITHUB_ENV" + + - name: Handle Windows big object files + if: matrix.os == 'windows-latest' + shell: bash + run: | + echo "CXXFLAGS += -Wa,-mbig-obj" >> "$GITHUB_WORKSPACE/Makevars.user" + + - uses: r-lib/actions/setup-r@v2 + with: + Ncpus: '3' + use-public-rspm: true + extra-repositories: | + https://bioconductor.org/packages/release/data/annotation + https://bpcells.github.io/drat + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache-version: 1 + extra-packages: | + any::testthat + any::decor + working-directory: 'r' + needs: check + - name: Check package + uses: r-lib/actions/check-r-package@v2 + with: + working-directory: r + build_args: c("--no-build-vignettes") + upload-snapshots: false + error-on: '"error"' + + diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index a6b7bf67..29eb5ed6 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -18,6 +18,12 @@ jobs: run: bash -c 'echo -e "MAKEFLAGS=--jobs=3\nCXXFLAGS += -O1 -UNDEBUG" > "$GITHUB_WORKSPACE/Makevars.user" && echo "R_MAKEVARS_USER=$GITHUB_WORKSPACE/Makevars.user" >> "$GITHUB_ENV"' - name: Setup R uses: r-lib/actions/setup-r@v2 + with: + Ncpus: '3' + use-public-rspm: true + extra-repositories: | + https://bioconductor.org/packages/release/data/annotation + https://bpcells.github.io/drat - name: Install R dependencies uses: r-lib/actions/setup-r-dependencies@v2 with: diff --git a/.github/workflows/deploy-full-website.yml b/.github/workflows/deploy-full-website.yml index 64b4fd37..39a52c0b 100644 --- a/.github/workflows/deploy-full-website.yml +++ b/.github/workflows/deploy-full-website.yml @@ -16,6 +16,12 @@ jobs: run: bash -c 'echo -e "MAKEFLAGS=--jobs=3\nCXXFLAGS += -O1 -UNDEBUG" > "$GITHUB_WORKSPACE/Makevars.user" && echo "R_MAKEVARS_USER=$GITHUB_WORKSPACE/Makevars.user" >> "$GITHUB_ENV"' - name: Setup R uses: r-lib/actions/setup-r@v2 + with: + Ncpus: '3' +
use-public-rspm: true + extra-repositories: | + https://bioconductor.org/packages/release/data/annotation + https://bpcells.github.io/drat - name: Install R dependencies uses: r-lib/actions/setup-r-dependencies@v2 with: diff --git a/.github/workflows/r-test.yml b/.github/workflows/r-test.yml index aadedf6d..0af5c491 100644 --- a/.github/workflows/r-test.yml +++ b/.github/workflows/r-test.yml @@ -38,6 +38,9 @@ jobs: with: Ncpus: '3' use-public-rspm: true + extra-repositories: | + https://bioconductor.org/packages/release/data/annotation + https://bpcells.github.io/drat - uses: r-lib/actions/setup-r-dependencies@v2 with: cache-version: 1 diff --git a/DESCRIPTION b/DESCRIPTION index 45acbe15..4706b080 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,5 +2,5 @@ Package: BPCells.installation.moved Title: Dummy package to provide new BPCells installation location Version: 0.0.0.9000 Description: This package prints an error message upon attempted installation -License: Apache-2.0 or MIT +License: Apache License (== 2.0) | MIT + file LICENSE Encoding: UTF-8 diff --git a/python/docs/source/notebooks/fragment_basics.md b/python/docs/source/notebooks/fragment_basics.md index ca49ffc8..25f5fdac 100644 --- a/python/docs/source/notebooks/fragment_basics.md +++ b/python/docs/source/notebooks/fragment_basics.md @@ -108,7 +108,7 @@ cluster_order = sorted(set(clusters)) cell_groups_array = bpcells.experimental.build_cell_groups(fragments_bpcells_path, barcodes, clusters, cluster_order) # We could provide a dict or local file path, but URL is easier -chrom_sizes = "http://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.chrom.sizes" +chrom_sizes = "https://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.chrom.sizes" insertions_matrix_path = os.path.join(tmpdir.name, "bpcells_insertions_matrix") diff --git a/python/tests/test_real_data.py b/python/tests/test_real_data.py index b111f0cd..15d8c392 100644 --- a/python/tests/test_real_data.py +++ b/python/tests/test_real_data.py 
@@ -40,7 +40,7 @@ def test_500_pbmc_matrix(tmp_path, fetch_cached_file): peaks_path = fetch_cached_file("https://cf.10xgenomics.com/samples/cell-atac/2.0.0/atac_pbmc_500_nextgem/atac_pbmc_500_nextgem_peaks.bed") peak_matrix_path = fetch_cached_file("https://cf.10xgenomics.com/samples/cell-atac/2.0.0/atac_pbmc_500_nextgem/atac_pbmc_500_nextgem_raw_peak_bc_matrix.h5") barcode_metrics_path = fetch_cached_file("https://cf.10xgenomics.com/samples/cell-atac/2.0.0/atac_pbmc_500_nextgem/atac_pbmc_500_nextgem_singlecell.csv") - chrom_sizes_path = fetch_cached_file("http://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.chrom.sizes") + chrom_sizes_path = fetch_cached_file("https://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.chrom.sizes") # Convert BPCells fragments diff --git a/r/.Rbuildignore b/r/.Rbuildignore index a8a96d78..eab151b9 100644 --- a/r/.Rbuildignore +++ b/r/.Rbuildignore @@ -1,11 +1,18 @@ ^_pkgdown\.yml$ +^compile_commands\.json$ +^index\.md$ ^docs$ +^scripts$ +^data-raw$ ^pkgdown$ ^.*\.Rproj$ ^\.Rproj\.user$ +^tools/highway$ ^\.idea +^\.cache$ ^bench$ +^vignettes$ .*\.o$ .*\.so$ @@ -15,4 +22,6 @@ build/$ cmake-build-debug/ \.vscode/ -\.snakemake/ \ No newline at end of file +\.snakemake/ + +^cran-comments\.md$ diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 3dd6ad3f..5a27d22a 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,31 +1,34 @@ Package: BPCells Title: Single Cell Counts Matrices to PCA -Version: 0.3.1 +Version: 0.3.2 Authors@R: c( person(given = "Benjamin", family = "Parks", role = c("aut", "cre", "cph"), email = "bparks@alumni.stanford.edu", comment = c(ORCID = "0000-0002-0261-7472")), - person("Immanuel", "Abdi", role = "aut"), + person("Immanuel", "Abdi", email = "immanuelazn@berkeley.edu", role = "aut"), person("Stanford University", role=c("cph", "fnd")), person("Genentech, Inc.", role=c("cph", "fnd"))) -Description: > Efficient operations for single cell ATAC-seq fragments and +Description: Efficient operations for single cell 
ATAC-seq fragments and RNA counts matrices. Interoperable with standard file formats, and introduces efficient bit-packed formats that allow large storage savings and increased read speeds. -License: Apache-2.0 or MIT +License: Apache License (>= 2) | MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Roxygen: list(markdown = TRUE) -URL: https://bnprks.github.io/BPCells, https://github.com/bnprks/BPCells +URL: https://bnprks.github.io/BPCells/, https://github.com/bnprks/BPCells LinkingTo: Rcpp, RcppEigen -Imports: +Imports: methods, grDevices, + graphics, + stats, + utils, magrittr, Matrix, Rcpp, @@ -36,22 +39,48 @@ Imports: stringr, tibble, dplyr (>= 1.0.0), - tidyr, readr, ggplot2 (>= 3.4.0), scales, patchwork, + ragg, + purrr, scattermore, ggrepel, - RColorBrewer, - hexbin + RColorBrewer Suggests: IRanges, + tidyr, + hexbin, GenomicRanges, + GenomeInfoDb, + S4Vectors, matrixStats, + MatrixGenerics, + RSpectra, igraph, RcppHNSW, - RcppAnnoy + RcppAnnoy, + Seurat, + pkgdown, + devtools, + uwot, + irlba, + BiocManager, + BSgenome.Hsapiens.UCSC.hg38, + TFMPvalue (>= 0.0.5), + motifmatchr, + chromVARmotifs, + png, + testthat (>= 3.0.0) +Additional_repositories: + https://bioconductor.org/packages/release/data/annotation, + https://bpcells.github.io/drat Depends: R (>= 4.0.0) -Config/Needs/website: pkgdown, devtools, uwot, irlba, RcppHNSW, igraph, BiocManager, bioc::BSgenome.Hsapiens.UCSC.hg38, github::GreenleafLab/motifmatchr, github::GreenleafLab/chromVARmotifs, png, magrittr +SystemRequirements: C++17, + HDF5 (>= 1.10) [optional; detected via pkg-config 'hdf5' or 'h5cc'; + else set HDF5_CFLAGS/HDF5_LIBS], + Highway (>= 1.0.5) [optional; detected via pkg-config 'libhwy'/'hwy'; + falls back to vendored build if not found] +Config/Needs/website: pkgdown, devtools, uwot, irlba, RcppHNSW, igraph, BiocManager, bioc::BSgenome.Hsapiens.UCSC.hg38, github::GreenleafLab/motifmatchr, github::GreenleafLab/chromVARmotifs, png diff --git 
a/r/LICENSE b/r/LICENSE new file mode 100644 index 00000000..70bf9a3e --- /dev/null +++ b/r/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2025 +COPYRIGHT HOLDER: BPCells contributors \ No newline at end of file diff --git a/r/NAMESPACE b/r/NAMESPACE index 39e8b00f..94c115eb 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -27,15 +27,12 @@ export(cluster_graph_louvain) export(cluster_graph_seurat) export(colMaxs) export(colMaxs.IterableMatrix) -export(colMaxs.default) export(colMeans) export(colQuantiles) export(colQuantiles.IterableMatrix) -export(colQuantiles.default) export(colSums) export(colVars) export(colVars.IterableMatrix) -export(colVars.default) export(collect_features) export(continuous_palette) export(convert_matrix_type) @@ -92,6 +89,7 @@ export(plot_tss_scatter) export(prefix_cell_names) export(pseudobulk_matrix) export(qc_scATAC) +export(ragg_wrap) export(range_distance_to_nearest) export(read_bed) export(read_encode_blacklist) @@ -104,15 +102,13 @@ export(remove_demo_data) export(rotate_x_labels) export(rowMaxs) export(rowMaxs.IterableMatrix) -export(rowMaxs.default) export(rowMeans) export(rowQuantiles) export(rowQuantiles.IterableMatrix) -export(rowQuantiles.default) export(rowSums) export(rowVars) export(rowVars.IterableMatrix) -export(rowVars.default) +export(scale_next_plot_height) export(sctransform_pearson) export(select_cells) export(select_chromosomes) @@ -144,6 +140,7 @@ export(write_matrix_anndata_hdf5_dense) export(write_matrix_dir) export(write_matrix_hdf5) export(write_matrix_memory) +exportMethods("dimnames<-") exportMethods(as.data.frame) exportMethods(as.matrix) exportMethods(t) @@ -154,6 +151,9 @@ importFrom(Matrix,rowMeans) importFrom(Matrix,rowSums) importFrom(Matrix,t) importFrom(Rcpp,sourceCpp) +importFrom(ggplot2,element_text) +importFrom(ggplot2,theme) +importFrom(graphics,text) importFrom(magrittr,"%>%") importFrom(methods,.hasSlot) importFrom(methods,Arith) @@ -162,6 +162,7 @@ importFrom(methods,Math) importFrom(methods,Math2) 
importFrom(methods,as) importFrom(methods,callNextMethod) +importFrom(methods,canCoerce) importFrom(methods,cbind2) importFrom(methods,is) importFrom(methods,new) @@ -171,6 +172,19 @@ importFrom(methods,setClass) importFrom(methods,setGeneric) importFrom(methods,setMethod) importFrom(methods,show) +importFrom(ragg,agg_png) +importFrom(stats,as.formula) +importFrom(stats,end) +importFrom(stats,model.matrix) +importFrom(stats,p.adjust) +importFrom(stats,ppois) +importFrom(stats,qpois) +importFrom(stats,quantile) +importFrom(stats,start) +importFrom(utils,download.file) +importFrom(utils,head) +importFrom(utils,tail) +importFrom(utils,untar) importMethodsFrom(Matrix,colMeans) importMethodsFrom(Matrix,colSums) importMethodsFrom(Matrix,rowMeans) diff --git a/r/NEWS.md b/r/NEWS.md index ac6ca2a3..556b0d12 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -1,8 +1,25 @@ -# BPCells 0.4.0 (in-progress main branch) +# BPCells 0.4.0 (in-progress main branch) +## To-dos +- Add support for sparse pseudobulking in `pseudobulk_matrix()`. Currently in progress in #268. +- Add support for duplicate rows/cols in subsetting operations. +- Add support for matrix matrix addition. +- Maybe add CCA support? +- Refactor C++ backend to take in the logic in R S4 methods. This would allow for a cleaner separation of R and C++ code, and would allow for much quicker porting + to Python in the future. + +# BPCells 0.3.2 +* Initial CRAN submission. + +The BPCells 0.3.2 release covers 6 months of changes and 8 commits from @immanuelazn. +Most notably, this release prepares (finally!) BPCells for CRAN submission. This means that users can easily install BPCells using pre-compiled binaries, rather than having to compile from source. +Full details of changes below. ## Breaking changes - Change first parameter name of `cluster_graph_leiden()`, `cluster_graph_louvain()` and `cluster_graph_seurat()` from `snn` to `mat` to more accurately reflect the input type.
(pull request #292) +## Improvements +- Prepare BPCells for CRAN submission by adjusting styling and adding additional documentation for S3/S4 methods of IterableMatrix/IterableFragments. (pull request #311) + ## Features - Create a wrapper function `cluster_cells_graph()` that wraps the steps of knn object creation, graph adjacency creation, and clustering all within a single function (pull request #292) - Add `tile_width` and `normalization` arguments to `write_insertion_bedgraph()` to allow for more flexible bedgraph creation (pull request #299) @@ -13,13 +30,6 @@ - Fix error in documentation examples for `plot_embedding()`, resulting from the way documentation examples use nested function calls (pull request #316). - Fix error in `qc_scATAC()` when fragments are near the start of a chromosome (pull request #320). -## To-dos -- Add support for sparse pseudobulking in `pseudobulk_matrix()`. Currently in progress in #268. -- Add support for duplicate rows/cols in subsetting operations. -- Add support for matrix matrix addition. -- Maybe add CCA support? -- Refactor C++ backend to take in the logic in R S4 methods. This would allow for a cleaner seperation of R and C++ code, and would allow for much quicker porting - to Python in the future. 
# BPCells 0.3.1 (7/21/2025) diff --git a/r/R/BPCells-package.R b/r/R/BPCells-package.R index c8d63d69..fa679b39 100644 --- a/r/R/BPCells-package.R +++ b/r/R/BPCells-package.R @@ -10,9 +10,31 @@ #' @importFrom Rcpp sourceCpp #' @importClassesFrom Matrix dgCMatrix #' @importFrom Matrix t -#' @importFrom methods .hasSlot Arith as callNextMethod cbind2 Compare is Math Math2 new rbind2 setAs setClass setGeneric setMethod show +#' @importFrom methods .hasSlot Arith as callNextMethod canCoerce cbind2 Compare is Math Math2 new rbind2 setAs setClass setGeneric setMethod show +#' @importFrom stats as.formula end model.matrix p.adjust ppois qpois quantile start +#' @importFrom utils download.file head tail untar +#' @importFrom graphics text +#' @importFrom ggplot2 theme element_text NULL +# Suppress CMD check warnings for legitimate NSE usage +utils::globalVariables(c( + # Data manipulation variables (dplyr/data.table style) + ".", "chr", "start", "end", "strand", "counts", "group", "tile", "enrichment", + "p_val", "q_val", "group_rank", "from", "to", "neighbor_dist", "start_dist", + "end_dist", "idx", "pos", "normalized_insertions", "row_number", "start_tmp", + "feature", "transcript_id", "y", "size", "gene_name", "label", "color", "x", + "loop_id", "right", "left", "text", "origin", + + # Plot variables + "average", "percent", "proportion", "reads", "xmin", "xmax", "ymin", "ymax", + "nFrags", "TSSEnrichment", "cellName", "smoothed_enrichment", "position", + "genes", ".env", "track_label", + + # Data objects/mappings + "human_gene_mapping" +)) + #' @importFrom magrittr %>% #' @export magrittr::`%>%` diff --git a/r/R/atac_utils.R b/r/R/atac_utils.R index 62c98b68..fc2d0bb7 100644 --- a/r/R/atac_utils.R +++ b/r/R/atac_utils.R @@ -112,6 +112,9 @@ footprint <- function(fragments, ranges, zero_based_coords = !is(ranges, "GRange assert_is_wholenumber(flank) chr <- as.integer(factor(ranges$chr, chrNames(fragments))) - 1 + # Filter to ranges that overlap the chromosomes from 
`fragments` + ranges <- ranges[!is.na(chr),] + chr <- chr[!is.na(chr)] cell_groups <- as.factor(cell_groups) iter <- iterate_fragments(fragments) @@ -173,8 +176,8 @@ footprint <- function(fragments, ranges, zero_based_coords = !is(ranges, "GRange #' #' ArchR's `PromoterRatio` and `BlacklistRatio` are not included in the output, as they can be easily calculated #' from `ReadsInPromoter / nFrags` and `ReadsInBlacklist / nFrags`. Similarly, ArchR's `NucleosomeRatio` can be calculated -#' as `(monoNucleosomal + multiNucleosomal) / subNucleosomal`. #' @examples +#' \dontrun{ #' ## Prep data #' frags <- get_demo_frags(subset = FALSE) #' reference_dir <- file.path(tempdir(), "references") @@ -190,6 +193,16 @@ footprint <- function(fragments, ranges, zero_based_coords = !is(ranges, "GRange #' #' ## Run qc #' head(qc_scATAC(frags, genes, blacklist)) +#' } +#' +#' ## Read precomputed +#' head(readr::read_delim( +#' file.path( +#' system.file("extdata", package = "BPCells"), +#' "qc_results_filtered_example_chr_4.tsv.gz" +#' ), +#' delim = "\t", show_col_types = FALSE +#' )) #' @export qc_scATAC <- function(fragments, genes, blacklist) { assert_is(fragments, "IterableFragments") @@ -375,11 +388,28 @@ merge_peaks_iterative <- function(peaks) { #' ## Remove blacklist regions from fragments #' blacklist <- read_encode_blacklist(reference_dir, genome="hg38") #' frags_filter_blacklist <- select_regions(frags, blacklist, invert_selection = TRUE) -#' chrom_sizes <- read_ucsc_chrom_sizes(reference_dir, genome="hg38") %>% dplyr::filter(chr %in% c("chr4", "chr11")) +#' chrom_sizes <- read_ucsc_chrom_sizes(reference_dir, genome="hg38") %>% +#' dplyr::filter(chr %in% c("chr4", "chr11")) #' #' #' ## Call peaks +#' if (interactive()) { #' call_peaks_tile(frags_filter_blacklist, chrom_sizes, effective_genome_size = 2.8e9) +#' } +#' #> # A tibble: 73,160 x 7 +#' #> chr start end group p_val q_val enrichment +#' #> +#' #> 1 chr11 65615400 65615600 all 0 0 6764. 
+#' #> 2 chr4 2262266 2262466 all 0 0 6422. +#' #> 3 chr11 119057200 119057400 all 0 0 6188. +#' #> 4 chr11 695133 695333 all 0 0 6180. +#' #> 5 chr11 2400400 2400600 all 0 0 6166. +#' #> 6 chr4 1346933 1347133 all 0 0 6109. +#' #> 7 chr11 3797600 3797800 all 0 0 6017. +#' #> 8 chr11 64878600 64878800 all 0 0 5948. +#' #> 9 chr11 57667733 57667933 all 0 0 5946. +#' #> 10 chr11 83156933 83157133 all 0 0 5913. +#' #> # i 73,150 more rows #' @export call_peaks_tile <- function(fragments, chromosome_sizes, cell_groups = rep.int("all", length(cellNames(fragments))), effective_genome_size = NULL, @@ -495,6 +525,7 @@ call_peaks_tile <- function(fragments, chromosome_sizes, cell_groups = rep.int(" #' If a data.frame or list, must contain columns `chr` and `end` (See `help("genomic-ranges-like")`). #' If a numeric vector, then it is assumed to be the chromosome sizes in the order of `chrNames(fragments)`. #' @examples +#' \dontrun{ #' ## Prep data #' frags <- get_demo_frags() #' bedgraph_outputs <- file.path(tempdir(), "bedgraph_outputs") @@ -507,7 +538,7 @@ call_peaks_tile <- function(fragments, chromosome_sizes, cell_groups = rep.int(" #' list.files(bedgraph_outputs) #' #' # With tiling -#' chrom_sizes <- read_ucsc_chrom_sizes("./reference", genome="hg38") %>% +#' chrom_sizes <- read_ucsc_chrom_sizes(file.path(tempdir(), "references"), genome="hg38") %>% #' dplyr::filter(chr %in% c("chr4", "chr11")) #' write_insertion_bedgraph(frags, file.path(bedgraph_outputs, "all_tiled.bedGraph"), #' chrom_sizes = chrom_sizes, normalization_method = "cpm", tile_width = 100) @@ -515,8 +546,16 @@ call_peaks_tile <- function(fragments, chromosome_sizes, cell_groups = rep.int(" #' col_names = c("chr", "start", "end", "score"), #' show_col_types = FALSE) #' head(reads) -#' -#' +#' #> A tibble: 6 × 4 +#' #> chr start end score +#' #> +#' #> 1 chr4 10000 10100 1.45 +#' #> 2 chr4 10100 10200 0.869 +#' #> 3 chr4 10300 10400 0.290 +#' #> 4 chr4 10400 10500 0.145 +#' #> 5 chr4 10600 10700 0.434 
+#' #> 6 chr4 11100 11200 0.145 +#' } #' @return `NULL` #' @inheritParams footprint #' @rdname write_insertion_bedgraph @@ -586,7 +625,7 @@ write_insertion_bedgraph <- function( #' ###################################################### #' ## `write_insertion_bed()` examples #' ###################################################### -#' +#' \dontrun{ #' # We utilize two groups this time #' bed_outputs <- file.path(tempdir(), "bed_outputs") #' cell_groups <- rep(c("A", "B"), length.out = length(cellNames(frags))) @@ -596,11 +635,29 @@ write_insertion_bedgraph <- function( #' frags, path = bed_paths, cell_groups = cell_groups, #' verbose = TRUE #' ) +#' #> 2026-01-08 21:19:41 Writing bed file for cluster: A +#' #> 2026-01-08 21:19:41 Bed file for cluster: A written to: +#' #> /tmp/RtmpgF9rbP/bed_outputs/A.bed +#' #> 2026-01-08 21:19:41 Writing bed file for cluster: B +#' #> 2026-01-08 21:19:42 Bed file for cluster: B written to: +#' #> /tmp/RtmpgF9rbP/bed_outputs/B.bed +#' #> 2026-01-08 21:19:42 Finished writing bed files #' list.files(bed_outputs) +#' #> [1] "A.bed" "B.bed" #' head(readr::read_tsv( #' file.path(bed_outputs, "A.bed"), #' col_names = c("chr", "start", "end"), show_col_types = FALSE) #' ) +#' #> # A tibble: 6 × 3 +#' #> chr start end +#' #> +#' #> 1 chr4 10035 10036 +#' #> 2 chr4 10045 10046 +#' #> 3 chr4 10045 10046 +#' #> 4 chr4 10046 10047 +#' #> 5 chr4 10046 10047 +#' #> 6 chr4 10066 10067 +#' } #' @export write_insertion_bed <- function(fragments, path, cell_groups = rlang::rep_along(cellNames(fragments), "all"), @@ -700,20 +757,20 @@ write_insertion_bed <- function(fragments, path, #' To run MACS manually, you will first run `call_peaks_macs()` with `step="prep-inputs`. Then, manually run all of the #' shell scripts generated at `/input/.sh`. Finally, run `call_peaks_macs()` again with the same original arguments, but #' setting `step="read-outputs"`. 
-#' @examples +#' @examplesIf tryCatch({ macs_path_is_valid(); TRUE }, error = function(e) FALSE) #' macs_files <- file.path(tempdir(), "peaks") #' frags <- get_demo_frags() -#' +#' #' head(call_peaks_macs(frags, macs_files)) -#' +#' #' ## Can also just run the input prep, then run macs manually #' ## by setting step to 'prep_inputs' #' macs_script <- call_peaks_macs(frags, macs_files, step = "prep-inputs") #' system2("bash", macs_script[1], stdout = FALSE, stderr = FALSE) -#' +#' #' ## Then read the narrow peaks files #' list.files(file.path(macs_files, "output", "all")) -#' +#' #' ## call_peaks_macs() can also solely perform the output reading step #' head(call_peaks_macs(frags, macs_files, step = "read-outputs")) #' @inheritParams call_peaks_tile @@ -831,6 +888,8 @@ call_peaks_macs <- function(fragments, path, #' `r lifecycle::badge("deprecated")` #' #' This function has been renamed to `call_peaks_macs()` +#' @examples +#' # See ?call_peaks_macs for examples #' @export #' @keywords internal call_macs_peaks <- function(...) { @@ -842,7 +901,7 @@ call_macs_peaks <- function(...) { #' Test if MACS executable is valid. #' If macs_executable is NULL, this function will try to auto-detect MACS from PATH, with preference for MACS3 over MACS2. #' If macs_executable is provided, this function will check if MACS can be called. -#' @return MACS executable path. +#' @return MACS executable path if valid, otherwise throws an error. #' @inheritParams call_peaks_macs #' @keywords internal macs_path_is_valid <- function(macs_executable) { @@ -889,3 +948,4 @@ range_overlaps <- function(a, b) { } %>% dplyr::arrange(from, to) } + diff --git a/r/R/clustering.R b/r/R/clustering.R index 9b1dd58d..db290575 100644 --- a/r/R/clustering.R +++ b/r/R/clustering.R @@ -80,6 +80,12 @@ is_adjacency_matrix <- function(mat) { #' Returns a factor vector of length `cells` with a cluster assignment for each cell. 
#' #' @seealso `knn_hnsw()` `knn_annoy()` `knn_to_graph()` `knn_to_snn_graph()` `knn_to_geodesic_graph()` `cluster_graph_leiden()` `cluster_graph_louvain()` `cluster_graph_seurat()` +#' @examples +#' set.seed(123) +#' mat <- matrix(rnorm(1000 * 10), nrow = 1000) +#' clusters <- cluster_cells_graph(mat, threads = 1) +#' table(clusters) +#' #' @export cluster_cells_graph <- function( mat, knn_method = knn_hnsw, @@ -161,6 +167,12 @@ knn_to_graph <- function(knn, use_weights = FALSE, self_loops = TRUE) { #' List of 3 equal-length vectors `i`, `j`, and `weight`, along with an integer `dim`. #' These correspond to the rows, cols, and values of non-zero entries in the lower triangle #' adjacency matrix. `dim` is the total number of vertices (cells) in the graph +#' @examples +#' set.seed(123) +#' mat <- matrix(rnorm(1000 * 10), nrow = 1000) +#' knn <- knn_hnsw(mat, k = 10) +#' graph <- knn_to_snn_graph(knn) +#' #' @export knn_to_snn_graph <- function(knn, min_val = 1 / 15, self_loops = FALSE, return_type=c("matrix", "list")) { return_type <- match.arg(return_type) @@ -215,6 +227,12 @@ knn_to_snn_graph <- function(knn, min_val = 1 / 15, self_loops = FALSE, return_t #' List of 3 equal-length vectors `i`, `j`, and `weight`, along with an integer `dim`. #' These correspond to the rows, cols, and values of non-zero entries in the lower triangle #' adjacency matrix. `dim` is the total number of vertices (cells) in the graph +#' @examples +#' set.seed(123) +#' mat <- matrix(rnorm(1000 * 10), nrow = 1000) +#' knn <- knn_hnsw(mat, k = 10) +#' graph <- knn_to_geodesic_graph(knn) +#' #' @export knn_to_geodesic_graph <- function(knn, return_type = c("matrix", "list"), threads = 0L) { return_type <- match.arg(return_type) @@ -247,6 +265,13 @@ knn_to_geodesic_graph <- function(knn, return_type = c("matrix", "list"), thread #' @param seed Random seed for clustering initialization #' @param ... 
Additional arguments to underlying clustering function #' @return Factor vector containing the cluster assignment for each cell. +#' @examples +#' set.seed(123) +#' mat <- matrix(rnorm(1000 * 10), nrow = 1000) +#' knn <- knn_hnsw(mat, k = 10) +#' graph <- knn_to_snn_graph(knn) +#' clusters <- cluster_graph_leiden(graph, resolution = 0.5) +#' #' @export cluster_graph_leiden <- function( mat, resolution = 1, objective_function = c("modularity", "CPM"), @@ -270,6 +295,13 @@ cluster_graph_leiden <- function( #' @rdname cluster_graph #' @details **cluster_graph_louvain**: Louvain graph clustering algorithm `igraph::cluster_louvain()` +#' @examples +#' set.seed(123) +#' mat <- matrix(rnorm(1000 * 10), nrow = 1000) +#' knn <- knn_hnsw(mat, k = 10) +#' graph <- knn_to_snn_graph(knn) +#' clusters <- cluster_graph_louvain(graph, resolution = 0.5) +#' #' @export cluster_graph_louvain <- function( mat, resolution = 1, seed = 12531 @@ -290,6 +322,15 @@ cluster_graph_louvain <- function( #' @rdname cluster_graph #' @details **cluster_graph_seurat**: Seurat's clustering algorithm `Seurat::FindClusters()` +#' @examples +#' \dontrun{ +#' set.seed(123) +#' mat <- matrix(rnorm(1000 * 10), nrow = 1000) +#' knn <- knn_hnsw(mat, k = 10) +#' graph <- knn_to_snn_graph(knn) +#' clusters <- cluster_graph_seurat(graph, resolution = 0.5) +#' } +#' #' @export cluster_graph_seurat <- function( mat, resolution = 0.8, ... @@ -357,6 +398,16 @@ cluster_membership_matrix <- function(groups, group_order = NULL) { #' #' If no query is given, nearest neighbors are found by mapping the data matrix to itself, #' likely including self-neighbors (i.e. `idx[c,1] == c` for most cells). 
+#' @examples +#' ############################################ +#' ## knn_hnsw() example +#' ############################################ +#' set.seed(123) +#' mat <- matrix(rnorm(100 * 10), nrow = 10) +#' rownames(mat) <- paste0("cell", 1:10) +#' knn <- knn_hnsw(mat, k = 10) +#' knn +#' #' @export knn_hnsw <- function(data, query = NULL, k = 10, metric = c("euclidean", "cosine"), verbose = TRUE, threads = 1, ef = 100) { metric <- match.arg(metric) @@ -396,6 +447,15 @@ knn_hnsw <- function(data, query = NULL, k = 10, metric = c("euclidean", "cosine #' @details **knn_annoy**: Use RcppAnnoy as knn engine #' @param n_trees Number of trees during index build time. More trees gives higher accuracy #' @param search_k Number of nodes to inspect during the query, or -1 for default value. Higher number gives higher accuracy +#' @examples +#' ############################################ +#' ## knn_annoy() example +#' ############################################ +#' set.seed(123) +#' mat <- matrix(rnorm(100 * 10), nrow = 10) +#' rownames(mat) <- paste0("cell", 1:10) +#' knn <- knn_annoy(mat, k = 10) +#' knn #' @export knn_annoy <- function(data, query = NULL, k = 10, metric = c("euclidean", "cosine", "manhattan", "hamming"), n_trees = 50, search_k = -1) { metric <- match.arg(metric) diff --git a/r/R/data.R b/r/R/data.R index 5f50f9fe..05b5dd69 100644 --- a/r/R/data.R +++ b/r/R/data.R @@ -188,7 +188,7 @@ get_demo_mat <- function(filter_qc = TRUE, subset = TRUE) { if (!file.exists(file.path(data_dir, paste0(mat_name, ".tar.gz")))) { prepare_demo_data(data_dir, filter_qc = filter_qc, subset = subset) } else { - untar(file.path(data_dir, paste0(mat_name, ".tar.gz")), exdir=data_dir) + utils::untar(file.path(data_dir, paste0(mat_name, ".tar.gz")), exdir=data_dir) file.remove(file.path(data_dir, paste0(mat_name, ".tar.gz"))) } } @@ -222,7 +222,7 @@ get_demo_frags <- function(filter_qc = TRUE, subset = TRUE) { if (!file.exists(file.path(data_dir, paste0(frags_name, ".tar.gz")))) {
prepare_demo_data(data_dir) } else { - untar(file.path(data_dir, paste0(frags_name, ".tar.gz")), exdir = data_dir) + utils::untar(file.path(data_dir, paste0(frags_name, ".tar.gz")), exdir = data_dir) file.remove(file.path(data_dir, paste0(frags_name, ".tar.gz"))) } } @@ -268,7 +268,7 @@ remove_demo_data <- function() { #' `data-raw/mouse_gene_mapping.R` for exactly how these mappings were made. #' @source **human_gene_mapping** #' -#' +#' #' #' @examples #' ####################################################################### @@ -286,8 +286,8 @@ remove_demo_data <- function() { #' #' @source **mouse_gene_mapping** #' -#' -#' +#' +#' #' @examples #' ####################################################################### #' ## mouse_gene_mapping diff --git a/r/R/errorChecking.R b/r/R/errorChecking.R index 80d36b06..df0cc034 100644 --- a/r/R/errorChecking.R +++ b/r/R/errorChecking.R @@ -230,6 +230,7 @@ normalize_unique_file_names <- function(names, replacement="_") { #' @param metadata_cols Optional list of metadata columns to require & extract #' @param zero_based_coords If true, coordinates start and 0 and the end coordinate is not included in the range. #' If false, coordinates start at 1 and the end coordinate is included in the range +#' @param n How many call frames to go up when printing errors #' @return data frame with zero-based coordinates, and elements chr (factor), start (int), and end (int). #' If `ranges` does not have chr level information, chr levels are the sorted unique values of chr. #' @@ -237,10 +238,11 @@ normalize_unique_file_names <- function(names, replacement="_") { #' and FALSE for negative strand. 
(Converted from a character vector of "+"/"-" if necessary) #' @examples #' ## Prep data +#' library(S4Vectors) #' ranges <- GenomicRanges::GRanges( -#' seqnames = S4Vectors::Rle(c("chr1", "chr2", "chr3"), c(1, 2, 2)), +#' seqnames = Rle(c("chr1", "chr2", "chr3"), c(1, 2, 2)), #' ranges = IRanges::IRanges(101:105, end = 111:115, names = head(letters, 5)), -#' strand = S4Vectors::Rle(GenomicRanges::strand(c("-", "+", "*")), c(1, 2, 2)), +#' strand = Rle(GenomicRanges::strand(c("-", "+", "*")), c(1, 2, 2)), #' score = 1:5, #' GC = seq(1, 0, length=5)) #' ranges diff --git a/r/R/fragments.R b/r/R/fragments.R index 6206bbae..7d63b142 100644 --- a/r/R/fragments.R +++ b/r/R/fragments.R @@ -10,6 +10,12 @@ #' #' Methods for IterableFragments objects #' +#' @param x an IterableFragments object +#' @param ... Additional arguments (not used unless specified) +#' @param row.names Optional row names for compatibility with `as.data.frame()`. +#' Ignored by IterableFragments methods. +#' @param optional Logical flag for compatibility with `as.data.frame()`; ignored. +#' @param value Replacement value (when applicable). #' @name IterableFragments-methods #' @rdname IterableFragments-methods NULL @@ -96,6 +102,7 @@ setMethod("show", "IterableFragments", function(object) { #' #' @export setGeneric("cellNames", function(x) standardGeneric("cellNames")) +#' @describeIn IterableFragments-methods Get cell names for IterableFragments setMethod("cellNames", "IterableFragments", function(x) { if (.hasSlot(x, "fragments")) { return(cellNames(x@fragments)) @@ -106,6 +113,7 @@ setMethod("cellNames", "IterableFragments", function(x) { #' Set cell names #' @param x an IterableFragments object #' @param value Character vector of new names +#' @param ... Additional arguments (not used) #' @details * `cellNames<-` It is only possible to replace names, not add new names. 
#' @describeIn IterableFragments-methods Set cell names #' @examples @@ -118,6 +126,7 @@ setMethod("cellNames", "IterableFragments", function(x) { #' #' @export setGeneric("cellNames<-", function(x, ..., value) standardGeneric("cellNames<-")) +#' @describeIn IterableFragments-methods Set cell names for IterableFragments setMethod("cellNames<-", "IterableFragments", function(x, ..., value) { if (is.null(cellNames(x))) { stop("Assigning new cellNames is not allowed, only renaming") @@ -140,6 +149,7 @@ setMethod("cellNames<-", "IterableFragments", function(x, ..., value) { #' #' @export setGeneric("chrNames", function(x) standardGeneric("chrNames")) +#' @describeIn IterableFragments-methods Get chromosome names for IterableFragments setMethod("chrNames", "IterableFragments", function(x) { if (.hasSlot(x, "fragments")) { return(chrNames(x@fragments)) @@ -162,6 +172,7 @@ setMethod("chrNames", "IterableFragments", function(x) { #' #' @export setGeneric("chrNames<-", function(x, ..., value) standardGeneric("chrNames<-")) +#' @describeIn IterableFragments-methods Set chromosome names for IterableFragments setMethod("chrNames<-", "IterableFragments", function(x, ..., value) { if (is.null(chrNames(x))) { stop("Assigning new chrNames is not allowed, only renaming") @@ -183,7 +194,9 @@ setClass("FragmentsTsv", comment = "" ) ) +#' @describeIn IterableFragments-misc-methods Get chromosome names for FragmentsTsv setMethod("chrNames", "FragmentsTsv", function(x) NULL) +#' @describeIn IterableFragments-misc-methods Get cell names for FragmentsTsv setMethod("cellNames", "FragmentsTsv", function(x) NULL) setMethod("iterate_fragments", "FragmentsTsv", function(x) iterate_10x_fragments_cpp(normalizePath(x@path), x@comment)) @@ -208,15 +221,25 @@ setMethod("short_description", "FragmentsTsv", function(x) { #' 10x default, though it's not quite standard for the bed file format. 
#' @return 10x fragments file object #' @examples -#' ## Download example fragments from pbmc 500 dataset and save in temp directory +#' ####################################################################### +#' ## write_fragments_10x() example +#' ####################################################################### +#' ## Prep data +#' frags_table <- tibble::tribble( +#' ~chr, ~start, ~end, ~cell_id, +#' "chr1", 0, 5, "cell1", +#' "chr1", 2, 4, "cell2", +#' "chr2", 3, 6, "cell1", +#' "chr3", 7, 9, "cell2" +#' ) +#' frags_table +#' frags <- frags_table %>% convert_to_fragments() +#' #' data_dir <- file.path(tempdir(), "frags_10x") +#' frags_file <- "demo_10x_frags.tsv.gz" #' dir.create(data_dir, recursive = TRUE, showWarnings = FALSE) -#' url_base <- "https://cf.10xgenomics.com/samples/cell-atac/2.0.0/atac_pbmc_500_nextgem/" -#' frags_file <- "atac_pbmc_500_nextgem_fragments.tsv.gz" -#' atac_raw_url <- paste0(url_base, frags_file) -#' if (!file.exists(file.path(data_dir, frags_file))) { -#' download.file(atac_raw_url, file.path(data_dir, frags_file), mode="wb") -#' } +#' write_fragments_10x(frags, file.path(data_dir, frags_file)) +#' #' #' ####################################################################### #' ## open_fragments_10x() example @@ -229,7 +252,7 @@ setMethod("short_description", "FragmentsTsv", function(x) { #' frags #' #' frags %>% write_fragments_dir( -#' file.path(data_dir, "demo_frags_from_h5"), +#' file.path(data_dir, "new_demo_10x_frags"), #' overwrite = TRUE #' ) #' @@ -255,17 +278,6 @@ open_fragments_10x <- function(path, comment = "#", end_inclusive = TRUE) { #' @details **write_fragments_10x** #' #' Fragments will be written to disk immediately, then returned in a readable object. 
-#' @examples -#' ####################################################################### -#' ## write_fragments_10x() example -#' ####################################################################### -#' frags <- write_fragments_10x( -#' frags, -#' file.path(data_dir, paste0("new_", frags_file)) -#' ) -#' frags -#' -#' #' @export write_fragments_10x <- function(fragments, path, end_inclusive = TRUE, append_5th_column = FALSE) { assert_is_file(path, must_exist = FALSE, extension = c(".tsv", ".tsv.gz")) @@ -305,7 +317,9 @@ setClass("UnpackedMemFragments", version = character(0) ) ) +#' @describeIn IterableFragments-misc-methods Get chromosome names for UnpackedMemFragments setMethod("chrNames", "UnpackedMemFragments", function(x) x@chr_names) +#' @describeIn IterableFragments-misc-methods Get cell names for UnpackedMemFragments setMethod("cellNames", "UnpackedMemFragments", function(x) x@cell_names) setMethod("iterate_fragments", "UnpackedMemFragments", function(x) { iterate_unpacked_fragments_cpp(x) @@ -351,7 +365,9 @@ setClass("PackedMemFragments", version = character(0) ) ) +#' @describeIn IterableFragments-misc-methods Get chromosome names for PackedMemFragments setMethod("chrNames", "PackedMemFragments", function(x) x@chr_names) +#' @describeIn IterableFragments-misc-methods Get cell names for PackedMemFragments setMethod("cellNames", "PackedMemFragments", function(x) x@cell_names) setMethod("iterate_fragments", "PackedMemFragments", function(x) { iterate_packed_fragments_cpp(x) @@ -423,7 +439,9 @@ setClass("FragmentsDir", cell_names = character(0) ) ) +#' @describeIn IterableFragments-misc-methods Get chromosome names for FragmentsDir setMethod("chrNames", "FragmentsDir", function(x) x@chr_names) +#' @describeIn IterableFragments-misc-methods Get cell names for FragmentsDir setMethod("cellNames", "FragmentsDir", function(x) x@cell_names) setMethod("iterate_fragments", "FragmentsDir", function(x) { if (x@compressed) { @@ -533,7 +551,9 @@ 
setClass("FragmentsHDF5", cell_names = character(0) ) ) +#' @describeIn IterableFragments-misc-methods Get chromosome names for FragmentsHDF5 setMethod("chrNames", "FragmentsHDF5", function(x) x@chr_names) +#' @describeIn IterableFragments-misc-methods Get cell names for FragmentsHDF5 setMethod("cellNames", "FragmentsHDF5", function(x) x@cell_names) setMethod("iterate_fragments", "FragmentsHDF5", function(x) { if (x@compressed) { @@ -652,33 +672,38 @@ open_fragments_hdf5 <- function(path, group = "fragments", buffer_size = 16384L) #' Convert between BPCells fragments and R objects. #' #' BPCells fragments can be interconverted with GRanges and data.frame R objects. -#' The main conversion method is R's builtin `as()` function, though the -#' `convert_to_fragments()` helper is also available. For all R objects except +#' The main conversion method is R's builtin `as()` function, though the +#' `convert_to_fragments()` helper is also available. For all R objects except #' GRanges, BPCells assumes a 0-based, end-exclusive coordinate system. (See #' [genomic-ranges-like] reference for details) #' -#' @usage -#' # Convert from R to BPCells -#' convert_to_fragments(x, zero_based_coords = !is(x, "GRanges")) -#' as(x, "IterableFragments") -#' -#' # Convert from BPCells to R -#' as.data.frame(bpcells_fragments) -#' as(bpcells_fragments, "data.frame") -#' as(bpcells_fragments, "GRanges") +#' @details Coercions rely on base R's `as()`; for example `as(frags, "data.frame")` +#' converts BPCells fragments back to a tabular format, while `as(x, "IterableFragments")` +#' materialises supported R objects as fragment stores. Coercions to and from +#' `GRanges` require the GenomicRanges package to be installed. 
+#' +#' @aliases as.data.frame.IterableFragments #' #' @param x `r document_granges("Fragment coordinates", extras=c("cell_id" = "cell barcodes or unique identifiers as string or factor"))` +#' @param IterableFragments BPCells IterableFragments object +#' @param data.frame Data frame with columns chr, start, end, and cell_id +#' @param GRanges GenomicRanges object with metadata column cell_id #' @param zero_based_coords Whether to convert the ranges from a 1-based end-inclusive #' coordinate system to a 0-based end-exclusive coordinate system. Defaults to true #' for GRanges and false for other formats -#' (see this [archived UCSC blogpost](https://web.archive.org/web/20210920203703/http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/)) +#' (see this [archived UCSC blogpost](https://web.archive.org/web/20210920203703/https://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/)) +#' @param ... Additional arguments passed to methods +#' @param row.names Optional row names for compatibility with `as.data.frame()`. +#' Ignored for IterableFragments objects. +#' @param optional Logical flag for compatibility with `as.data.frame()`; ignored. #' @return **convert_to_fragments()**: IterableFragments object #' @examples -#' frags_table <- tibble::tibble( -#' chr = paste0("chr", 1:10), -#' start = 0, -#' end = 5, -#' cell_id = "cell1" +#' frags_table <- tibble::tribble( +#' ~chr, ~start, ~end, ~cell_id, +#' "chr1", 0, 5, "cell1", +#' "chr1", 2, 4, "cell2", +#' "chr2", 3, 6, "cell1", +#' "chr3", 7, 9, "cell2" #' ) #' frags_table #' @@ -715,6 +740,51 @@ open_fragments_hdf5 <- function(path, group = "fragments", buffer_size = 16384L) #' frags_granges #' #' +#' @rdname fragment_R_conversion +#' @name fragment_R_conversion_coercions +#' @usage +#' \method{as.data.frame}{IterableFragments}(x, row.names = NULL, optional = FALSE, ...) 
+NULL + +#' IterableFragments subclass methods +#' +#' Methods defined for classes that extend `IterableFragments`, providing access +#' to metadata or specialised behaviours for storage backends and selection +#' wrappers. +#' +#' @param x An object inheriting from `IterableFragments`. +#' @name IterableFragments-misc-methods +#' @rdname IterableFragments-misc-methods +#' @docType methods +#' @keywords internal +NULL + +#' Convert between BPCells fragments and R objects misc. +#' +#' See `fragment_R_conversion` for main page. +#' BPCells fragments can be interconverted with GRanges and data.frame R objects. +#' The main conversion method is R's builtin `as()` function, though the `convert_to_fragments()` helper is also available. +#' For all R objects except GRanges, BPCells assumes a 0-based, end-exclusive coordinate system. +#' (See `genomic-ranges-like` reference for details) +#' +#' @param from Object supplied to `methods::coerce()` (typically generated by `as()`) +#' @param to Target class name for coercion +#' @param ... Additional arguments passed to methods +#' @aliases coerce,data.frame,IterableFragments-method +#' @aliases coerce,IterableFragments,data.frame-method +#' @aliases coerce,GRanges,IterableFragments-method +#' @aliases coerce,IterableFragments,GRanges-method +#' @usage +#' \S4method{coerce}{data.frame,IterableFragments}(from, to, ...) +#' \S4method{coerce}{IterableFragments,data.frame}(from, to, ...) +#' \S4method{coerce}{IterableFragments,GRanges}(from, to, ...) +#' \S4method{coerce}{GRanges,IterableFragments}(from, to, ...) +#' @name fragment_r_conversion-misc +#' @rdname fragment_r_conversion-misc +#' @docType methods +#' @keywords internal +NULL + #' @rdname fragment_R_conversion #' @export convert_to_fragments <- function(x, zero_based_coords = !is(x, "GRanges")) { @@ -794,9 +864,11 @@ setAs("IterableFragments", "data.frame", function(from) { }) #' @exportS3Method base::as.data.frame -as.data.frame.IterableFragments <- function(x, ...)
as(x, "data.frame") +as.data.frame.IterableFragments <- function(x, row.names = NULL, optional = FALSE, ...) as(x, "data.frame") #' @export +#' @describeIn IterableFragments-methods Coerce IterableFragments to a data.frame setMethod("as.data.frame", signature(x = "IterableFragments"), function(x, ...) as(x, "data.frame")) + setAs("data.frame", "IterableFragments", function(from) { convert_to_fragments(from) }) @@ -928,6 +1000,7 @@ setClass("ChrSelectName", chr_names = character(0) ) ) +#' @describeIn IterableFragments-misc-methods Get chromosome names for ChrSelectName setMethod("chrNames", "ChrSelectName", function(x) x@chr_names) setMethod("iterate_fragments", "ChrSelectName", function(x) { iterate_chr_name_select_cpp(iterate_fragments(x@fragments), x@chr_names) @@ -953,6 +1026,7 @@ setClass("ChrSelectIndex", chr_index_selection = NA_integer_ ) ) +#' @describeIn IterableFragments-misc-methods Get chromosome names for ChrSelectIndex setMethod("chrNames", "ChrSelectIndex", function(x) chrNames(x@fragments)[x@chr_index_selection]) setMethod("iterate_fragments", "ChrSelectIndex", function(x) { iterate_chr_index_select_cpp(iterate_fragments(x@fragments), x@chr_index_selection - 1) @@ -1031,6 +1105,7 @@ setClass("CellSelectName", cell_names = character(0) ) ) +#' @describeIn IterableFragments-misc-methods Get cell names for CellSelectName setMethod("cellNames", "CellSelectName", function(x) x@cell_names) setMethod("iterate_fragments", "CellSelectName", function(x) { iterate_cell_name_select_cpp(iterate_fragments(x@fragments), x@cell_names) @@ -1056,6 +1131,7 @@ setClass("CellSelectIndex", cell_index_selection = NA_integer_ ) ) +#' @describeIn IterableFragments-misc-methods Get cell names for CellSelectIndex setMethod("cellNames", "CellSelectIndex", function(x) cellNames(x@fragments)[x@cell_index_selection]) setMethod("iterate_fragments", "CellSelectIndex", function(x) { iterate_cell_index_select_cpp(iterate_fragments(x@fragments), x@cell_index_selection - 1) @@ -1131,6 
+1207,7 @@ setClass("CellMerge", group_names = character(0) ) ) +#' @describeIn IterableFragments-misc-methods Get cell names for CellMerge setMethod("cellNames", "CellMerge", function(x) x@group_names) setMethod("iterate_fragments", "CellMerge", function(x) { iterate_cell_merge_cpp(iterate_fragments(x@fragments), x@group_ids, x@group_names) @@ -1189,6 +1266,7 @@ setClass("ChrRename", chr_names = character(0) ) ) +#' @describeIn IterableFragments-misc-methods Get chromosome names for ChrRename setMethod("chrNames", "ChrRename", function(x) x@chr_names) setMethod("iterate_fragments", "ChrRename", function(x) { iterate_chr_rename_cpp(iterate_fragments(x@fragments), x@chr_names) @@ -1212,6 +1290,7 @@ setClass("CellRename", cell_names = character(0) ) ) +#' @describeIn IterableFragments-misc-methods Get cell names for CellRename setMethod("cellNames", "CellRename", function(x) x@cell_names) setMethod("iterate_fragments", "CellRename", function(x) { iterate_cell_rename_cpp(iterate_fragments(x@fragments), x@cell_names) @@ -1234,6 +1313,7 @@ setClass("CellPrefix", prefix = "" ) ) +#' @describeIn IterableFragments-misc-methods Get cell names for CellPrefix setMethod("cellNames", "CellPrefix", function(x) { if (is.null(cellNames(x@fragments))) return(NULL) paste0(x@prefix, cellNames(x@fragments)) @@ -1369,9 +1449,11 @@ setClass("MergeFragments", fragments_list = list() ) ) +#' @describeIn IterableFragments-misc-methods Get chromosome names for MergeFragments setMethod("chrNames", "MergeFragments", function(x) { Reduce(union, lapply(x@fragments_list, chrNames)) }) +#' @describeIn IterableFragments-misc-methods Get cell names for MergeFragments setMethod("cellNames", "MergeFragments", function(x) { do.call(c, lapply(x@fragments_list, cellNames)) }) @@ -1389,6 +1471,7 @@ setMethod("short_description", "MergeFragments", function(x) { }) # Allow merging fragments using standard concatenation method +#' @describeIn IterableFragments-methods Concatenate IterableFragments objects 
setMethod("c", "IterableFragments", function(x, ...) { tail <- list(...) if (length(tail) == 1 && is.null(tail[[1]])) { diff --git a/r/R/geneScores.R b/r/R/geneScores.R index 71969020..b59a394e 100644 --- a/r/R/geneScores.R +++ b/r/R/geneScores.R @@ -88,7 +88,7 @@ range_distance_to_nearest <- function(ranges, addArchRBug = FALSE, zero_based_co #' @inheritParams normalize_ranges #' @param upstream Number of bases to extend each range upstream (negative to shrink width) #' @param downstream Number of bases to extend each range downstream (negative to shrink width) -#' @param chromosome_sizes (optional) Size of chromosomes as a [genomic-ranges] object +#' @param chromosome_sizes (optional) Size of chromosomes as a [genomic-ranges-like] object #' @details Note that ranges will be blocked from extending past the beginning of the chromosome (base 0), #' and if `chromosome_sizes` is given then they will also be blocked from extending past the end of the chromosome #' @examples @@ -152,15 +152,25 @@ extend_ranges <- function(ranges, upstream = 0, downstream = 0, metadata_cols = #' be negative. The distance of adjacent but non-overlapping regions is 1bp, counting #' up from there. 
#' @examples +#' \dontrun{ #' ## Prep data #' directory <- file.path(tempdir(), "references") #' genes <- read_gencode_genes( #' directory, #' release = "42", -#' annotation_set = "basic", +#' annotation_set = "basic", #' ) +#' } #' -#' +#' ## Use pre-generated data for this example +#' ## Use pre-computed transcripts for chr4 +#' genes <- readr::read_delim( +#' file.path( +#' system.file("extdata", package = "BPCells"), +#' "transcripts_filtered_example_chr_4.tsv.gz"), +#' delim = "\t", show_col_types = FALSE +#' ) +#' #' ## Get gene scores by tile #' gene_score_tiles_archr( #' genes @@ -226,6 +236,7 @@ gene_score_tiles_archr <- function(genes, chromosome_sizes = NULL, tile_width = #' #' Weight matrix of dimension genes x tiles #' @examples +#' \dontrun{ #' ## Prep data #' reference_dir <- file.path(tempdir(), "references") #' frags <- get_demo_frags() @@ -234,8 +245,10 @@ gene_score_tiles_archr <- function(genes, chromosome_sizes = NULL, tile_width = #' release="42", #' annotation_set = "basic", #' ) %>% dplyr::filter(chr %in% c("chr4", "chr11")) -#' blacklist <- read_encode_blacklist(reference_dir, genome="hg38") %>% dplyr::filter(chr %in% c("chr4", "chr11")) -#' chrom_sizes <- read_ucsc_chrom_sizes(reference_dir, genome="hg38") %>% dplyr::filter(chr %in% c("chr4", "chr11")) +#' blacklist <- read_encode_blacklist(reference_dir, genome="hg38") %>% +#' dplyr::filter(chr %in% c("chr4", "chr11")) +#' chrom_sizes <- read_ucsc_chrom_sizes(reference_dir, genome="hg38") %>% +#' dplyr::filter(chr %in% c("chr4", "chr11")) #' chrom_sizes$tile_width = 500 #' #' @@ -252,6 +265,7 @@ gene_score_tiles_archr <- function(genes, chromosome_sizes = NULL, tile_width = #' #' ## Get gene scores per cell #' gene_score_weights %*% tiles +#' } #' #' #' @export @@ -315,12 +329,14 @@ gene_score_weights_archr <- function(genes, chromosome_sizes, blacklist = NULL, #' #' Gene score matrix of dimension genes x cells. 
#' @examples +#' \dontrun{ #' ####################################################################### #' ## gene_score_archr() example #' ####################################################################### #' ## This is a wrapper that creates both the gene score weight #' ## matrix and tile matrix together #' gene_score_archr(frags, genes, chrom_sizes, blacklist) +#' } #' #' #' @export diff --git a/r/R/genomeAnnotations.R b/r/R/genomeAnnotations.R index 70864795..97c1dc24 100644 --- a/r/R/genomeAnnotations.R +++ b/r/R/genomeAnnotations.R @@ -9,7 +9,7 @@ #' Download a file with a custom timeout #' #' @param path Output path to write file -#' @param url to download from +#' @param backup_url to download from #' @param timeout timeout in seconds #' @keywords internal ensure_downloaded <- function(path, backup_url, timeout) { @@ -54,15 +54,27 @@ ensure_downloaded <- function(path, backup_url, timeout) { #' ####################################################################### #' ## read_gtf() example #' ####################################################################### +#' if (interactive()) { +#' #' species <- "Saccharomyces_cerevisiae" #' version <- "GCF_000146045.2_R64" #' head(read_gtf( -#' path = sprintf("./reference/%s_genomic.gtf.gz", version), +#' path = sprintf("%s/%s_genomic.gtf.gz", file.path(tempdir(), "references"), version), #' backup_url = sprintf( #' "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/fungi/%s/reference/%s/%s_genomic.gtf.gz", #' species, version, version #' ) #' )) +#' #> # A tibble: 6 x 9 +#' #> chr source feature start end score strand frame gene_id +#' #> +#' #> 1 NC_001133.9 RefSeq gene 1806 2169 . - . YAL068C +#' #> 2 NC_001133.9 RefSeq gene 2479 2707 . + . YAL067W-A +#' #> 3 NC_001133.9 RefSeq gene 7234 9016 . - . YAL067C +#' #> 4 NC_001133.9 RefSeq gene 11564 11951 . - . YAL065C +#' #> 5 NC_001133.9 RefSeq gene 12045 12426 . + . YAL064W-B +#' #> 6 NC_001133.9 RefSeq gene 13362 13743 . - . 
YAL064C-A +#' } #' #' #' @seealso [read_bed()], [read_encode_blacklist()] @@ -107,7 +119,24 @@ read_gtf <- function(path, attributes = c("gene_id"), tags = character(0), featu #' ####################################################################### #' ## read_gencode_genes() example #' ####################################################################### -#' read_gencode_genes("./references", release = "42") +#' if (interactive()) { +#' read_gencode_genes(file.path(tempdir(), "references"), release = "42", timeout = 3000) +#' #> # A tibble: 39,319 x 11 +#' #> chr source feature start end score strand frame gene_id gene_type +#' #> +#' #> 1 chr1 HAVANA gene 11868 14409 . + . ENSG00000290... lncRNA +#' #> 2 chr1 HAVANA gene 29553 31109 . + . ENSG00000243... lncRNA +#' #> 3 chr1 HAVANA gene 34553 36081 . - . ENSG00000237... lncRNA +#' #> 4 chr1 HAVANA gene 57597 64116 . + . ENSG00000290... lncRNA +#' #> 5 chr1 HAVANA gene 65418 71585 . + . ENSG00000186... protein_... +#' #> 6 chr1 HAVANA gene 89294 133723 . - . ENSG00000238... lncRNA +#' #> 7 chr1 HAVANA gene 89550 91105 . - . ENSG00000239... lncRNA +#' #> 8 chr1 HAVANA gene 139789 140339 . - . ENSG00000239... lncRNA +#' #> 9 chr1 HAVANA gene 141473 173862 . - . ENSG00000241... lncRNA +#' #> 10 chr1 HAVANA gene 160445 161525 . + . ENSG00000241... 
lncRNA +#' #> # i 39,309 more rows +#' #> # i 1 more variable: gene_name +#' } #' #' #' @export @@ -159,9 +188,24 @@ read_gencode_genes <- function(dir, release = "latest", #' ####################################################################### #' ## If read_gencode_genes() was already ran on the same release, #' ## will reuse previously downloaded annotations -#' read_gencode_transcripts("./references", release = "42") -#' -#' +#' if (interactive()) { +#' read_gencode_transcripts(file.path(tempdir(), "references"), release = "42", timeout = 3000) +#' #> # A tibble: 220,296 x 13 +#' #> chr source feature start end score strand frame gene_id gene_type +#' #> +#' #> 1 chr1 HAVANA transcript 65418 71585 . + . ENSG00000... protein_... +#' #> 2 chr1 HAVANA exon 65418 65433 . + . ENSG00000... protein_... +#' #> 3 chr1 HAVANA exon 65519 65573 . + . ENSG00000... protein_... +#' #> 4 chr1 HAVANA exon 69036 71585 . + . ENSG00000... protein_... +#' #> 5 chr1 HAVANA transcript 450739 451678 . - . ENSG00000... protein_... +#' #> 6 chr1 HAVANA exon 450739 451678 . - . ENSG00000... protein_... +#' #> 7 chr1 HAVANA transcript 685715 686654 . - . ENSG00000... protein_... +#' #> 8 chr1 HAVANA exon 685715 686654 . - . ENSG00000... protein_... +#' #> 9 chr1 HAVANA transcript 923922 944574 . + . ENSG00000... protein_... +#' #> 10 chr1 HAVANA exon 923922 924948 . + . ENSG00000... protein_... +#' #> # i 220,286 more rows +#' #> # i 3 more variables: gene_name , transcript_id , MANE_Select +#' } #' @export read_gencode_transcripts <- function(dir, release = "latest", transcript_choice = c("MANE_Select", "Ensembl_Canonical", "all"), annotation_set = c("basic", "comprehensive"), @@ -211,17 +255,18 @@ read_gencode_transcripts <- function(dir, release = "latest", transcript_choice #' @return Data frame with coordinates using the 0-based convention. 
#' @examples #' ## Dummy bed file creation +#' file_name <- tempfile(fileext = ".bed") #' data.frame( #' chrom = rep("chr1", 6), #' start = seq(20, 121, 20), #' end = seq(39, 140, 20) -#' ) %>% write.table("./references/example.bed", row.names = FALSE, col.names = FALSE, sep = "\t") +#' ) %>% write.table(file_name, row.names = FALSE, col.names = FALSE, sep = "\t") #' #' #' ####################################################################### #' ## read_bed() example #' ####################################################################### -#' read_bed("./references/example.bed") +#' read_bed(file_name) #' #' #' @seealso [read_gtf()], [read_gencode_genes()] @@ -239,13 +284,13 @@ read_bed <- function(path, additional_columns = character(0), backup_url = NULL, #' @rdname read_bed #' @details **read_encode_blacklist** #' -#' Downloads the Boyle Lab blacklist, as described in +#' Downloads the Boyle Lab blacklist, as described in \doi{10.1038/s41598-019-45839-z} #' @param genome genome name #' @examples #' ####################################################################### #' ## read_encode_blacklist() example #' ####################################################################### -#' read_encode_blacklist("./reference") +#' read_encode_blacklist(file.path(tempdir(), "references")) #' #' #' @export @@ -263,9 +308,14 @@ read_encode_blacklist <- function(dir, genome = c("hg38", "mm10", "hg19", "dm6", #' The underlying data is pulled from here: #' #' @examples -#' read_ucsc_chrom_sizes("./reference") +#' read_ucsc_chrom_sizes(file.path(tempdir(), "references")) #' @export +#' @param dir Output directory to cache the downloaded chrom sizes file +#' @param genome Genome name. Defaults to hg38 +#' @param keep_chromosomes Regular expression with which chromosomes to keep. 
+#' Defaults to standard chromosomes (chr1-22, chrX, chrY) +#' @param timeout Maximum time in seconds to wait for download from UCSC read_ucsc_chrom_sizes <- function(dir, genome = c("hg38", "mm39", "mm10", "mm9", "hg19"), keep_chromosomes = "chr[0-9]+|chrX|chrY", timeout = 300) { genome <- match.arg(genome) @@ -416,21 +466,31 @@ canonical_gene_symbol <- function(query, gene_mapping = human_gene_mapping) { #' symmetric. If length 2, provide upstream extension then downstream extension as positive distances. #' @return List of chr, start, end positions for use with trackplot functions. #' @examples +#' \dontrun{ #' ## Prep data #' genes <- read_gencode_transcripts( #' file.path(tempdir(), "references"), release = "42", #' annotation_set = "basic", -#' features = "transcript" +#' features = "transcript", timeout = 3000 #' ) #' #' ## Get gene region #' gene_region(genes, "CD19", extend_bp = 1e5) +#' #> $chr +#' #> [1] "chr16" +#' #> +#' #> $start +#' #> [1] 28831970 +#' #> +#' #> $end +#' #> [1] 29039342 +#' } #' @export gene_region <- function(genes, gene_symbol, extend_bp = c(1e4, 1e4), gene_mapping = human_gene_mapping) { genes <- normalize_ranges(genes, metadata_cols = c("strand", "gene_name")) idx <- match_gene_symbol(gene_symbol, genes$gene_name) if (is.na(idx)) { - rlang::stop("Could not locate gene") + rlang::abort("Could not locate gene") } if (length(extend_bp) == 1) { extend_bp <- c(extend_bp, extend_bp) diff --git a/r/R/matrix-svds-docs.R b/r/R/matrix-svds-docs.R deleted file mode 100644 index f9115675..00000000 --- a/r/R/matrix-svds-docs.R +++ /dev/null @@ -1,85 +0,0 @@ -# This file is distributed under the MPL-2.0 license -# It is adapted from the RSpectra package, which is distributed under the MPL-2.0 license. 
-# The RSpectra package is copyright Yixuan Qiu 2016 -# Original source code available here: https://github.com/yixuan/RSpectra -# Modifications are copyright 2024 BPCells contributors -# -# SPDX-License-Identifier: MPL-2.0 - -#' Calculate svds -#' -#' Use the C++ Spectra solver (same as RSpectra package), in order to -#' compute the largest k values and corresponding singular vectors. -#' Empirically, memory usage is much lower than using `irlba::irlba()`, likely -#' due to avoiding R garbage creation while solving due to the pure-C++ solver. -#' This documentation is a slightly-edited version of the `RSpectra::svds()` -#' documentation. -#' -#' @param A The matrix whose truncated SVD is to be computed. -#' @param k Number of singular values requested. -#' @param nu Number of left singular vectors to be computed. This must be between 0 and 'k'. (Must be equal to 'k' for BPCells IterableMatrix) -#' @param nu Number of right singular vectors to be computed. This must be between 0 and 'k'. (Must be equal to 'k' for BPCells IterableMatrix) -#' @param opts Control parameters related to computing algorithm. See *Details* below -#' @param threads Control threads to use calculating mat-vec producs (BPCells specific) -#' @return A list with the following components: -##' \item{d}{A vector of the computed singular values.} -##' \item{u}{An \code{m} by \code{nu} matrix whose columns contain -##' the left singular vectors. If \code{nu == 0}, \code{NULL} -##' will be returned.} -##' \item{v}{An \code{n} by \code{nv} matrix whose columns contain -##' the right singular vectors. If \code{nv == 0}, \code{NULL} -##' will be returned.} -##' \item{nconv}{Number of converged singular values.} -##' \item{niter}{Number of iterations used.} -##' \item{nops}{Number of matrix-vector multiplications used.} -#' @details -#' When RSpectra is installed, this function will just add a method to -#' `RSpectra::svds()` for the `IterableMatrix` class. 
-#' -#' The \code{opts} argument is a list that can supply any of the -#' following parameters: -#' -#' \describe{ -#' \item{\code{ncv}}{Number of Lanzcos basis vectors to use. More vectors -#' will result in faster convergence, but with greater -#' memory use. \code{ncv} must be satisfy -#' \eqn{k < ncv \le p}{k < ncv <= p} where -#' \code{p = min(m, n)}. -#' Default is \code{min(p, max(2*k+1, 20))}.} -#' \item{\code{tol}}{Precision parameter. Default is 1e-10.} -#' \item{\code{maxitr}}{Maximum number of iterations. Default is 1000.} -#' \item{\code{center}}{Either a logical value (\code{TRUE}/\code{FALSE}), or a numeric -#' vector of length \eqn{n}. If a vector \eqn{c} is supplied, then -#' SVD is computed on the matrix \eqn{A - 1c'}{A - 1 * c'}, -#' in an implicit way without actually forming this matrix. -#' \code{center = TRUE} has the same effect as -#' \code{center = colMeans(A)}. Default is \code{FALSE}. Ignored in BPCells} -#' \item{\code{scale}}{Either a logical value (\code{TRUE}/\code{FALSE}), or a numeric -#' vector of length \eqn{n}. If a vector \eqn{s} is supplied, then -#' SVD is computed on the matrix \eqn{(A - 1c')S}{(A - 1 * c')S}, -#' where \eqn{c} is the centering vector and \eqn{S = diag(1/s)}. -#' If \code{scale = TRUE}, then the vector \eqn{s} is computed as -#' the column norm of \eqn{A - 1c'}{A - 1 * c'}. -#' Default is \code{FALSE}. Ignored in BPCells} -#' } -#' @references Qiu Y, Mei J (2022). _RSpectra: Solvers for Large-Scale Eigenvalue and SVD Problems_. R package version 0.16-1, . -#' @usage svds(A, k, nu = k, nv = k, opts = list(), threads=0L, ...) 
-#' @examples -#' mat <- matrix(rnorm(500), nrow = 50, ncol = 10) -#' rownames(mat) <- paste0("gene", seq_len(50)) -#' colnames(mat) <- paste0("cell", seq_len(10)) -#' mat <- mat %>% as("dgCMatrix") %>% as("IterableMatrix") -#' -#' svd_res <- svds(mat, k = 5) -#' -#' names(svd_res) -#' -#' svd_res$d -#' -#' dim(svd_res$u) -#' -#' dim(svd_res$v) -#' # Can also pass in values directly into RSpectra::svds -#' svd_res <- svds(mat, k = 5, opts=c(maxitr = 500)) -#' @name svds -NULL \ No newline at end of file diff --git a/r/R/matrix.R b/r/R/matrix.R index 6c7fe02a..810d542c 100644 --- a/r/R/matrix.R +++ b/r/R/matrix.R @@ -6,12 +6,80 @@ # option. This file may not be copied, modified, or distributed # except according to those terms. -#' IterableMatrix methods +#' IterableMatrix core methods #' -#' Generic methods and built-in functions for IterableMatrix objects +#' Core operations for IterableMatrix objects: inspection, transpose, subsetting, and dimnames. #' -#' @name IterableMatrix-methods -#' @rdname IterableMatrix-methods +#' @param x IterableMatrix object +#' @param object IterableMatrix object +#' @param i Row indices or selection helpers. +#' @param j Column indices or selection helpers. +#' @param value Replacement value supplied to assignment methods. +#' @param drop Logical indicating whether to drop dimensions when subsetting. +#' @param ... Additional arguments passed to methods +#' @name IterableMatrix-methods-core +#' @rdname IterableMatrix-methods-core +NULL + + +#' IterableMatrix summary statistics +#' +#' Summaries and reductions for IterableMatrix objects (sums, means, variances, quantiles, extrema). +#' +#' @param x IterableMatrix object or matrix-like object. +#' @param rows (Integer) Optional vector of row indices to operate over. +#' @param cols (Integer) Optional vector of column indices to operate over. +#' @param na.rm (Logical) Should missing values (NA) be removed? 
+#' @param center Optional center values (vector of length nrow(x) or ncol(x)) +#' @param probs (Numeric) Quantile value(s) to be computed, between 0 and 1. +#' @param type (Integer) between 4 and 9 selecting which quantile algorithm to use, detailed in `matrixStats::rowQuantiles()` +#' @param digits Number of decimal places for quantile calculations +#' @param drop (Logical) If TRUE and only one quantile is requested, the result is coerced to a vector (For non-BPCells objects). +#' @param useNames (Logical) Whether to use row and column names in the output. +#' @param ... Additional arguments passed to methods +#' @name IterableMatrix-methods-stats +#' @rdname IterableMatrix-methods-stats +NULL + +#' IterableMatrix operations +#' +#' Matrix multiplication, arithmetic, and comparison operations for IterableMatrix objects. +#' +#' @param x IterableMatrix object or compatible operand +#' @param y IterableMatrix object or compatible operand +#' @param e1 Left operand for binary operations. +#' @param e2 Right operand for binary operations. +#' @param digits Number of decimal places for rounding +#' @name IterableMatrix-methods-ops +#' @rdname IterableMatrix-methods-ops +NULL + +#' IterableMatrix operations (additional overloads) +#' +#' Extra operator overloads documented separately to avoid duplicate entries in the main reference. +#' +#' @keywords internal +#' @name IterableMatrix-methods-ops-misc +#' @rdname IterableMatrix-methods-ops-misc +NULL + +#' IterableMatrix subclass methods +#' +#' Methods for classes that extend `IterableMatrix` but are not dispatched +#' directly on the base class. These are typically helper objects that wrap +#' another matrix or alter behaviour (e.g., concatenation, on-disk access). +#' +#' @param x An object inheriting from `IterableMatrix`. +#' @param i Row indices or selection helpers. +#' @param j Column indices or selection helpers. +#' @param drop Logical indicating whether to drop dimensions (for subsetting). 
+#' @param e1 Left operand for binary operations. +#' @param e2 Right operand for binary operations. +#' @param ... Additional arguments passed through the call. +#' @name IterableMatrix-misc-methods +#' @rdname IterableMatrix-misc-methods +#' @docType methods +#' @keywords internal NULL setClass("IterableMatrix", @@ -56,7 +124,7 @@ denormalize_dimnames <- function(dimnames) { #' @keywords internal setGeneric("iterate_matrix", function(x) standardGeneric("iterate_matrix")) -#' @describeIn IterableMatrix-methods Get the matrix data type (mat_uint32_t, mat_float, or mat_double for now) +#' @describeIn IterableMatrix-methods-core Get the matrix data type (mat_uint32_t, mat_float, or mat_double for now) #' @examples #' ## Prep data #' mat <- matrix(1:25, nrow = 5) %>% as("dgCMatrix") @@ -74,7 +142,7 @@ setGeneric("iterate_matrix", function(x) standardGeneric("iterate_matrix")) #' @export setGeneric("matrix_type", function(x) standardGeneric("matrix_type")) -#' @describeIn IterableMatrix-methods Get the matrix storage order ("row" or "col") +#' @describeIn IterableMatrix-methods-core Get the matrix storage order ("row" or "col") (generic) #' @examples #' ####################################################################### #' ## storage_order() example @@ -85,6 +153,7 @@ setGeneric("matrix_type", function(x) standardGeneric("matrix_type")) #' @export setGeneric("storage_order", function(x) standardGeneric("storage_order")) +#' @describeIn IterableMatrix-methods-core Get the matrix storage order ("row" or "col") setMethod("storage_order", "IterableMatrix", function(x) if(x@transpose) "row" else "col") #' Return a list of input matrices to the current matrix (experimental) @@ -131,6 +200,15 @@ setMethod("matrix_inputs<-", "IterableMatrix", function(x, ..., value) { #' @param value List of IterableMatrix objects #' @return List of IterableMatrix objects. If a matrix `m` is itself an input object, then #' `all_matrix_inputs(m)` will return `list(m)`. 
+#' @examples +#' mat <- matrix(1:9, nrow=3) %>% as("IterableMatrix") +#' +#' # Just returns the matrix itself +#' all_matrix_inputs(mat) +#' +#' # Returns the matrix twice, as they are the inputs to the cbind +#' all_matrix_inputs(cbind(mat, mat)) +#' #' @export all_matrix_inputs <- function(x) { assert_is(x, "IterableMatrix") @@ -183,7 +261,7 @@ setMethod("short_description", "IterableMatrix", function(x) { }) -#' @describeIn IterableMatrix-methods Display an IterableMatrix +#' @describeIn IterableMatrix-methods-core Display an IterableMatrix #' @param object IterableMatrix object #' @examples #' ####################################################################### @@ -218,7 +296,7 @@ setMethod("show", "IterableMatrix", function(object) { }) -#' @describeIn IterableMatrix-methods Transpose an IterableMatrix +#' @describeIn IterableMatrix-methods-core Transpose an IterableMatrix #' @param x IterableMatrix object #' @return * `t()` Transposed object #' @examples @@ -243,9 +321,15 @@ setMethod("t", signature(x = "IterableMatrix"), function(x) { #' @param x IterableMatrix object #' @param y matrix -#' @describeIn IterableMatrix-methods Multiply by a dense matrix +#' @describeIn IterableMatrix-methods-ops Multiply by a dense matrix #' @return * `x %*% y`: dense matrix result #' @examples +#' ## Prep data +#' mat <- matrix(1:25, nrow = 5) %>% as("dgCMatrix") +#' mat +#' mat <- as(mat, "IterableMatrix") +#' mat +#' #' ####################################################################### #' ## `x %*% y` example #' ####################################################################### @@ -264,6 +348,7 @@ setMethod("%*%", signature(x = "IterableMatrix", y = "matrix"), function(x, y) { res }) +#' @describeIn IterableMatrix-misc-methods Multiply a dense matrix by an IterableMatrix setMethod("%*%", signature(x = "matrix", y = "IterableMatrix"), function(x, y) { iter <- iterate_matrix(convert_matrix_type(y, "double")) if (y@transpose) { @@ -276,6 +361,7 @@ 
setMethod("%*%", signature(x = "matrix", y = "IterableMatrix"), function(x, y) { res }) +#' @describeIn IterableMatrix-misc-methods Multiply an IterableMatrix by a numeric vector setMethod("%*%", signature(x = "IterableMatrix", y = "numeric"), function(x, y) { iter <- iterate_matrix(convert_matrix_type(x, "double")) if (x@transpose) { @@ -288,6 +374,7 @@ setMethod("%*%", signature(x = "IterableMatrix", y = "numeric"), function(x, y) res }) +#' @describeIn IterableMatrix-misc-methods Multiply a numeric row vector by an IterableMatrix setMethod("%*%", signature(x = "numeric", y = "IterableMatrix"), function(x, y) { iter <- iterate_matrix(convert_matrix_type(y, "double")) if (y@transpose) { @@ -328,6 +415,18 @@ linear_operator <- function(mat) { new("LinearOperator", dim = dim(mat), xptr = iterate_matrix(convert_matrix_type(mat, "double")), transpose = mat@transpose) } +#' LinearOperator multiplication helpers +#' +#' Methods enabling `\%*%` between `LinearOperator` objects and dense matrices or numeric vectors. +#' +#' @param x Left operand. +#' @param y Right operand. 
+#' @name LinearOperator-math +#' @docType methods +#' @keywords internal +NULL + +#' @describeIn LinearOperator-math Multiply a LinearOperator by a dense matrix setMethod("%*%", signature(x = "LinearOperator", y = "matrix"), function(x, y) { if (x@transpose) { return(t(dense_multiply_left_preserve_loader_cpp(x@xptr, t(y)))) @@ -336,6 +435,7 @@ setMethod("%*%", signature(x = "LinearOperator", y = "matrix"), function(x, y) { } }) +#' @describeIn LinearOperator-math Multiply a dense matrix by a LinearOperator setMethod("%*%", signature(x = "matrix", y = "LinearOperator"), function(x, y) { if (y@transpose) { return(t(dense_multiply_right_preserve_loader_cpp(y@xptr, t(x)))) @@ -344,6 +444,7 @@ setMethod("%*%", signature(x = "matrix", y = "LinearOperator"), function(x, y) { } }) +#' @describeIn LinearOperator-math Multiply a LinearOperator by a numeric vector setMethod("%*%", signature(x = "LinearOperator", y = "numeric"), function(x, y) { if (x@transpose) { return(vec_multiply_left_preserve_loader_cpp(x@xptr, y)) @@ -352,6 +453,7 @@ setMethod("%*%", signature(x = "LinearOperator", y = "numeric"), function(x, y) } }) +#' @describeIn LinearOperator-math Multiply a numeric vector by a LinearOperator setMethod("%*%", signature(x = "numeric", y = "LinearOperator"), function(x, y) { if (y@transpose) { return(vec_multiply_right_preserve_loader_cpp(y@xptr, x)) @@ -373,6 +475,7 @@ setClass("MatrixMultiply", right = NULL ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for MatrixMultiply objects setMethod("matrix_type", signature(x = "MatrixMultiply"), function(x) matrix_type(x@left)) setMethod("matrix_inputs", "MatrixMultiply", function(x) list(x@left, x@right)) setMethod("matrix_inputs<-", "MatrixMultiply", function(x, ..., value) { @@ -406,6 +509,7 @@ setMethod("short_description", "MatrixMultiply", function(x) { } }) +#' @describeIn IterableMatrix-misc-methods Multiply two IterableMatrix objects setMethod("%*%", signature(x = "IterableMatrix", y = 
"IterableMatrix"), function(x, y) { if (x@transpose != y@transpose) stop("Cannot multiply matrices with different internal transpose states.\nPlease use transpose_storage_order().") if (x@transpose) { @@ -425,6 +529,7 @@ setMethod("%*%", signature(x = "IterableMatrix", y = "IterableMatrix"), function new("MatrixMultiply", left = x, right = y, transpose = FALSE, dim = dim, dimnames = dimnames) }) +#' @describeIn IterableMatrix-misc-methods Multiply an IterableMatrix by a dgCMatrix setMethod("%*%", signature(x = "IterableMatrix", y = "dgCMatrix"), function(x, y) { if (x@transpose) { t(as(t(y), "IterableMatrix") %*% t(x)) @@ -433,6 +538,7 @@ setMethod("%*%", signature(x = "IterableMatrix", y = "dgCMatrix"), function(x, y } }) +#' @describeIn IterableMatrix-misc-methods Multiply a dgCMatrix by an IterableMatrix setMethod("%*%", signature(x = "dgCMatrix", y = "IterableMatrix"), function(x, y) { if (y@transpose) { t(t(y) %*% as(t(x), "IterableMatrix")) @@ -443,6 +549,7 @@ setMethod("%*%", signature(x = "dgCMatrix", y = "IterableMatrix"), function(x, y # Subsetting on MatrixMultiply +#' @describeIn IterableMatrix-misc-methods Subset MatrixMultiply results setMethod("[", "MatrixMultiply", function(x, i, j, ...) 
{ if (missing(x)) stop("x is missing in matrix selection") # Handle transpose via recursive call @@ -478,6 +585,7 @@ setClass("MatrixMask", invert = FALSE ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for MatrixMask objects setMethod("matrix_type", signature(x = "MatrixMask"), function(x) matrix_type(x@matrix)) setMethod("matrix_inputs", "MatrixMask", function(x) list(x@matrix, x@mask)) setMethod("matrix_inputs<-", "MatrixMask", function(x, ..., value) { @@ -544,6 +652,7 @@ setClass("MatrixRankTransform", matrix = NULL ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for MatrixRankTransform objects setMethod("matrix_type", signature(x = "MatrixRankTransform"), function(x) "double") setMethod("iterate_matrix", "MatrixRankTransform", function(x) { iter_function <- get(sprintf("iterate_matrix_rank_%s_cpp", matrix_type(x@matrix))) @@ -587,9 +696,15 @@ rank_transform <- function(mat, axis) { # Row sums and row means #' @param x IterableMatrix object -#' @describeIn IterableMatrix-methods Calculate rowSums +#' @describeIn IterableMatrix-methods-stats Calculate rowSums #' @return * `rowSums()`: vector of row sums #' @examples +#' ## Prep data +#' mat <- matrix(1:25, nrow = 5) %>% as("dgCMatrix") +#' mat +#' mat <- as(mat, "IterableMatrix") +#' mat +#' #' ####################################################################### #' ## rowSums() example #' ####################################################################### @@ -608,7 +723,7 @@ setMethod("rowSums", signature(x = "IterableMatrix"), function(x) { }) #' @param x IterableMatrix object -#' @describeIn IterableMatrix-methods Calculate colSums +#' @describeIn IterableMatrix-methods-stats Calculate colSums #' @return * `colSums()`: vector of col sums #' @examples #' ####################################################################### @@ -629,7 +744,7 @@ setMethod("colSums", signature(x = "IterableMatrix"), function(x) { }) #' @param x IterableMatrix object -#' @describeIn 
IterableMatrix-methods Calculate rowMeans +#' @describeIn IterableMatrix-methods-stats Calculate rowMeans #' @return * `rowMeans()`: vector of row means #' @examples #' ####################################################################### @@ -641,11 +756,11 @@ setMethod("colSums", signature(x = "IterableMatrix"), function(x) { setMethod("rowMeans", signature(x = "IterableMatrix"), function(x) rowSums(x) / ncol(x)) #' @param x IterableMatrix object -#' @describeIn IterableMatrix-methods Calculate colMeans +#' @describeIn IterableMatrix-methods-stats Calculate colMeans #' @return * `colMeans()`: vector of col means #' @examples #' ####################################################################### -#' ## colMeans() example +#' # colMeans() example #' ####################################################################### #' colMeans(mat) #' @@ -662,7 +777,8 @@ setMethod("colMeans", signature(x = "IterableMatrix"), function(x) colSums(x) / # - In summary, BPCells::rowVars and BPCells::colVars will work on all inputs, and so will MatrixGenerics::rowVars and # MatrixGenerics::colVars. matrixStats::rowVars and matrixStats::colVars will only work on base R matrix objects. 
-#' @describeIn IterableMatrix-methods Calculate colVars (replacement for `matrixStats::colVars()`) +#' @describeIn IterableMatrix-methods-stats Calculate colVars (replacement for `matrixStats::colVars()`) +#' @param center Optional center values (vector of length nrow(x) or ncol(x)) #' @return * `colVars()`: vector of col variance #' @examples #' ####################################################################### @@ -673,7 +789,19 @@ setMethod("colMeans", signature(x = "IterableMatrix"), function(x) colSums(x) / #' #' @export colVars <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, ..., useNames = TRUE) UseMethod("colVars") -#' @export +#' @describeIn IterableMatrix-matrixgenerics Calculate colVars (replacement for `matrixStats::colVars()`) +#' @param center Optional center values (vector of length nrow(x) or ncol(x)) +#' @return * `colVars()`: vector of col variance +#' @examples +#' mat <- matrix(1:25, nrow = 5) %>% as("dgCMatrix") +#' mat +#' mat <- as(mat, "IterableMatrix") +#' ####################################################################### +#' ## colVars() example +#' ####################################################################### +#' colVars(mat) +#' +#' @method colVars default colVars.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, ..., useNames = TRUE) { if (requireNamespace("MatrixGenerics", quietly = TRUE)) { MatrixGenerics::colVars(x, rows=rows, cols=cols, na.rm=na.rm, center=center, ..., useNames=useNames) @@ -684,6 +812,15 @@ colVars.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = } } #' @export +#' @param center Optional center values (vector of length nrow(x) or ncol(x)) +#' @return * `colVars()`: vector of col variance +#' @examples +#' ####################################################################### +#' ## colVars() example +#' ####################################################################### +#' colVars(mat) +#' +#' @describeIn 
IterableMatrix-matrixgenerics Calculate colVars (replacement for `matrixStats::colVars()`) colVars.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, ..., useNames = TRUE) { if (!is.null(rows) || !is.null(cols) || !isFALSE(na.rm) || !is.null(center) || !isTRUE(useNames)) { stop("colVars(IterableMatrix) doesn't support extra arguments rows, cols, na.rm, center, or useNames") @@ -696,11 +833,30 @@ rlang::on_load({ } }) -#' @describeIn IterableMatrix-methods Calculate rowVars (replacement for `matrixStats::rowVars()`) +#' @describeIn IterableMatrix-methods-stats Calculate rowVars (replacement for `matrixStats::rowVars()`) (generic) #' @return * `rowVars()`: vector of row variance +#' @examples +#' ####################################################################### +#' ## rowVars() example +#' ####################################################################### +#' rowVars(mat) +#' +#' #' @export rowVars <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, ..., useNames = TRUE) UseMethod("rowVars") -#' @export +#' @describeIn IterableMatrix-matrixgenerics Calculate rowVars (replacement for `matrixStats::rowVars()`) +#' @param center Optional center values (vector of length nrow(x) or ncol(x)) +#' @return * `rowVars()`: vector of row variance +#' @examples +#' mat <- matrix(1:25, nrow = 5) %>% as("dgCMatrix") +#' mat +#' mat <- as(mat, "IterableMatrix") +#' ####################################################################### +#' ## rowVars() example +#' ####################################################################### +#' rowVars(mat) +#' +#' @method rowVars default rowVars.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, ..., useNames = TRUE) { if (requireNamespace("MatrixGenerics", quietly = TRUE)) { MatrixGenerics::rowVars(x, rows=rows, cols=cols, na.rm=na.rm, center=center, ..., useNames=useNames) @@ -711,6 +867,15 @@ rowVars.default <- function(x, rows = NULL, cols = 
NULL, na.rm = FALSE, center = } } #' @export +#' @param center Optional center values (vector of length nrow(x) or ncol(x)) +#' @return * `rowVars()`: vector of row variance +#' @examples +#' ####################################################################### +#' ## rowVars() example +#' ####################################################################### +#' rowVars(mat) +#' +#' @describeIn IterableMatrix-matrixgenerics Calculate rowVars (replacement for `matrixStats::rowVars()`) rowVars.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL, ..., useNames = TRUE) { if (!is.null(rows) || !is.null(cols) || !isFALSE(na.rm) || !is.null(center) || !isTRUE(useNames)) { stop("rowVars(IterableMatrix) doesn't support extra arguments rows, cols, na.rm, center, or useNames") @@ -726,7 +891,7 @@ rlang::on_load({ #' Get the max of each row in an iterable matrix #' @param x IterableMatrix object/dgCMatrix object #' @return * `rowMaxs()`: vector of maxes for every row -#' @describeIn IterableMatrix-methods Calculate rowMaxs (replacement for `matrixStats::rowMaxs()`) +#' @describeIn IterableMatrix-methods-stats Calculate rowMaxs (replacement for `matrixStats::rowMaxs()`) (generic) #' @examples #' ####################################################################### #' ## rowMaxs() example @@ -735,20 +900,39 @@ rlang::on_load({ #' #' #' @export -rowMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) UseMethod("rowMaxs") -#' @export -rowMaxs.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) 
{ +rowMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) UseMethod("rowMaxs") +#' @describeIn IterableMatrix-matrixgenerics Calculate rowMaxs (replacement for `matrixStats::rowMaxs()`) +#' @return * `rowMaxs()`: vector of row maxs +#' @examples +#' mat <- matrix(1:25, nrow = 5) %>% as("dgCMatrix") +#' mat +#' mat <- as(mat, "IterableMatrix") +#' ####################################################################### +#' ## rowMaxs() example +#' ####################################################################### +#' rowMaxs(mat) +#' +#' @method rowMaxs default +rowMaxs.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { if (requireNamespace("MatrixGenerics", quietly = TRUE)) { - MatrixGenerics::rowMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...) + MatrixGenerics::rowMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ..., useNames = useNames) } else if (requireNamespace("matrixStats", quietly = TRUE)) { - matrixStats::rowMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...) + matrixStats::rowMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ..., useNames = useNames) } else { stop("Can't run rowMaxs on a non-BPCells object unless MatrixGenerics or matrixStats are installed.") } } #' @export -rowMaxs.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) 
{ +#' @return * `rowMaxs()`: vector of row maxs +#' @examples +#' ####################################################################### +#' ## rowMaxs() example +#' ####################################################################### +#' rowMaxs(mat) +#' +#' @describeIn IterableMatrix-matrixgenerics Calculate rowMaxs (replacement for `matrixStats::rowMaxs()`) +rowMaxs.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { if(!is.null(rows) || !is.null(cols) || !isFALSE(na.rm)) { stop("rowMaxs(IterableMatrix) doesn't support extra arguments rows, cols, or na.rm") } @@ -758,7 +942,7 @@ rowMaxs.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, . } else { res <- matrix_max_per_row_cpp(iter) } - names(res) <- rownames(x) + if (useNames) names(res) <- rownames(x) return(res) } rlang::on_load({ @@ -770,7 +954,7 @@ rlang::on_load({ #' Get the max of each col in an interable matrix #' @param x IterableMatrix/dgCMatrix object #' @return * `colMaxs()`: vector of column maxes -#' @describeIn IterableMatrix-methods Calculate colMax (replacement for `matrixStats::colMax()`) +#' @describeIn IterableMatrix-methods-stats Calculate colMax (replacement for `matrixStats::colMax()`) #' @examples #' ####################################################################### #' ## colMaxs() example @@ -779,27 +963,46 @@ rlang::on_load({ #' #' #' @export -colMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) UseMethod("colMaxs") -#' @export -colMaxs.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) 
{ +colMaxs <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) UseMethod("colMaxs") +#' @describeIn IterableMatrix-matrixgenerics Calculate colMaxs (replacement for `matrixStats::colMaxs()`) +#' @return * `colMaxs()`: vector of col maxs +#' @examples +#' mat <- matrix(1:25, nrow = 5) %>% as("dgCMatrix") +#' mat +#' mat <- as(mat, "IterableMatrix") +#' ####################################################################### +#' ## colMaxs() example +#' ####################################################################### +#' colMaxs(mat) +#' +#' @method colMaxs default +colMaxs.default <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { if (requireNamespace("MatrixGenerics", quietly = TRUE)) { - MatrixGenerics::colMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...) + MatrixGenerics::colMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ..., useNames = useNames) } else if (requireNamespace("matrixStats", quietly = TRUE)) { - matrixStats::colMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ...) + matrixStats::colMaxs(x, rows = rows, cols = cols, na.rm = na.rm, ..., useNames = useNames) } else { stop("Can't run colMaxs on a non-BPCells object unless MatrixGenerics or matrixStats are installed.") } } #' @export -colMaxs.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ...) 
{ +#' @return * `colMaxs()`: vector of col maxs +#' @examples +#' ####################################################################### +#' ## colMaxs() example +#' ####################################################################### +#' colMaxs(mat) +#' +#' @describeIn IterableMatrix-matrixgenerics Calculate colMaxs (replacement for `matrixStats::colMaxs()`) +colMaxs.IterableMatrix <- function(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) { iter <- iterate_matrix(convert_matrix_type(x, "double")) if(x@transpose == TRUE) { res <- matrix_max_per_row_cpp(iter) } else { res <- matrix_max_per_col_cpp(iter) } - names(res) <- colnames(x) + if (useNames) names(res) <- colnames(x) return(res) } rlang::on_load({ @@ -823,6 +1026,7 @@ setClass("MatrixSubset", zero_dims = c(FALSE, FALSE) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for MatrixSubset objects setMethod("matrix_type", signature(x = "MatrixSubset"), function(x) matrix_type(x@matrix)) # Helper function to convert logical/character indexing into numeric indexing @@ -876,6 +1080,12 @@ unsplit_selection <- function(selection) { selection$subset[selection$reorder] } +#' @describeIn IterableMatrix-methods-core Subset an IterableMatrix +#' @examples +#' ####################################################################### +#' ## x[i, j, ..., drop = TRUE] example +#' ####################################################################### +#' mat[1:2, 1:2] setMethod("[", "IterableMatrix", function(x, i, j, ...) { if (missing(x)) stop("x is missing in matrix selection") if (rlang::is_missing(i) && rlang::is_missing(j)) { @@ -912,6 +1122,17 @@ setMethod("[", "IterableMatrix", function(x, i, j, ...) { # Simulate assigning to a subset of the matrix. # We concatenate the un-modified matrix subsets with the new values, # then reorder rows/columns appropriately +#' @examples +#' ####################################################################### +#' ## x[i, j, ...]
<- value example +#' ####################################################################### +#' mat_changed <- mat +#' new_mat <- matrix(rep(2,4), nrow = 2) %>% as("IterableMatrix") +#' mat_changed[1:2, 1:2] <- new_mat +#' mat_changed %>% as("dgCMatrix") +#' +#' +#' @describeIn IterableMatrix-methods-core Assign into an IterableMatrix setMethod("[<-", "IterableMatrix", function(x, i, j, ..., value) { # Do type conversions if needed if (is.matrix(value)) value <- as(value, "dgCMatrix") @@ -973,6 +1194,7 @@ setMethod("[<-", "IterableMatrix", function(x, i, j, ..., value) { return(x) }) +#' @describeIn IterableMatrix-misc-methods Subset MatrixSubset transforms setMethod("[", "MatrixSubset", function(x, i, j, ...) { if (missing(x)) stop("x is missing in matrix selection") @@ -1054,6 +1276,7 @@ setClass("RenameDims", matrix = "IterableMatrix" ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for RenameDims objects setMethod("matrix_type", "RenameDims", function(x) matrix_type(x@matrix)) setMethod("iterate_matrix", "RenameDims", function(x) { if (x@transpose) { @@ -1068,6 +1291,7 @@ setMethod("iterate_matrix", "RenameDims", function(x) { iter_function(iterate_matrix(x@matrix), row_names, col_names, is.null(rownames(x)), is.null(colnames(x))) }) +#' @describeIn IterableMatrix-misc-methods Subset RenameDims transforms setMethod("[", "RenameDims", function(x, i, j, ...) 
{ if (missing(x)) stop("x is missing in matrix selection") @@ -1090,6 +1314,9 @@ setMethod("short_description", "RenameDims", function(x) { sprintf("Reset dimnames") ) }) +#' @export +#' @param value New dimnames (list of length 2, or NULL) +#' @describeIn IterableMatrix-methods-core Set dimnames of an IterableMatrix, similar to base R `dimnames<-()` setMethod("dimnames<-", signature(x = "IterableMatrix", value = "list"), function(x, value) { if (identical(dimnames(x), value)) return(x) d <- dim(x) @@ -1114,6 +1341,8 @@ setMethod("dimnames<-", signature(x = "IterableMatrix", value = "list"), functio } x }) +#' @export +#' @describeIn IterableMatrix-methods-core Remove dimnames of an IterableMatrix setMethod("dimnames<-", signature(x = "IterableMatrix", value = "NULL"), function(x, value) { if (identical(dimnames(x), value)) return(x) if (!is(x, "RenameDims")) { @@ -1169,6 +1398,7 @@ setClass("RowBindMatrices", threads = 0L ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for RowBindMatrices objects setMethod("matrix_type", signature(x = "RowBindMatrices"), function(x) matrix_type(x@matrix_list[[1]])) setMethod("iterate_matrix", "RowBindMatrices", function(x) { @@ -1277,6 +1507,7 @@ setClass("ColBindMatrices", threads = 0L ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for ColBindMatrices objects setMethod("matrix_type", signature(x = "ColBindMatrices"), function(x) matrix_type(x@matrix_list[[1]])) setMethod("iterate_matrix", "ColBindMatrices", function(x) { @@ -1359,6 +1590,7 @@ setMethod("cbind2", signature(x = "IterableMatrix", y = "dgCMatrix"), function(x setMethod("cbind2", signature(x = "dgCMatrix", y = "IterableMatrix"), function(x, y, ...) cbind2(as(x, "IterableMatrix"), y)) # Row bind needs specialization because there's not a default row-seek operation +#' @describeIn IterableMatrix-misc-methods Subset RowBindMatrices transforms setMethod("[", "RowBindMatrices", function(x, i, j, ...) 
{ if (missing(x)) stop("x is missing in matrix selection") # Handle transpose via recursive call @@ -1429,6 +1661,7 @@ setMethod("[", "RowBindMatrices", function(x, i, j, ...) { return(x) }) +#' @describeIn IterableMatrix-misc-methods Subset ColBindMatrices transforms setMethod("[", "ColBindMatrices", function(x, i, j, ...) { if (missing(x)) stop("x is missing in matrix selection") # Handle transpose via recursive call @@ -1579,6 +1812,7 @@ setClass("PackedMatrixMem_uint32_t", val_idx_offsets = numeric(0) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for PackedMatrixMem_uint32_t objects setMethod("matrix_type", "PackedMatrixMem_uint32_t", function(x) "uint32_t") setMethod("iterate_matrix", "PackedMatrixMem_uint32_t", function(x) { if (x@transpose) x <- t(x) @@ -1591,6 +1825,7 @@ setClass("PackedMatrixMem_float", slots = c(val = "integer"), prototype = list(val = integer(0)) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for PackedMatrixMem_float objects setMethod("matrix_type", "PackedMatrixMem_float", function(x) "float") setMethod("iterate_matrix", "PackedMatrixMem_float", function(x) { if (x@transpose) x <- t(x) @@ -1603,6 +1838,7 @@ setClass("PackedMatrixMem_double", slots = c(val = "numeric"), prototype = list(val = numeric(0)) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for PackedMatrixMem_double objects setMethod("matrix_type", "PackedMatrixMem_double", function(x) "double") setMethod("iterate_matrix", "PackedMatrixMem_double", function(x) { if (x@transpose) x <- t(x) @@ -1634,6 +1870,7 @@ setClass("UnpackedMatrixMem_uint32_t", slots = c(val = "integer"), prototype = list(val = integer()) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for UnpackedMatrixMem_uint32_t objects setMethod("matrix_type", "UnpackedMatrixMem_uint32_t", function(x) "uint32_t") setMethod("iterate_matrix", "UnpackedMatrixMem_uint32_t", function(x) { if (x@transpose) x <- t(x) @@ -1646,6 +1883,7 @@ 
setClass("UnpackedMatrixMem_float", slots = c(val = "integer"), prototype = list(val = integer(0)) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for UnpackedMatrixMem_float objects setMethod("matrix_type", "UnpackedMatrixMem_float", function(x) "float") setMethod("iterate_matrix", "UnpackedMatrixMem_float", function(x) { if (x@transpose) x <- t(x) @@ -1658,6 +1896,7 @@ setClass("UnpackedMatrixMem_double", slots = c(val = "numeric"), prototype = list(val = numeric(0)) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for UnpackedMatrixMem_double objects setMethod("matrix_type", "UnpackedMatrixMem_double", function(x) "double") setMethod("iterate_matrix", "UnpackedMatrixMem_double", function(x) { if (x@transpose) x <- t(x) @@ -1689,7 +1928,8 @@ setMethod("iterate_matrix", "UnpackedMatrixMem_double", function(x) { #' ## A regular transpose operation switches a user's rows and cols #' t(mat) #' -#' ## Running `transpose_storage_order()` instead changes whether the storage is in row-major or col-major, +#' ## Running `transpose_storage_order()` instead changes whether +#' ## the storage is in row-major or col-major, #' ## but does not switch the rows and cols #' transpose_storage_order(mat) #' @export @@ -1736,7 +1976,7 @@ transpose_storage_order <- function(matrix, outdir = tempfile("transpose"), tmpd #' as it can only be applied to the indexes of each entry but not the values. #' There will still be some space savings, but far less than for counts matrices. #' -#' @param matrix Input matrix, either IterableMatrix or dgCMatrix +#' @param mat Input matrix, either IterableMatrix or dgCMatrix #' @param compress Whether or not to compress the data. 
#' @return BPCells matrix object #' @examples @@ -1800,6 +2040,7 @@ setClass("MatrixDir", type = character(0) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for MatrixDir objects setMethod("matrix_type", "MatrixDir", function(x) x@type) setMethod("matrix_inputs", "MatrixDir", function(x) list()) @@ -1918,6 +2159,7 @@ setClass("EXPERIMENTAL_MatrixDirCompressedCol", buffer_size = integer(0) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for EXPERIMENTAL_MatrixDirCompressedCol objects setMethod("matrix_type", "EXPERIMENTAL_MatrixDirCompressedCol", function(x) "uint32_t") setMethod("matrix_inputs", "EXPERIMENTAL_MatrixDirCompressedCol", function(x) list()) @@ -2021,6 +2263,7 @@ setClass("MatrixH5", type = character(0) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for MatrixH5 objects setMethod("matrix_type", "MatrixH5", function(x) x@type) setMethod("matrix_inputs", "MatrixH5", function(x) list()) @@ -2158,6 +2401,7 @@ setClass("10xMatrixH5", buffer_size = integer(0) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for 10xMatrixH5 objects setMethod("matrix_type", "10xMatrixH5", function(x) x@type) setMethod("matrix_inputs", "10xMatrixH5", function(x) list()) setMethod("iterate_matrix", "10xMatrixH5", function(x) { @@ -2357,6 +2601,7 @@ setClass("AnnDataMatrixH5", buffer_size = integer(0) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for AnnDataMatrixH5 objects setMethod("matrix_type", "AnnDataMatrixH5", function(x) x@type) setMethod("matrix_inputs", "AnnDataMatrixH5", function(x) list()) setMethod("iterate_matrix", "AnnDataMatrixH5", function(x) { @@ -2384,6 +2629,7 @@ setMethod("short_description", "AnnDataMatrixH5", function(x) { #' than the dense variant (see details for more information). #' #' @inheritParams open_matrix_hdf5 +#' @param mat IterableMatrix to write to hdf5 file #' @return AnnDataMatrixH5 object, with cells as the columns. 
#' @details #' **Efficiency considerations**: Reading from a dense AnnData matrix will generally be slower @@ -2522,6 +2768,26 @@ write_matrix_anndata_hdf5_dense <- function(mat, path, dataset = "X", buffer_siz #' #' When importing from 10x mtx files, the row and column names can be read automatically #' using the `import_matrix_market_10x()` convenience function. +#' @examples +#' # Create a dummy matrix market file +#' matrix_market_path <- file.path(tempdir(), "matrix.mtx") +#' lines <- c( +#' "%%MatrixMarket matrix coordinate integer general", +#' "% This is a comment", +#' "5 10 3", +#' "1 1 1", +#' "2 2 2", +#' "5 10 3" +#' ) +#' writeLines(lines, matrix_market_path) +#' +#' # Import the matrix market file +#' mat <- import_matrix_market(matrix_market_path) +#' mat +#' +#' # Clean up +#' unlink(matrix_market_path) +#' #' @export import_matrix_market <- function( mtx_path, outdir = tempfile("matrix_market"), row_names = NULL, col_names = NULL, row_major = FALSE, @@ -2554,6 +2820,14 @@ import_matrix_market <- function( #' @rdname import_matrix_market #' @param mtx_dir Directory holding matrix.mtx.gz, barcodes.tsv.gz, and features.tsv.gz #' @param feature_type String or vector of feature types to include. (cellranger 3.0 and newer) +#' @examples +#' \dontrun{ +#' # Import 10x matrix market directory +#' mat <- import_matrix_market_10x( +#' "path/to/10x/matrix_market_dir" +#' ) +#' } +#' #' @export import_matrix_market_10x <- function( mtx_dir, outdir = tempfile("matrix_market"), feature_type=NULL, row_major = FALSE, @@ -2608,9 +2882,12 @@ setClass("PeakMatrix", mode = "insertions" ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for PeakMatrix objects setMethod("matrix_type", "PeakMatrix", function(x) "uint32_t") setMethod("matrix_inputs", "PeakMatrix", function(x) list()) + + #' Calculate ranges x cells overlap matrix #' @param fragments Input fragments object. 
Must have cell names and chromosome names defined #' @param ranges `r document_granges("Peaks/ranges to overlap,")` @@ -2634,20 +2911,43 @@ setMethod("matrix_inputs", "PeakMatrix", function(x) list()) #' spans the peak even if neither the start or end falls within the peak #' @examples #' ## Prep demo data -#' frags <- get_demo_frags(subset = FALSE) -#' chrom_sizes <- read_ucsc_chrom_sizes(file.path(tempdir(), "references"), genome="hg38") -#' blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") -#' frags_filter_blacklist <- frags %>% select_regions(blacklist, invert_selection = TRUE) -#' peaks <- call_peaks_tile( -#' frags_filter_blacklist, -#' chrom_sizes, -#' effective_genome_size = 2.8e9 +#' frags <- tibble::tribble( +#' ~chr, ~start, ~end, ~cell_id, +#' "chr1", 0, 5, "cell1", +#' "chr1", 2, 4, "cell2", +#' "chr2", 3, 6, "cell1", +#' "chr3", 7, 9, "cell2" +#' ) %>% convert_to_fragments() +#' frags +#' +#' # Note: this is how we would normally call peaks given this data +#' # We use a toy example here +#' # chrom_sizes <- read_ucsc_chrom_sizes(file.path(tempdir(), "references"), genome="hg38") +#' # blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") +#' # frags_filter_blacklist <- frags %>% select_regions(blacklist, invert_selection = TRUE) +#' # peaks <- call_peaks_tile( +#' # frags_filter_blacklist, +#' # chrom_sizes, +#' # effective_genome_size = 2.8e9 +#' # ) +#' peaks <- tibble::tribble( +#' ~chr, ~start, ~end, ~group, ~p_val, ~q_val, ~enrichment, +#' "chr1", 1, 4, "all", 0, 0, 767, +#' "chr2", 2, 8, "all", 0, 0, 766, +#' "chr3", 5, 10, "all", 0, 0, 645 #' ) -#' top_peaks <- head(peaks, 5000) -#' top_peaks <- top_peaks[order_ranges(top_peaks, chrNames(frags)),] +#' peaks +#' +#' # We would normally select the top peaks like this: +#' # peaks <- head(peaks, 5000) +#' # peaks <- peaks[order_ranges(peaks, chrNames(frags)),] #' #' ## Get peak matrix -#' peak_matrix(frags_filter_blacklist, top_peaks, mode="insertions") 
+#' peak_mat <- peak_matrix(frags, peaks, mode="insertions") +#' peak_mat +#' +#' peak_mat %>% as("dgCMatrix") +#' #' @export peak_matrix <- function(fragments, ranges, mode = c("insertions", "fragments", "overlaps"), zero_based_coords = !is(ranges, "GRanges"), explicit_peak_names = TRUE) { assert_is(fragments, "IterableFragments") @@ -2699,6 +2999,7 @@ setMethod("short_description", "PeakMatrix", function(x) { ) }) +#' @describeIn IterableMatrix-misc-methods Subset a PeakMatrix setMethod("[", "PeakMatrix", function(x, i, j, ...) { if (missing(x)) stop("x is missing in matrix selection") # Handle transpose via recursive call @@ -2750,6 +3051,7 @@ setClass("TileMatrix", mode = character(0) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for TileMatrix objects setMethod("matrix_type", "TileMatrix", function(x) "uint32_t") setMethod("matrix_inputs", "TileMatrix", function(x) list()) @@ -2877,6 +3179,7 @@ setMethod("short_description", "TileMatrix", function(x) { ) }) +#' @describeIn IterableMatrix-misc-methods Subset a TileMatrix setMethod("[", "TileMatrix", function(x, i, j, ...) { if (missing(x)) stop("x is missing in matrix selection") @@ -2929,6 +3232,7 @@ setClass("ConvertMatrixType", type = character(0) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for ConvertMatrixType objects setMethod("matrix_type", signature(x = "ConvertMatrixType"), function(x) x@type) setMethod("iterate_matrix", "ConvertMatrixType", function(x) { iter_function <- get(sprintf("convert_matrix_%s_%s_cpp", matrix_type(x@matrix), matrix_type(x))) @@ -2942,6 +3246,7 @@ setMethod("short_description", "ConvertMatrixType", function(x) { ) }) +#' @describeIn IterableMatrix-misc-methods Subset ConvertMatrixType transforms setMethod("[", "ConvertMatrixType", function(x, i, j, ...) 
{ if (missing(x)) stop("x is missing in matrix selection") @@ -2994,22 +3299,26 @@ convert_matrix_type <- function(matrix, type = c("uint32_t", "double", "float")) #' Convert between BPCells matrix and R objects. #' -#' BPCells matrices can be interconverted with Matrix package +#' BPCells matrices can be interconverted with Matrix package #' dgCMatrix sparse matrices, as well as base R #' dense matrices (though this may result in high memory usage for large matrices) #' -#' @usage -#' # Convert to R from BPCells -#' as(bpcells_mat, "dgCMatrix") # Sparse matrix conversion -#' as.matrix(bpcells_mat) # Dense matrix conversion -#' -#' # Convert to BPCells from R -#' as(dgc_mat, "IterableMatrix") +#' @aliases as.matrix.IterableMatrix +#' @aliases as.matrix,IterableMatrix-method +#' +#' @details Use base R's `as()` to convert between BPCells matrices and +#' `dgCMatrix`/`matrix` representations, while `as.matrix()` materialises +#' dense matrices directly when needed. +#' @param x Matrix object to convert +#' @param ... Additional arguments passed to methods #' @examples -#' mat <- get_demo_mat()[1:2, 1:2] +#' # setup data +#' mat <- matrix(1:25, nrow=5, ncol=5) +#' rownames(mat) <- paste0("gene", seq_len(5)) +#' colnames(mat) <- paste0("cell", seq_len(5)) +#' mat <- mat %>% as("IterableMatrix") #' mat #' -#' #' ####################################################################### #' ## as(bpcells_mat, "dgCMatrix") example #' ####################################################################### @@ -3029,6 +3338,36 @@ convert_matrix_type <- function(matrix, type = c("uint32_t", "double", "float")) #' as(mat_dgc, "IterableMatrix") #' #' +#' @rdname matrix_R_conversion +#' @name matrix_R_conversion_coercions +#' @usage +#' \method{as.matrix}{IterableMatrix}(x, ...) +NULL + +#' Convert between BPCells matrices and R objects misc. +#' +#' See `matrix_R_conversion` for main page. 
+#' BPCells matrices can be interconverted with Matrix package `dgCMatrix` sparse matrices, +#' as well as base R dense matrices (though this may result in high memory usage for large matrices). +#' +#' @param from Object supplied to `base::coerce()` (typically generated by `as()`) +#' @param to Target class name for coercion +#' @param ... Additional arguments passed to methods +#' @aliases coerce,dgCMatrix,IterableMatrix-method +#' @aliases coerce,IterableMatrix,dgCMatrix-method +#' @aliases coerce,matrix,IterableMatrix-method +#' @aliases coerce,IterableMatrix,matrix-method +#' @usage +#' \S4method{coerce}{dgCMatrix,IterableMatrix}(from, to, ...) +#' \S4method{coerce}{IterableMatrix,dgCMatrix}(from, to, ...) +#' \S4method{coerce}{matrix,IterableMatrix}(from, to, ...) +#' \S4method{coerce}{IterableMatrix,matrix}(from, to, ...) +#' @name matrix_r_conversion-misc +#' @rdname matrix_r_conversion-misc +#' @docType methods +#' @keywords internal +NULL + #' @name matrix_R_conversion NULL @@ -3041,6 +3380,7 @@ setClass("Iterable_dgCMatrix_wrapper", mat = NULL ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for Iterable_dgCMatrix_wrapper objects setMethod("matrix_type", signature(x = "Iterable_dgCMatrix_wrapper"), function(x) "double") setMethod("matrix_inputs", "Iterable_dgCMatrix_wrapper", function(x) list()) @@ -3208,8 +3548,48 @@ matrix_stats <- function(matrix, } +#' Calculate svds +#' +#' Use the C++ Spectra solver (same as RSpectra package) to compute the largest +#' `k` singular values and corresponding singular vectors. Empirically, memory +#' use is much lower than `irlba::irlba()` because the solver runs entirely in C++. +#' +#' When RSpectra is installed, this adds a method to `RSpectra::svds()` for +#' `IterableMatrix` objects; otherwise it falls back to the bundled implementation. +#' +#' @param A The matrix whose truncated SVD is to be computed. +#' @param k Number of singular values requested. 
+#' @param nu Number of left singular vectors to compute (must equal `k` for IterableMatrix). +#' @param nv Number of right singular vectors to compute (must equal `k` for IterableMatrix). +#' @param opts Control parameters related to the computing algorithm (see Details). +#' @param threads Threads to use for matrix-vector products (BPCells specific). +#' @param ... Additional arguments passed to `RSpectra::svds()` when running on non-BPCells matrices. +#' @return A list with components `d`, `u`, `v`, `nconv`, `niter`, and `nops`. +#' @details +#' The `opts` list may include: +#' * `ncv`: Number of Lanczos basis vectors (`k < ncv <= min(m, n)` for an `m` by `n` matrix). Default `min(p, max(2*k+1, 20))`, where `p = min(m, n)`. +#' * `tol`: Precision parameter (default `1e-10`). +#' * `maxitr`: Maximum iterations (default `1000`). +#' * `center`, `scale`: Centering/scaling options (ignored for BPCells). +#' @examples +#' mat <- matrix(rnorm(500), nrow = 50, ncol = 10) +#' rownames(mat) <- paste0("gene", seq_len(50)) +#' colnames(mat) <- paste0("cell", seq_len(10)) +#' mat <- mat %>% as("dgCMatrix") %>% as("IterableMatrix") +#' +#' svd_res <- svds(mat, k = 5) +#' names(svd_res) +#' svd_res$d +#' dim(svd_res$u) +#' dim(svd_res$v) +#' +#' # Solver options use the same `opts` list format as RSpectra::svds +#' svd_res <- svds(mat, k = 5, opts = list(maxitr = 500)) +#' +#' @references +#' Qiu Y, Mei J (2022). RSpectra: Solvers for Large-Scale Eigenvalue and SVD Problems. R package version 0.16-1. #' @export -svds <- function (A, k, nu = k, nv = k, opts = list(), ...) UseMethod("svds") +svds <- function (A, k, nu = k, nv = k, opts = list(), threads = 0L, ...) 
UseMethod("svds") # RSpectra exports svds as an S3 Generic, but is a suggested dependency # With this approach, IterableMatrix objects will work with RSpectra::svds, @@ -3220,7 +3600,15 @@ rlang::on_load({ vctrs::s3_register("RSpectra::svds", "IterableMatrix") }) +#' Internal svds methods +#' @keywords internal +#' @name svds-internal +#' @rdname svds-internal +NULL + #' @export +#' @rdname svds-internal +#' @keywords internal svds.default <- function(A, k, nu = k, nv = k, opts = list(), ...) { if (requireNamespace("RSpectra", quietly = TRUE)) { RSpectra::svds(A=A, k=k, nu=nu, nv=nv, opts=opts, ...) @@ -3230,6 +3618,8 @@ svds.default <- function(A, k, nu = k, nv = k, opts = list(), ...) { } #' @export +#' @rdname svds-internal +#' @keywords internal svds.IterableMatrix <- function(A, k, nu = k, nv = k, opts = list(), threads=0, ...) { assert_is_wholenumber(threads) assert_is_wholenumber(k) @@ -3271,7 +3661,7 @@ svds.IterableMatrix <- function(A, k, nu = k, nv = k, opts = list(), threads=0, #' hexidecimal format. #' @details #' `checksum()` converts the non-zero elements of the sparse input matrix to double -#' precision, concatenates each element value with the element row and column index words, +#' precision, concatenates each element value with the element row and column index words, #' and uses these 16-byte blocks along with the matrix dimensions and row and column #' names to calculate the checksum. The checksum value depends on the storage order so #' column- and row-order matrices with the same element values give different checksum @@ -3281,8 +3671,6 @@ svds.IterableMatrix <- function(A, k, nu = k, nv = k, opts = list(), threads=0, #' @param matrix IterableMatrix object #' @return MD5 checksum string in hexidecimal format. 
#' @examples -#' library(Matrix) -#' library(BPCells) #' m1 <- matrix(seq(1,12), nrow=3) #' m2 <- as(m1, 'dgCMatrix') #' m3 <- as(m2, 'IterableMatrix') @@ -3291,7 +3679,7 @@ svds.IterableMatrix <- function(A, k, nu = k, nv = k, opts = list(), threads=0, checksum <- function(matrix) { assert_is(matrix, "IterableMatrix") - iter <- iterate_matrix(BPCells:::convert_matrix_type(matrix, "double")) + iter <- iterate_matrix(convert_matrix_type(matrix, "double")) checksum_double_cpp(iter) } @@ -3333,10 +3721,12 @@ checksum <- function(matrix) { #' ## Get mean of every row #' #' ## expect an error in the case that col-major matrix is passed -#' apply_by_row(mat, function(val, row, col) {sum(val) / nrow(mat)}) %>% -#' unlist() -#' +#' try( +#' apply_by_row(mat, function(val, row, col) {sum(val) / nrow(mat)}) %>% +#' unlist() +#' ) #' ## Need to transpose matrix to make sure it is in row-order +#' #' mat_row_order <- transpose_storage_order(mat) #' #' ## works as expected for row major @@ -3386,4 +3776,3 @@ apply_by_col <- function(mat, fun, ...) { } apply_matrix_double_cpp(iterate_matrix(convert_matrix_type(mat, "double")), f, FALSE) } - diff --git a/r/R/matrix_stats.R b/r/R/matrix_stats.R index e68dc102..cadf6e11 100644 --- a/r/R/matrix_stats.R +++ b/r/R/matrix_stats.R @@ -8,11 +8,16 @@ #' Find the nth quantile value(s) of each row in a matrix. Only supports transposed matrices. #' @param x IterableMatrix object or a matrix-like object. +#' @param rows (Integer) Optional vector of row indices to operate over. +#' @param cols (Integer) Optional vector of column indices to operate over. #' @param probs (Numeric) Quantile value(s) to be computed, between 0 and 1. +#' @param na.rm (Logical) Should missing values (NA) be removed? #' @param type (Integer) between 4 and 9 selecting which quantile algorithm to use, detailed in `matrixStats::rowQuantiles()` +#' @param useNames (Logical) Whether to use row and column names in the output. 
+#' @param drop (Logical) If TRUE and only one quantile is requested, the result is coerced to a vector (For non-BPCells objects). #' @return - `rowQuantiles():` If `length(probs) == 1`, return a numeric with number of entries equal to the number of rows in the matrix. #' Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a row in the input matrix. -#' @describeIn IterableMatrix-methods Calculate rowQuantiles (replacement for `matrixStats::rowQuantiles`) +#' @describeIn IterableMatrix-methods-stats Calculate rowQuantiles (replacement for `matrixStats::rowQuantiles`) #' @usage rowQuantiles( #' x, #' rows = NULL, @@ -39,7 +44,18 @@ rowQuantiles <- function(x, rows = NULL, cols = NULL, useNames = TRUE, drop = TRUE) { UseMethod("rowQuantiles") } -#' @export +#' @describeIn IterableMatrix-matrixgenerics Calculate rowQuantiles (replacement for `matrixStats::rowQuantiles`) +#' @return * `rowQuantiles()`: If `length(probs) == 1`, return a numeric with number of entries equal to the number of rows in the matrix. Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a row in the input matrix. +#' @examples +#' mat <- matrix(1:25, nrow = 5) %>% as("dgCMatrix") +#' mat +#' mat <- as(mat, "IterableMatrix") +#' ####################################################################### +#' ## rowQuantiles() example +#' ####################################################################### +#' rowQuantiles(transpose_storage_order(mat)) +#' +#' @method rowQuantiles default rowQuantiles.default <- function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, digits = 7L, ..., @@ -53,6 +69,14 @@ rowQuantiles.default <- function(x, rows = NULL, cols = NULL, } } #' @export +#' @return * `rowQuantiles()`: If `length(probs) == 1`, return a numeric with number of entries equal to the number of rows in the matrix. 
Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a row in the input matrix. +#' @examples +#' ####################################################################### +#' ## rowQuantiles() example +#' ####################################################################### +#' rowQuantiles(transpose_storage_order(mat)) +#' +#' @describeIn IterableMatrix-matrixgenerics Calculate rowQuantiles (replacement for `matrixStats::rowQuantiles`) rowQuantiles.IterableMatrix <- function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, digits = 7L, ..., @@ -94,8 +118,7 @@ rowQuantiles.IterableMatrix <- function(x, rows = NULL, cols = NULL, #' Find the nth quantile value(s) of each column in a matrix. Only supports non-transposed matrices. #' @return - `colQuantiles():` If `length(probs) == 1`, return a numeric with number of entries equal to the number of columns in the matrix. #' Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a col in the input matrix. -#' @describeIn IterableMatrix-methods Calculate colQuantiles (replacement for `matrixStats::colQuantiles`) -#' @inheritParams rowQuantiles +#' @describeIn IterableMatrix-methods-stats Calculate colQuantiles (replacement for `matrixStats::colQuantiles`) #' @usage colQuantiles( #' x, #' rows = NULL, @@ -122,7 +145,18 @@ colQuantiles <- function(x, rows = NULL, cols = NULL, useNames = TRUE, drop = TRUE) { UseMethod("colQuantiles") } -#' @export +#' @describeIn IterableMatrix-matrixgenerics Calculate colQuantiles (replacement for `matrixStats::colQuantiles`) +#' @return * `colQuantiles()`: If `length(probs) == 1`, return a numeric with number of entries equal to the number of columns in the matrix. Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a col in the input matrix. 
+#' @examples +#' mat <- matrix(1:25, nrow = 5) %>% as("dgCMatrix") +#' mat +#' mat <- as(mat, "IterableMatrix") +#' ####################################################################### +#' ## colQuantiles() example +#' ####################################################################### +#' colQuantiles(mat) +#' +#' @method colQuantiles default colQuantiles.default <- function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, digits = 7L, ..., @@ -136,6 +170,14 @@ colQuantiles.default <- function(x, rows = NULL, cols = NULL, } } #' @export +#' @return * `colQuantiles()`: If `length(probs) == 1`, return a numeric with number of entries equal to the number of columns in the matrix. Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a col in the input matrix. +#' @examples +#' ####################################################################### +#' ## colQuantiles() example +#' ####################################################################### +#' colQuantiles(mat) +#' +#' @describeIn IterableMatrix-matrixgenerics Calculate colQuantiles (replacement for `matrixStats::colQuantiles`) colQuantiles.IterableMatrix <- function(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by = 0.25), na.rm = FALSE, type = 7L, digits = 7L, ..., @@ -178,4 +220,43 @@ rlang::on_load({ setMethod(MatrixGenerics::colQuantiles, "IterableMatrix", colQuantiles.IterableMatrix) setMethod(MatrixGenerics::rowQuantiles, "IterableMatrix", rowQuantiles.IterableMatrix) } -}) \ No newline at end of file +}) + +#' MatrixGenerics methods for IterableMatrix +#' +#' S4 methods enabling MatrixGenerics generics (e.g., \code{rowQuantiles}, +#' \code{colQuantiles}, \code{rowVars}, \code{colVars}, \code{rowMaxs}, \code{colMaxs}) +#' to operate on \code{IterableMatrix}. These are registered at runtime only +#' when MatrixGenerics is available. 
+#' +#' @section Availability: +#' Methods are registered conditionally; if MatrixGenerics is not installed, +#' nothing is registered and the generics fall back as usual. +#' +#' @param x An \code{IterableMatrix}. +#' @param rows (Integer) Optional vector of row indices to operate over. +#' @param cols (Integer) Optional vector of column indices to operate over. +#' @param na.rm (Logical) Should missing values (NA) be removed? +#' @param center Optional center values (vector of length nrow(x) or ncol(x)) +#' @param probs (Numeric) Quantile value(s) to be computed, between 0 and 1. +#' @param type (Integer) between 4 and 9 selecting which quantile algorithm to use, detailed in `matrixStats::rowQuantiles()` +#' @param digits Number of decimal places for quantile calculations +#' @param drop (Logical) If TRUE and only one quantile is requested, the result is coerced to a vector (For non-BPCells objects). +#' @param useNames (Logical) Whether to use row and column names in the output. +#' @param ... Passed to the underlying implementation. 
+#' +#' @name IterableMatrix-matrixgenerics +#' @docType methods +#' +#' @aliases rowQuantiles,IterableMatrix-method +#' @aliases colQuantiles,IterableMatrix-method +#' @aliases rowVars,IterableMatrix-method +#' @aliases colVars,IterableMatrix-method +#' @aliases rowMaxs,IterableMatrix-method +#' @aliases colMaxs,IterableMatrix-method +#' +#' @seealso \code{\link{rowQuantiles}}, \code{\link{colQuantiles}}, +#' \code{\link{rowVars}}, \code{\link{colVars}}, +#' \code{\link{rowMaxs}}, \code{\link{colMaxs}} +#' @keywords internal +NULL diff --git a/r/R/plots.R b/r/R/plots.R index 2683de84..46f222eb 100644 --- a/r/R/plots.R +++ b/r/R/plots.R @@ -20,6 +20,13 @@ #' @param n Minimum number of colors needed #' @return Character vector of hex color codes #' @rdname palettes +#' @examples +#' ####################################################################### +#' ## discrete_palette() example +#' ####################################################################### +#' discrete_palette("stallion") +#' +#' #' @export discrete_palette <- function(name, n = 1) { palettes <- list( @@ -85,6 +92,13 @@ discrete_palette <- function(name, n = 1) { } #' @rdname palettes +#' @examples +#' ####################################################################### +#' ## continuous_palette() example +#' ####################################################################### +#' continuous_palette("bluePurpleDark") +#' +#' #' @export continuous_palette <- function(name) { palettes <- list( @@ -186,21 +200,32 @@ plot_read_count_knee <- function(read_counts, cutoff = NULL, return_data = FALSE #' @param bins Number of bins for density calculation #' @inheritParams plot_embedding #' @examples +#' \dontrun{ #' ## Prep data #' frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE) #' genes <- read_gencode_transcripts( #' file.path(tempdir(), "references"), release = "42", #' annotation_set = "basic", -#' features = "transcript" +#' features = "transcript", timeout = 3000 #' ) #' blacklist 
<- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") #' atac_qc <- qc_scATAC(frags, genes, blacklist) +#' } #' #' +#' ## Use pre-computed QC metrics for chr4 +#' atac_qc <- readr::read_delim( +#' file.path( +#' system.file("extdata", package = "BPCells"), +#' "qc_results_filtered_example_chr_4.tsv.gz"), +#' delim = "\t", show_col_types = FALSE +#' ) #' ## Render tss enrichment vs fragment plot #' plot_tss_scatter(atac_qc, min_frags = 1000, min_tss = 10) +#' #' @export plot_tss_scatter <- function(atac_qc, min_frags = NULL, min_tss = NULL, bins = 100, apply_styling = TRUE) { + assert_has_package("hexbin") assert_is(atac_qc, "data.frame") if (!is.null(min_frags)) { assert_is_numeric(min_frags) @@ -277,7 +302,7 @@ plot_tss_scatter <- function(atac_qc, min_frags = NULL, min_tss = NULL, bins = 1 #' @inheritParams plot_embedding #' @return Numeric vector where index i contans the number of length-i fragments #' @examples -#' frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE) +#' frags <- get_demo_frags() #' plot_fragment_length(frags) #' @export plot_fragment_length <- function(fragments, max_length = 500, return_data = FALSE, apply_styling = TRUE) { @@ -320,18 +345,32 @@ plot_fragment_length <- function(fragments, max_length = 500, return_data = FALS #' @inheritParams footprint #' @param genes Coordinate ranges for genes (must include strand) #' @param smooth Number of bases to smooth over (rolling average) +#' @param colors Discrete color palette to use for cell groups #' @seealso `footprint()`, `plot_tf_footprint()` #' @examples +#' #' ## Prep data -#' frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE) +#' frags <- get_demo_frags() +#' \dontrun{ #' genes <- read_gencode_transcripts( #' file.path(tempdir(), "references"), release = "42", #' annotation_set = "basic", -#' features = "transcript" +#' features = "transcript", timeout = 3000 +#' ) +#' } +#' +#' ## Use pre-computed transcripts for chr4 +#' genes <- readr::read_delim( 
+#' file.path( +#' system.file("extdata", package = "BPCells"), +#' "transcripts_filtered_example_chr_4.tsv.gz"), +#' delim = "\t", show_col_types = FALSE #' ) #' +#' #' ## Plot tss profile #' plot_tss_profile(frags, genes) +#' #' @export plot_tss_profile <- function(fragments, genes, cell_groups = rlang::rep_along(cellNames(fragments), "all"), flank = 2000L, smooth = 0L, zero_based_coords = !is(genes, "GRanges"), @@ -390,10 +429,24 @@ plot_tss_profile <- function(fragments, genes, cell_groups = rlang::rep_along(ce #' @inheritParams footprint #' @param motif_positions Coordinate ranges for motifs (must include strand) and #' have constant width +#' @param colors Discrete color palette to use for cell groups #' @seealso `footprint()`, `plot_tss_profile()` +#' @examples +#' \dontrun{ +#' +#' plot_tf_footprint( +#' frags, +#' motif_positions$CEBPA, +#' cell_groups = cell_types, +#' flank = 250, +#' smooth = 2 +#' ) + +#' ggplot2::labs(title="CEBPA") +#' } +#' # See example in vignette Getting Started #' @export plot_tf_footprint <- function(fragments, motif_positions, cell_groups = rlang::rep_along(cellNames(fragments), "all"), - flank = 250L, smooth = 0L, zero_based_coords = !is(genes, "GRanges"), + flank = 250L, smooth = 0L, zero_based_coords = !is(motif_positions, "GRanges"), colors = discrete_palette("stallion"), return_data = FALSE, apply_styling = TRUE) { assert_is(fragments, "IterableFragments") @@ -432,6 +485,21 @@ plot_tf_footprint <- function(fragments, motif_positions, cell_groups = rlang::r #' @param n Internal-use parameter marking the number of nested calls. 
This is used for #' finding the name of the "source" input variable from the caller's perspective #' @return Data frame with one column for each feature requested +#' @examples +#' # Collect features from a matrix +#' mat <- get_demo_mat() +#' # By ID +#' features_id <- collect_features(mat, "ENSG00000272602", gene_mapping = NULL) +#' head(features_id) +#' +#' # By Gene Symbol (using default human_gene_mapping) +#' features_symbol <- collect_features(mat, "MS4A1") +#' head(features_symbol) +#' +#' # Collect features from a data frame +#' df <- data.frame(a = 1:5, b = 6:10) +#' features_df <- collect_features(df, c("a", "b")) +#' head(features_df) #' @export collect_features <- function(source, features = NULL, gene_mapping = human_gene_mapping, n = 1) { if (!is.null(features)) { @@ -521,14 +589,15 @@ collect_features <- function(source, features = NULL, gene_mapping = human_gene_ #' in a grid. If `return_data` or `return_plot_list` is called, the return value will #' match that argument. 
#' @examples -## Prep data +#' ## Prep data #' set.seed(123) -#' mat <- get_demo_mat() +#' mat <- get_demo_mat()[,sample(1:ncol(get_demo_mat()), 200)] #' ## Normalize matrix -#' mat_norm <- log1p(multiply_cols(mat, 1/colSums(mat)) * 10000) %>% write_matrix_memory(compress = FALSE) +#' mat_norm <- log1p(multiply_cols(mat, 1/colSums(mat)) * 10000) %>% +#' write_matrix_memory(compress = FALSE) #' ## Get variable genes #' stats <- matrix_stats(mat, row_stats = "variance") -#' variable_genes <- order(stats$row_stats["variance",], decreasing=TRUE) %>% +#' variable_genes <- order(stats$row_stats["variance",], decreasing=TRUE) %>% #' head(1000) %>% #' sort() #' # Z score normalize genes @@ -561,7 +630,6 @@ collect_features <- function(source, features = NULL, gene_mapping = human_gene_ #' # umap, #' # features = c("MS4A1", "CD3E"), #' #) -#' #' @export plot_embedding <- function(source, embedding, features = NULL, quantile_range = c(0.01, 0.99), @@ -800,6 +868,16 @@ plot_embedding <- function(source, embedding, features = NULL, #' Rotate ggplot x axis labels #' @param degrees Number of degrees to rotate by +#' @examples +#' mat <- get_demo_mat() +#' cell_types <- paste("Group", rep(1:3, length.out = length(colnames(mat)))) +#' +#' ## Plot dot +#' scale_next_plot_height(0.8) +#' plot_dot(mat, c("MS4A1", "CD3E"), cell_types) +#' +#' scale_next_plot_height(0.8) +#' plot_dot(mat, c("MS4A1", "CD3E"), cell_types) + rotate_x_labels(90) #' @export rotate_x_labels <- function(degrees = 45) { ggplot2::theme(axis.text.x = ggplot2::element_text(angle = degrees, hjust = 1, vjust = 1)) @@ -824,11 +902,8 @@ rotate_x_labels <- function(degrees = 45) { #' cell_types <- paste("Group", rep(1:3, length.out = length(colnames(mat)))) #' #' ## Plot dot -#' plot <- plot_dot(mat, c("MS4A1", "CD3E"), cell_types) -#' -#' BPCells:::render_plot_from_storage( -#' plot, width = 4, height = 5 -#' ) +#' scale_next_plot_height(0.8) +#' plot_dot(mat, c("MS4A1", "CD3E"), cell_types) #' @export plot_dot <- 
function(source, features, groups, group_order = NULL, gene_mapping = human_gene_mapping, colors = c("lightgrey", "#4682B4"), diff --git a/r/R/trackplots.R b/r/R/trackplots.R index 5f04cbb9..3ddaa6bb 100644 --- a/r/R/trackplots.R +++ b/r/R/trackplots.R @@ -47,6 +47,33 @@ wrap_trackplot <- function(plot, height=NULL, takes_sideplot=FALSE, region=NULL, plot } +# Internal helper function to extract patches from patchwork objects +# Replicates the logic of patchwork:::get_patches() to avoid using unexported functions +get_patchwork_patches <- function(plot) { + if (inherits(plot, "patchwork")) { + patches <- plot$patches + if (is.null(patches)) patches <- list(plots = list()) + + # Extract the base plot (without patchwork components) + base_plot <- plot + base_plot$patches <- NULL + class(base_plot) <- setdiff(class(base_plot), "patchwork") + if (inherits(base_plot, "free_plot")) { + attr(base_plot, "patchwork_free_settings") <- NULL + if (is.null(attr(base_plot, "free_settings"))) { + class(base_plot) <- setdiff(class(base_plot), "free_plot") + } + } + + # Combine existing patches with the base plot + patches$plots <- c(patches$plots, list(base_plot)) + return(patches) + } else { + # Return a patches object with just the single plot + return(list(plots = list(plot))) + } +} + # Internal helper function to return empty track plots if there's no data to be plotted trackplot_empty <- function(region, label) { ggplot2::ggplot(tibble::tibble(label=label)) + @@ -73,6 +100,13 @@ get_patchwork_plots <- function(patchwork) { #' @param labels character vector of labels -- must match existing number of facets in plot #' @return **set_trackplot_label**: ggplot object with adjusted facet labels #' @rdname trackplot_utils +#' @examples +#' region <- "chr4:3034877-3044877" +#' plot <- trackplot_scalebar(region) +#' plot_labeled <- set_trackplot_label(plot, "Scalebar") +#' +#' plot_height_set <- set_trackplot_height(plot, 2) +#' get_trackplot_height(plot_height_set) #' @export 
set_trackplot_label <- function(plot, labels) { stopifnot(is(plot, "ggplot")) @@ -123,13 +157,14 @@ get_trackplot_height <- function(plot) { } #' Calculate y positions for trackplot segments to avoid overlap +#' #' Steps: #' 1. Calculate the maximum overlap depth of transcripts #' 2. Iterate through start/end of segments in sorted order #' 3. Randomly assign each segment a y-coordinate between 1 and max overlap depth, -#' with the restriction that a segment can't have the same y-coordinate as an overlapping segment +#' with the restriction that a segment cannot have the same y-coordinate as an overlapping segment #' @param data tibble of genome ranges with start and end columns, assumed to be on same chromosome. -#' @return Vector of y coordinates, one per input row, such that no ranges at the same y coordinate overlap +#' @return Vector of y coordinates, one per input row, such that no ranges at the same y coordinate overlap. #' @keywords internal trackplot_calculate_segment_height <- function(data) { data$row_number <- seq_len(nrow(data)) @@ -252,21 +287,6 @@ trackplot_normalize_ranges_with_metadata <- function(data, metadata) { return(data) } -#' Render a plot with intermediate disk storage step -#' -#' Take a plotting object and save in temp storage, so it can be outputted with exact dimensions. -#' Primarily used to allow for adjusting plot dimensions within function reference examples. -#' @param plot (ggplot) ggplot output from a plotting function -#' @param width (numeric) width of rendered plot -#' @param height (numeric) height of rendered plot -#' @keywords internal -render_plot_from_storage <- function(plot, width, height) { - assert_is(plot, "ggplot") - image_path <- tempfile(fileext = ".png") - ggplot2::ggsave(image_path, plot, width = width, height = height) - img <- png::readPNG(image_path) - grid::grid.raster(img) -} #' Combine track plots #' @@ -285,6 +305,7 @@ render_plot_from_storage <- function(plot, width, height) { #' by heights. 
A shared title and x-axis are put at the top. #' @seealso `trackplot_coverage()`, `trackplot_gene()`, `trackplot_loop()`, `trackplot_scalebar()` #' @examples +#' \dontrun{ #' ## Prep data #' frags <- get_demo_frags() #' @@ -292,31 +313,48 @@ render_plot_from_storage <- function(plot, width, height) { #' genes <- read_gencode_transcripts( #' file.path(tempdir(), "references"), release = "42", #' annotation_set = "basic", -#' features = "transcript" +#' features = "transcript", timeout = 3000 #' ) #' blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") #' read_counts <- qc_scATAC(frags, genes, blacklist)$nFrags +#' } +#' +#' ## We use pre-generated data for this example +#' frags <- get_demo_frags() #' region <- "chr4:3034877-4034877" #' cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags)))) -#' transcripts <- read_gencode_transcripts( -#' file.path(tempdir(), "references"), release = "42", -#' annotation_set = "basic" +#' genes <- readr::read_delim( +#' file.path( +#' system.file("extdata", package = "BPCells"), +#' "transcripts_filtered_example_chr_4.tsv.gz"), +#' delim = "\t", show_col_types = FALSE #' ) +#' read_counts <- readr::read_delim( +#' file.path( +#' system.file("extdata", package = "BPCells"), +#' "qc_results_filtered_example_chr_4.tsv.gz"), +#' delim = "\t", show_col_types = FALSE +#' )$nFrags #' region <- "chr4:3034877-4034877" #' -#' +#' #' ## Get all trackplots and scalebars to combine #' plot_scalebar <- trackplot_scalebar(region) -#' plot_gene <- trackplot_gene(transcripts, region) -#' plot_coverage <- trackplot_coverage(frags, region, groups = cell_types, cell_read_counts = read_counts) -#' -#' +#' plot_gene <- trackplot_gene(genes, region) +#' plot_coverage <- trackplot_coverage( +#' frags, +#' region, +#' groups = cell_types, +#' cell_read_counts = read_counts +#' ) +#' #' ## Combine trackplots and render #' ## Also remove colors from gene track -#' plot <- trackplot_combine( +#' 
scale_next_plot_height(0.6) +#' trackplot_combine( #' list(plot_scalebar, plot_coverage, plot_gene + ggplot2::guides(color = "none")) #' ) -#' BPCells:::render_plot_from_storage(plot, width = 6, height = 4) +#' #' @export trackplot_combine <- function(tracks, side_plot = NULL, title = NULL, side_plot_width = 0.3) { for (plot in tracks) { @@ -444,7 +482,6 @@ trackplot_combine <- function(tracks, side_plot = NULL, title = NULL, side_plot_ #' or list/data.frame/GRanges of length 1 specifying chr, start, end. See `help("genomic-ranges-like")` for details #' @param fragments Fragments object #' @param cell_read_counts Numeric vector of read counts for each cell (used for normalization) -#' @param scale_bar Whether to include a scale bar in the top track (`TRUE` or `FALSE`) #' @param bins Number of bins to plot across the region #' @param clip_quantile (optional) Quantile of values for clipping y-axis limits. Default of 0.999 will crop out #' just the most extreme outliers across the region. NULL to disable clipping @@ -457,24 +494,35 @@ trackplot_combine <- function(tracks, side_plot = NULL, title = NULL, side_plot_ #' `TRUE`, the return value will be modified accordingly. 
#' @seealso `trackplot_combine()`, `trackplot_gene()`, `trackplot_loop()`, `trackplot_scalebar()` #' @examples -## Prep data +#' ## Prep data #' frags <- get_demo_frags() +#' cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags)))) #' +#' \dontrun{ #' ## Use genes and blacklist to determine proper number of reads per cell #' genes <- read_gencode_transcripts( #' file.path(tempdir(), "references"), release = "42", #' annotation_set = "basic", -#' features = "transcript" +#' features = "transcript", timeout = 3000 #' ) #' blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") +#' #' read_counts <- qc_scATAC(frags, genes, blacklist)$nFrags +#' } #' region <- "chr4:3034877-4034877" -#' cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags)))) #' +#' ## We use pre-generated data for this example +#' read_counts <- readr::read_delim( +#' file.path(system.file("extdata", package = "BPCells"), +#' "qc_results_filtered_example_chr_4.tsv.gz"), +#' delim = "\t", show_col_types = FALSE +#' )$nFrags #' -#' BPCells:::render_plot_from_storage( -#' trackplot_coverage(frags, region, groups = cell_types, cell_read_counts = read_counts), -#' width = 6, height = 3 +#' +#' scale_next_plot_height(0.5) +#' trackplot_coverage( +#' frags, region, groups = cell_types, +#' cell_read_counts = read_counts #' ) #' @export trackplot_coverage <- function(fragments, region, groups, @@ -572,25 +620,31 @@ trackplot_coverage <- function(fragments, region, groups, #' #' Usually given as the output from `read_gencode_transcripts()` #' @inheritParams trackplot_coverage -#' @param labels Character vector with labels for each item in transcripts. 
NA for items that should not be labeled #' @param exon_size size for exon lines in units of mm #' @param gene_size size for intron/gene lines in units of mm -#' @param transcript_size size for transcript lines in units of mm #' @param label_size size for transcript labels in units of mm +#' @param track_label Label to put on the side of the track #' @return Plot of gene locations #' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_loop()`, `trackplot_scalebar()` #' @examples +#' \dontrun{ #' ## Prep data #' transcripts <- read_gencode_transcripts( #' file.path(tempdir(), "references"), release = "42", -#' annotation_set = "basic", features = "transcript" +#' annotation_set = "basic", features = "transcript", timeout = 3000 #' ) -#' region <- "chr4:3034877-4034877" +#' } #' +#' ## We use pre-generated data for this example +#' transcripts <- readr::read_delim( +#' file.path(system.file("extdata", package = "BPCells"), +#' "transcripts_filtered_example_chr_4.tsv.gz")) #' +#' region <- "chr4:3264877-3634877" +#' #' ## Plot gene trackplot -#' plot <- trackplot_gene(transcripts, region) -#' BPCells:::render_plot_from_storage(plot, width = 6, height = 1) +#' scale_next_plot_height(0.3) +#' trackplot_gene(transcripts, region) #' @export trackplot_gene <- function(transcripts, region, exon_size = 2.5, gene_size = 0.5, label_size = 11*.8/ggplot2::.pt, track_label="Genes", return_data = FALSE) { region <- normalize_ranges(region) @@ -686,6 +740,7 @@ trackplot_gene <- function(transcripts, region, exon_size = 2.5, gene_size = 0.5 #' @param colors Vector of hex color codes to use for the color scale. 
For numeric `color_by` data, this is passed to `ggplot2::scale_color_gradientn()`, #' otherwise it is interpreted as a discrete color palette in `ggplot2::scale_color_manual()` #' @param show_strand If TRUE, show strand direction as arrows +#' @param track_label Label to put on the side of the track #' @return Plot of genomic loci if return_data is FALSE, otherwise returns the data frame used to generate the plot #' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_loop()`, `trackplot_scalebar()`, `trackplot_gene()` #' @examples @@ -706,10 +761,8 @@ trackplot_gene <- function(transcripts, region, exon_size = 2.5, gene_size = 0.5 #' region <- "chr4:3034877-3044877" #' #' ## Plot peaks -#' BPCells:::render_plot_from_storage( -#' trackplot_genome_annotation(peaks, region, color_by = "enrichment"), -#' width = 6, height = 1 -#' ) +#' scale_next_plot_height(0.3) +#' trackplot_genome_annotation(peaks, region, color_by = "enrichment") #' @export trackplot_genome_annotation <- function(loci, region, color_by = NULL, colors = NULL, label_by = NULL, label_size = 11*.8/ggplot2::.pt, show_strand=FALSE, annotation_size = 2.5, track_label="Peaks", return_data = FALSE) { @@ -828,6 +881,7 @@ trackplot_genome_annotation <- function(loci, region, color_by = NULL, colors = #' otherwise it is interpreted as a discrete color palette in `ggplot2::scale_color_manual()` #' @param allow_truncated If FALSE, remove any loops that are not fully contained within `region` #' @param curvature Curvature value between 0 and 1. 1 is a 180-degree arc, and 0 is flat lines. 
+#' @param track_label Label to put on the side of the track #' @inheritParams trackplot_coverage #' #' @return Plot of loops connecting genomic coordinates @@ -843,8 +897,8 @@ trackplot_genome_annotation <- function(loci, region, color_by = NULL, colors = #' region <- "chr4:3034877-4034877" #' #' ## Plot loops -#' plot <- trackplot_loop(loops, region, color_by = "score") -#' BPCells:::render_plot_from_storage(plot, width = 6, height = 1.5) +#' scale_next_plot_height(0.3) +#' trackplot_loop(loops, region, color_by = "score") #' @export trackplot_loop <- function(loops, region, color_by=NULL, colors=NULL, allow_truncated=TRUE, curvature=0.75, track_label="Links", return_data = FALSE) { region <- normalize_ranges(region) @@ -944,9 +998,8 @@ trackplot_loop <- function(loops, region, color_by=NULL, colors=NULL, allow_trun #' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_gene()`, `trackplot_loop()` #' @examples #' region <- "chr4:3034877-3044877" -#' BPCells:::render_plot_from_storage( -#' trackplot_scalebar(region), width = 6, height = 1 -#' ) +#' scale_next_plot_height(0.3) +#' trackplot_scalebar(region) #' @export trackplot_scalebar <- function(region, font_pt=11) { region <- normalize_ranges(region) @@ -1037,6 +1090,8 @@ trackplot_helper <- function(gene, clusters, fragments, cell_read_counts, transc #' @return A plot object with aligned genome plots. Each aligned row has #' the text label, y-axis, and plot body. The relative height of each row is given #' by heights. A shared title and x-axis are put at the top. 
+#' @examples +#' # See ?trackplot_combine for examples #' @export #' @keywords internal draw_trackplot_grid <- function(..., labels, title = NULL, @@ -1073,7 +1128,7 @@ draw_trackplot_grid <- function(..., labels, title = NULL, new_heights[next_index] <- heights[i] next_index <- next_index + 1 } else { - plot_list <- patchwork:::get_patches(plots[[i]])$plots + plot_list <- get_patchwork_patches(plots[[i]])$plots names(plot_list) <- plots[[i]]$patchwork$labels if (is.null(names(plot_list))) { names(plot_list) <- rep("", length(plot_list)) @@ -1112,7 +1167,7 @@ draw_trackplot_grid <- function(..., labels, title = NULL, patch <- Reduce(`+`, c(labels_plots, data_plots)) + patchwork::plot_layout(ncol = 2, byrow = FALSE, widths = c(label_width, 1), heights = heights, guides = "collect") if (!is.null(title)) { - patch <- patch + patchwork::plot_annotation(title = title, theme = theme(plot.title = element_text(hjust = 0.5))) + patch <- patch + patchwork::plot_annotation(title = title, theme = ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))) } return(patch) } @@ -1131,7 +1186,7 @@ draw_trackplot_grid <- function(..., labels, title = NULL, #' @inheritParams plot_embedding #' @inheritParams convert_to_fragments #' @param region GRanges of length 1 with region to plot, or list/data.frame with -#' one entry each for chr, start, end. See `gene_region()` or [genomic-ranges] for details +#' one entry each for chr, start, end. See `gene_region()` or [genomic-ranges-like] for details #' @param fragments Fragments object #' @param cell_read_counts Numeric vector of read counts for each cell (used for normalization) #' @param bins Number of bins to plot across the region @@ -1144,6 +1199,8 @@ draw_trackplot_grid <- function(..., labels, title = NULL, #' `draw_trackplot_grid()`, the extra attribute `$patches$labels` will be added to #' specify the labels for each track. If `return_data` or `return_plot_list` is #' `TRUE`, the return value will be modified accordingly. 
+#' @examples +#' # See ?trackplot_coverage for examples #' @export #' @keywords internal trackplot_bulk <- function(fragments, region, groups, @@ -1254,3 +1311,109 @@ trackplot_bulk <- function(fragments, region, groups, return(plot) } } + +#' Temporarily scale the height of the next plot device +#' +#' Stores a scaling factor that `ragg_wrap()` consumes once, letting you tweak +#' the rendered height of the next PNG device without modifying the plot code. +#' +#' @param scale Numeric multiplier applied to the `height` argument the next +#' time `ragg_wrap()` is called. +#' @return Returns the previous option value (as returned by `options()`). +#' @examples +#' ## Prep data +#' ## Peaks generated from demo frags, as input into `call_peaks_tile()` +#' peaks <- tibble::tibble( +#' chr = factor(rep("chr4", 16)), +#' start = c(3041400, 3041733, 3037400, 3041933, 3040466, 3041200, +#' 3038200, 3038000, 3040266, 3037733, 3040800, 3042133, +#' 3038466, 3037200, 3043333, 3040066), +#' end = c(3041600, 3041933, 3037600, 3042133, 3040666, 3041400, +#' 3038400, 3038200, 3040466, 3037933, 3041000, 3042333, +#' 3038666, 3037400, 3043533, 3040266), +#' enrichment = c(46.4, 43.5, 28.4, 27.3, 17.3, 11.7, +#' 10.5, 7.95, 7.22, 6.86, 6.32, 6.14, +#' 5.96, 5.06, 4.51, 3.43) +#' ) +#' region <- "chr4:3034877-3044877" +#' +#' ## Plot peaks +#' scale_next_plot_height(0.3) +#' trackplot_genome_annotation(peaks, region, color_by = "enrichment") +#' @export +scale_next_plot_height <- function(scale) { + options("BPCells.scale_next_plot_height" = scale) +} + +#' Wrap `ragg::agg_png()` with optional one-shot height scaling +#' +#' Use `scale_next_plot_height()` to temporarily adjust the height of the *next* +#' call to `ragg_wrap()`. This is handy when a downstream plot (e.g., produced +#' by `trackplot_combine()`) renders too tall/short in a pipeline and you want a +#' quick scaling tweak without touching the plot code itself. The scaling factor +#' is applied once and then cleared. 
+#' @importFrom ragg agg_png +#' @param filename The name of the file. Follows the same semantics as the file naming in `grDevices::png()`, +#' meaning that you can provide a sprintf() compliant string format to name multiple plots (such as the default value) +#' @param width,height The dimensions of the device +#' @param units The unit width and height are measured in, +#' in either pixels `('px')`, inches `('in')`, millimeters `('mm')`, or centimeters `('cm')`. +#' @param pointsize The default pointsize of the device in pt. +#' This will in general not have any effect on grid graphics (including ggplot2) as text size is always set explicitly there. +#' @param background The background color of the device +#' @param res The resolution of the device. +#' This setting will govern how device dimensions given in inches, centimeters, or millimeters will be converted to pixels. +#' Further, it will be used to scale text sizes and linewidths +#' @param scaling A scaling factor to apply to the rendered line width and text size. Useful for getting the right dimensions +#' at the resolution that you need. If e.g. you need to render a plot at 4000x3000 pixels for it to fit into a layout, +#' but you find that the result appears too small, you can increase the `scaling` argument to make everything appear bigger at the same resolution. +#' @param snap_rect Should axis-aligned rectangles drawn with only fill snap to the pixel grid. +#' This will prevent anti-aliasing artifacts when two rectangles are touching at their border. +#' @param bitsize Should the device record colour as 8 or 16bit +#' @param bg Same as `background` for compatibility with old graphic device APIs +#' @return A graphics device as returned by `ragg::agg_png()`.
+#' @examples +#' # Create a simple plot using ragg_wrap +#' tmp_file <- tempfile(fileext = ".png") +#' ragg_wrap(tmp_file, width = 400, height = 300) +#' plot(1:10, 1:10) +#' dev.off() +#' +#' # Use scale_next_plot_height to adjust height +#' scale_next_plot_height(2) +#' tmp_file_scaled <- tempfile(fileext = ".png") +#' # Height will be effectively 600 +#' ragg_wrap(tmp_file_scaled, width = 400, height = 300) +#' plot(1:10, 1:10) +#' dev.off() +#' +#' # Clean up +#' unlink(c(tmp_file, tmp_file_scaled)) +#' @export +ragg_wrap <- function( + filename = "Rplot%03d.png", width = 480, height = 480, + units = "px", pointsize = 12, background = "white", res = 72, scaling = 1, snap_rect = TRUE, bitsize = 8, bg +) { + height_scale <- getOption("BPCells.scale_next_plot_height", default = 1) + options("BPCells.scale_next_plot_height" = NULL) + + # Heuristic: if dimensions are very small (<20) and units are pixels, assume inches were intended. + # This fixes issues where knitr passes inches to this custom device but defaults to px units. + if (units == "px" && width < 20 && height < 20) { + units <- "in" + } + + ragg::agg_png( + filename = filename, + width = width, + height = height * height_scale, + units = units, + pointsize = pointsize, + background = background, + res = res, + scaling = scaling, + snap_rect = snap_rect, + bitsize = bitsize, + bg = bg + ) +} diff --git a/r/R/transforms.R b/r/R/transforms.R index adcd7226..348c4d0d 100644 --- a/r/R/transforms.R +++ b/r/R/transforms.R @@ -20,9 +20,11 @@ setClass("TransformedMatrix", global_params = numeric(0) ) ) +#' @describeIn IterableMatrix-misc-methods Matrix data type for TransformedMatrix objects setMethod("matrix_type", "TransformedMatrix", function(x) "double") # Subsetting on TransformedMatrix objects +#' @describeIn IterableMatrix-misc-methods Subset TransformedMatrix results setMethod("[", "TransformedMatrix", function(x, i, j, ...) 
{ if (missing(x)) stop("x is missing in matrix selection") # Handle transpose via recursive call @@ -63,7 +65,7 @@ setMethod("short_description", "TransformLog1p", function(x) { "Transform log1p" ) }) -#' @describeIn IterableMatrix-methods Calculate log(x + 1) +#' @describeIn IterableMatrix-methods-stats Calculate log(x + 1) #' @examples #' ####################################################################### #' ## log1p() example @@ -86,7 +88,7 @@ setMethod("short_description", "TransformLog1pSlow", function(x) { ) }) -#' @describeIn IterableMatrix-methods Calculate log(x + 1) (non-SIMD version) +#' @describeIn IterableMatrix-methods-stats Calculate log(x + 1) (non-SIMD version) #' @examples #' ####################################################################### #' ## log1p_slow() example @@ -110,7 +112,7 @@ setMethod("short_description", "TransformExpm1", function(x) { "Transform expm1" ) }) -#' @describeIn IterableMatrix-methods Calculate exp(x) - 1 +#' @describeIn IterableMatrix-methods-stats Calculate exp(x) - 1 #' @examples #' ####################################################################### #' ## expm1() example @@ -132,7 +134,7 @@ setMethod("short_description", "TransformExpm1Slow", function(x) { "Transform expm1 (non-SIMD implementation)" ) }) -#' @describeIn IterableMatrix-methods Calculate exp(x) - 1 (non-SIMD version) +#' @describeIn IterableMatrix-methods-stats Calculate exp(x) - 1 (non-SIMD version) #' @examples #' ####################################################################### #' ## expm1_slow() example @@ -172,7 +174,7 @@ setMethod("short_description", "TransformPow", function(x) { ) }) -#' @describeIn IterableMatrix-methods Calculate x^y (elementwise; y > 0) +#' @describeIn IterableMatrix-methods-ops Calculate x^y (elementwise; y > 0) setMethod("^", signature(e1 = "IterableMatrix", e2 = "numeric"), function(e1, e2) { assert_len(e2, 1) assert_true(e2 > 0) @@ -256,6 +258,7 @@ setMethod("short_description", "TransformMinByRow", 
function(x) { }) #' @rdname min_elementwise +#' @param vals Numeric vector of positive values, with length equal to the number of rows (min_by_row) or columns (min_by_col) #' @description **min_by_row**: Take the minimum with a per-row constant #' @examples #' ####################################################################### @@ -351,6 +354,17 @@ setMethod("short_description", "TransformBinarize", function(x) { #' comparison to the threshold is >= (strict_inequality=FALSE) #' or > (strict_inequality=TRUE). #' @return binarized IterableMatrix object +#' @examples +#' set.seed(12345) +#' mat <- matrix(rpois(40, lambda = 5), nrow = 4) +#' rownames(mat) <- paste0("gene", 1:4) +#' mat <- as(mat, "dgCMatrix") %>% as("IterableMatrix") +#' +#' ####################################################################### +#' ## binarize() example +#' ####################################################################### +#' binarize(mat, threshold = 4) %>% as("dgCMatrix") +#' #' @export binarize <- function(mat, threshold=0, strict_inequality=TRUE) { assert_is(mat, "IterableMatrix") @@ -367,22 +381,21 @@ binarize <- function(mat, threshold=0, strict_inequality=TRUE) { global_params=c(threshold, strict_inequality)) convert_matrix_type(res, "uint32_t") } - +#' @describeIn IterableMatrix-methods-ops-misc Perform matrix < numeric comparison (IterableMatrix left) setMethod("<", signature(e1= "IterableMatrix", e2= "numeric"), function(e1, e2) { stop("matrix < numeric not supported for IterableMatrix objects") }) -#' @describeIn IterableMatrix-methods Binarize matrix according to numeric < matrix comparison +#' @describeIn IterableMatrix-methods-ops Perform numeric < matrix comparison (numeric left) +#' @param e1 First element of comparison +#' @param e2 Second element of comparison #' @examples -#' ####################################################################### -#' ## `e1 < e2` example -#' ####################################################################### #' 5 < mat 
#' #' setMethod("<", signature(e1= "numeric", e2= "IterableMatrix"), function(e1, e2) { binarize(e2, threshold=e1, strict_inequality=TRUE) }) -#' @describeIn IterableMatrix-methods Binarize matrix according to matrix > numeric comparison +#' @describeIn IterableMatrix-methods-ops Perform matrix > numeric comparison (IterableMatrix left) #' @examples #' ####################################################################### #' ## `e1 > e2` example @@ -393,14 +406,15 @@ setMethod("<", signature(e1= "numeric", e2= "IterableMatrix"), function(e1, e2) setMethod(">", signature(e1= "IterableMatrix", e2= "numeric"), function(e1, e2) { binarize(e1, threshold=e2, strict_inequality=TRUE) }) +#' @describeIn IterableMatrix-methods-ops-misc Perform numeric > matrix comparison (numeric left) setMethod(">", signature(e1= "numeric", e2= "IterableMatrix"), function(e1, e2) { stop("numeric > matrix not supported for IterableMatrix objects") }) - +#' @describeIn IterableMatrix-methods-ops-misc Perform matrix <= numeric comparison (IterableMatrix left) setMethod("<=", signature(e1= "IterableMatrix", e2= "numeric"), function(e1, e2) { stop("matrix <= numeric not supported for IterableMatrix objects") }) -#' @describeIn IterableMatrix-methods Binarize matrix according to numeric <= matrix comparison +#' @describeIn IterableMatrix-methods-ops Perform numeric <= matrix comparison (numeric left) #' @examples #' ####################################################################### #' ## `e1 <= e2` example @@ -411,7 +425,7 @@ setMethod("<=", signature(e1= "IterableMatrix", e2= "numeric"), function(e1, e2) setMethod("<=", signature(e1= "numeric", e2= "IterableMatrix"), function(e1, e2) { binarize(e2, threshold=e1, strict_inequality=FALSE) }) -#' @describeIn IterableMatrix-methods Binarize matrix according to matrix >= numeric comparison +#' @describeIn IterableMatrix-methods-ops Perform matrix >= numeric comparison (IterableMatrix left) #' @examples #' 
####################################################################### #' ## `e1 >= e2` example @@ -422,6 +436,7 @@ setMethod("<=", signature(e1= "numeric", e2= "IterableMatrix"), function(e1, e2) setMethod(">=", signature(e1= "IterableMatrix", e2= "numeric"), function(e1, e2) { binarize(e1, threshold=e2, strict_inequality=FALSE) }) +#' @describeIn IterableMatrix-methods-ops-misc Compare a numeric value to an IterableMatrix using >= (numeric left operand) setMethod(">=", signature(e1= "numeric", e2= "IterableMatrix"), function(e1, e2) { stop("numeric >= matrix not supported for IterableMatrix objects") }) @@ -444,7 +459,7 @@ setMethod("short_description", "TransformRound", function(x) { }) # Initially, allow only digits=0. -#' @describeIn IterableMatrix-methods round to nearest integer (digits must be 0) +#' @describeIn IterableMatrix-methods-ops round to nearest integer (digits must be 0) #' @examples #' ####################################################################### #' ## round() example @@ -530,6 +545,20 @@ setMethod("short_description", "SCTransformPearsonTransposeSlow", function(x) { #' @param columns_are_cells Whether the columns of the matrix correspond to cells (default) or genes #' @param slow If TRUE, use a 10x slower but more precise implementation (default FALSE) #' @return IterableMatrix +#' @examples +#' set.seed(12345) +#' mat <- matrix(rpois(1000 * 100, lambda = 0.5), nrow = 1000, ncol = 100) +#' rownames(mat) <- paste0("gene", 1:1000) +#' colnames(mat) <- paste0("cell", 1:100) +#' mat <- as(mat, "dgCMatrix") %>% as("IterableMatrix") +#' +#' # Calculate dummy parameters +#' gene_theta <- runif(1000, 0.1, 10) +#' gene_beta <- runif(1000, 0.1, 10) +#' cell_read_counts <- runif(100, 1000, 10000) +#' +#' res <- sctransform_pearson(mat, gene_theta, gene_beta, cell_read_counts) +#' #' @export sctransform_pearson <- function(mat, gene_theta, gene_beta, cell_read_counts, min_var = -Inf, clip_range = c(-10, 10), columns_are_cells=TRUE, slow=FALSE) { 
assert_is(mat, "IterableMatrix") @@ -707,7 +736,7 @@ setMethod("short_description", "TransformScaleShift", function(x) { }) # Basic dispatch for scaling/shifting (Create TransformScaleShift and then apply function to it) -#' @describeIn IterableMatrix-methods Multiply by a constant, or multiply rows by a vector length nrow(mat) +#' @describeIn IterableMatrix-methods-ops Multiply by a constant, or multiply rows by a vector length nrow(mat) #' @examples #' ####################################################################### #' ## `e1 * e2` example @@ -723,11 +752,12 @@ setMethod("*", signature(e1 = "IterableMatrix", e2 = "numeric"), function(e1, e2 e1 <- wrapMatrix("TransformScaleShift", convert_matrix_type(e1, "double")) e1 * e2 }) +#' @describeIn IterableMatrix-methods-ops-misc Multiply an IterableMatrix by a numeric value or row-wise vector (numeric left operand) setMethod("*", signature(e1 = "numeric", e2 = "IterableMatrix"), function(e1, e2) { e2 <- wrapMatrix("TransformScaleShift", convert_matrix_type(e2, "double")) e2 * e1 }) -#' @describeIn IterableMatrix-methods Add a constant, or row-wise addition with a vector length nrow(mat) +#' @describeIn IterableMatrix-methods-ops Add a constant, or row-wise addition with a vector length nrow(mat) #' @examples #' ####################################################################### #' ## `e1 + e2` example @@ -744,13 +774,14 @@ setMethod("+", signature(e1 = "IterableMatrix", e2 = "numeric"), function(e1, e2 e1 <- wrapMatrix("TransformScaleShift", convert_matrix_type(e1, "double")) e1 + e2 }) +#' @describeIn IterableMatrix-methods-ops-misc Add an IterableMatrix to a numeric value or row-wise vector (numeric left operand) setMethod("+", signature(e1 = "numeric", e2 = "IterableMatrix"), function(e1, e2) { if (all(e1 == 0)) return(e2) e2 <- wrapMatrix("TransformScaleShift", convert_matrix_type(e2, "double")) e2 + e1 }) # Note: we skip numeric / IterableMatrix as it would result in a lot of infinities for dividing by 
0. -#' @describeIn IterableMatrix-methods Divide by a constant, or divide rows by a vector length nrow(mat) +#' @describeIn IterableMatrix-methods-ops Divide by a constant, or divide rows by a vector length nrow(mat) #' @examples #' ####################################################################### #' ## `e1 / e2` example @@ -765,7 +796,7 @@ setMethod("+", signature(e1 = "numeric", e2 = "IterableMatrix"), function(e1, e2 setMethod("/", signature(e1 = "IterableMatrix", e2 = "numeric"), function(e1, e2) { e1 * (1 / e2) }) -#' @describeIn IterableMatrix-methods Subtract a constant, or row-wise subtraction with a vector length nrow(mat) +#' @describeIn IterableMatrix-methods-ops Subtract a constant, or row-wise subtraction with a vector length nrow(mat) #' @examples #' ####################################################################### #' ## `e1 - e2` example @@ -780,11 +811,13 @@ setMethod("/", signature(e1 = "IterableMatrix", e2 = "numeric"), function(e1, e2 setMethod("-", signature(e1 = "IterableMatrix", e2 = "numeric"), function(e1, e2) { e1 + (-e2) }) +#' @describeIn IterableMatrix-methods-ops-misc Subtract matrix from a numeric constant/vector setMethod("-", signature(e1 = "numeric", e2 = "IterableMatrix"), function(e1, e2) { e2 * -1 + e1 }) # Full dispatch for scaling/shifting +#' @describeIn IterableMatrix-misc-methods Scale TransformScaleShift results by numeric values setMethod("*", signature(e1 = "TransformScaleShift", e2 = "numeric"), function(e1, e2) { # Convenience renaming - x is matrix, y is vector/scalar x <- e1 @@ -856,6 +889,7 @@ setMethod("*", signature(e1 = "TransformScaleShift", e2 = "numeric"), function(e } return(x) }) +#' @describeIn IterableMatrix-misc-methods Shift TransformScaleShift results by numeric values setMethod("+", signature(e1 = "TransformScaleShift", e2 = "numeric"), function(e1, e2) { if (all(e2 == 0)) return(e1) # Convenience renaming - x is matrix, y is vector/scalar @@ -894,9 +928,11 @@ setMethod("+", signature(e1 = 
"TransformScaleShift", e2 = "numeric"), function(e return(x) }) # Just take advantage of commutative property to only implement half +#' @describeIn IterableMatrix-misc-methods Apply numeric scaling on the left to TransformScaleShift results setMethod("*", signature(e1 = "numeric", e2 = "TransformScaleShift"), function(e1, e2) { e2 * e1 }) +#' @describeIn IterableMatrix-misc-methods Add TransformScaleShift results to numeric values (numeric left operand) setMethod("+", signature(e1 = "numeric", e2 = "TransformScaleShift"), function(e1, e2) { e2 + e1 }) @@ -1051,6 +1087,21 @@ setMethod("short_description", "TransformLinearResidual", function(x) { #' (e.g. the gene axis in typical single cell analysis). Options include "row" (default) and "col". #' #' @return IterableMatrix +#' @examples +#' set.seed(12345) +#' mat <- matrix(rnorm(1000 * 100), nrow = 1000, ncol = 100) +#' rownames(mat) <- paste0("gene", 1:1000) +#' colnames(mat) <- paste0("cell", 1:100) +#' mat <- as(mat, "dgCMatrix") %>% as("IterableMatrix") +#' +#' latent_data <- data.frame( +#' batch = sample(c("A", "B"), 100, replace = TRUE), +#' age = rnorm(100, mean = 30, sd = 10) +#' ) +#' +#' # Regress out batch and age +#' res <- regress_out(mat, latent_data, prediction_axis = "row") +#' #' @export regress_out <- function(mat, latent_data, prediction_axis = c("row", "col")) { prediction_axis <- match.arg(prediction_axis) diff --git a/r/cleanup b/r/cleanup index 77521205..a3ec45c2 100755 --- a/r/cleanup +++ b/r/cleanup @@ -6,4 +6,12 @@ fi if [ -f tools/h5write ]; then rm tools/h5write +fi + +if [ -d tools/highway/lib ]; then + rm -r tools/highway/lib +fi + +if [ -f tools/cxx17_filesystem ]; then + rm tools/cxx17_filesystem fi \ No newline at end of file diff --git a/r/cleanup.win b/r/cleanup.win new file mode 100644 index 00000000..0d966c71 --- /dev/null +++ b/r/cleanup.win @@ -0,0 +1,21 @@ +#!/bin/sh + +if [ -f src/Makevars ]; then + rm src/Makevars +fi + +if [ -f tools/h5write ]; then + rm tools/h5write +fi
+ +if [ -f tools/h5write.exe ]; then + rm tools/h5write.exe +fi + +if [ -d tools/highway/lib ]; then + rm -r tools/highway/lib +fi + +if [ -f tools/cxx17_filesystem ]; then + rm tools/cxx17_filesystem +fi \ No newline at end of file diff --git a/r/configure b/r/configure index d6af5179..ebbf6fc6 100755 --- a/r/configure +++ b/r/configure @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh if [ -z $BPCELLS_DEBUG_INSTALL ]; then exec 3>/dev/null @@ -16,7 +16,7 @@ fi # or set the CI or ENABLE_INSTALL_COUNTING environment variables ahead of installation. if [ -z "$CI" ]; then ENABLE_INSTALL_COUNTING=${ENABLE_INSTALL_COUNTING:-yes}; fi -if [ "$ENABLE_INSTALL_COUNTING" == "yes" ]; then +if [ "$ENABLE_INSTALL_COUNTING" = "yes" ]; then curl --silent "https://plausible.benparks.net/flask-plausible/bpcells-configure" > /dev/null 2> /dev/null \ || true echo "Recording install count metrics" @@ -150,6 +150,7 @@ if [ -z $CXX17_OK ]; then printf "\n\nUnable to compile program with C++17 std::filesystem.\nPlease install a newer compiler version and set CC and CXX in ~/.R/Makevars\n" exit 1 fi +rm -f tools/cxx17_filesystem ############################ # Build Highway SIMD library @@ -158,7 +159,9 @@ printf "\nTesting availability of highway SIMD library...\n" HWY_OK="" # Minimum required version (may override via env var HWY_MIN_VERSION=X.Y.Z) HWY_MIN_VERSION=${HWY_MIN_VERSION:-1.0.5} -IFS=. read -r HWY_MIN_MAJOR HWY_MIN_MINOR HWY_MIN_PATCH <<< "$HWY_MIN_VERSION" +HWY_MIN_MAJOR=$(echo "$HWY_MIN_VERSION" | cut -d. -f1) +HWY_MIN_MINOR=$(echo "$HWY_MIN_VERSION" | cut -d. -f2) +HWY_MIN_PATCH=$(echo "$HWY_MIN_VERSION" | cut -d.
-f3) sed \ -e "s/@HWY_MIN_MAJOR@/$HWY_MIN_MAJOR/g" \ -e "s/@HWY_MIN_MINOR@/$HWY_MIN_MINOR/g" \ @@ -187,14 +190,16 @@ else HWY_LIBS="" fi # Only attempt second compile if pkg-config succeeded - if [ -n "$HWY_LIBS" ] && $CXX tools/hwy-test.cpp -o tools/hwy-test $CXXFLAGS $LDFLAGS $HWY_CFLAGS $HWY_LIBS > /dev/null 2> "$HWY_COMPILE_LOG"; then - HWY_VERSION_OUTPUT=$(tools/hwy-test) - printf "$HWY_VERSION_OUTPUT" - HWY_OK="yes" - elif grep -q "Highway too old" "$HWY_COMPILE_LOG"; then + if [ -n "$HWY_LIBS" ]; then + if $CXX tools/hwy-test.cpp -o tools/hwy-test $CXXFLAGS $LDFLAGS $HWY_CFLAGS $HWY_LIBS > /dev/null 2> "$HWY_COMPILE_LOG"; then + HWY_VERSION_OUTPUT=$(tools/hwy-test) + printf "%s" "$HWY_VERSION_OUTPUT" + HWY_OK="yes" + elif grep -q "Highway too old" "$HWY_COMPILE_LOG"; then printf "\nHighway is installed but too old: %s\n" "$(sed -n 's/.*#pragma message: //p' "$HWY_COMPILE_LOG")" - else + else printf "Highway not found or unusable\n" + fi fi fi rm -f tools/hwy-test "$HWY_COMPILE_LOG" tools/hwy-test.cpp @@ -202,7 +207,7 @@ rm -f tools/hwy-test "$HWY_COMPILE_LOG" tools/hwy-test.cpp if [ -z $HWY_OK ]; then if [ !
-f tools/highway/lib/libhwy.a ]; then printf "\nBuilding highway SIMD library from source\n" - CXX=$CXX bash src/vendor/highway/manual-build/build_highway.sh src/vendor/highway tools/highway && HWY_OK="yes"; + CXX=$CXX sh src/vendor/highway/manual-build/build_highway.sh src/vendor/highway tools/highway && HWY_OK="yes"; if [ -z $HWY_OK ]; then printf "\n\nUnable to build highway SIMD library from source\n" exit 1 @@ -217,6 +222,7 @@ else printf "\nFound working highway SIMD library\n" fi + # Make substitutions in Makevars.in sed \ -e "s|%HDF5_CFLAGS%|${HDF5_CFLAGS}|g" \ @@ -228,7 +234,10 @@ sed \ -e "s|%ENV_LDFLAGS%|${ENV_LDFLAGS}|g" \ src/Makevars.in > src/Makevars -if [ "$ENABLE_INSTALL_COUNTING" == "yes" ]; then +# # Clean up highway build directory to avoid CRAN NOTE about object files +# # The highway library will be rebuilt as needed during installation + +if [ "$ENABLE_INSTALL_COUNTING" = "yes" ]; then curl --silent https://plausible.benparks.net/flask-plausible/bpcells-configure-success > /dev/null 2> /dev/null \ || true fi diff --git a/r/configure.win b/r/configure.win index 396abbb9..15bd8d16 100755 --- a/r/configure.win +++ b/r/configure.win @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh if [ -z $BPCELLS_DEBUG_INSTALL ]; then exec 3>/dev/null @@ -15,7 +15,7 @@ fi # or set the CI or ENABLE_INSTALL_COUNTING environment variables ahead of installation. 
if [ -z "$CI" ]; then ENABLE_INSTALL_COUNTING=${ENABLE_INSTALL_COUNTING:-yes}; fi -if [ "$ENABLE_INSTALL_COUNTING" == "yes" ]; then +if [ "$ENABLE_INSTALL_COUNTING" = "yes" ]; then curl --silent "https://plausible.benparks.net/flask-plausible/bpcells-configure" > /dev/null 2> /dev/null \ || true echo "Recording install count metrics" @@ -95,6 +95,7 @@ if [ -z $CXX17_OK ]; then printf "\n\nUnable to compile program with C++17 std::filesystem.\nPlease install a newer compiler version and set CC and CXX in ~/.R/Makevars\n" exit 1 fi +rm -f tools/cxx17_filesystem ############################ # Build Highway SIMD library @@ -179,7 +180,7 @@ sed \ -e "s|%ENV_LDFLAGS%|${ENV_LDFLAGS}|g" \ src/Makevars.in > src/Makevars -if [ "$ENABLE_INSTALL_COUNTING" == "yes" ]; then +if [ "$ENABLE_INSTALL_COUNTING" = "yes" ]; then curl --silent https://plausible.benparks.net/flask-plausible/bpcells-configure-success > /dev/null 2> /dev/null \ || true fi diff --git a/r/data-raw/human_gene_mapping.R b/r/data-raw/human_gene_mapping.R index 91657800..a7337b05 100644 --- a/r/data-raw/human_gene_mapping.R +++ b/r/data-raw/human_gene_mapping.R @@ -11,7 +11,7 @@ library(magrittr) # Pull data from HGNC, and make a named vector that maps non-canonical gene names/ # symbols to their canonical names. Only unambiguous mappings will be stored hgnc <- readr::read_tsv( - "http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/non_alt_loci_set.txt", + "https://ftp.ebi.ac.uk/pub/databases/genenames/out_of_date_hgnc/tsv/non_alt_loci_set.txt", col_types=readr::cols(.default=readr::col_character()) ) diff --git a/r/data-raw/internal_data.R b/r/data-raw/internal_data.R new file mode 100644 index 00000000..5abb7609 --- /dev/null +++ b/r/data-raw/internal_data.R @@ -0,0 +1,37 @@ +# Copyright 2026 BPCells contributors +# +# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +# https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +# <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +# option. This file may not be copied, modified, or distributed +# except according to those terms.
+ +# Pulls transcripts from gencode, filters to only chr4 and saves as package data. +# Also runs qc_scATAC on the demo fragments filtered to chr4 and saves as package data. + +library(dplyr) +library(BPCells) + + +# Get transcripts +transcripts <- read_gencode_transcripts( + file.path(tempdir(), "references"), release = "42", + annotation_set = "basic", + features = "transcript" +) +transcripts_filtered_example_chr_4 <- transcripts %>% dplyr::filter(chr %in% "chr4") + + + +frags <- get_demo_frags() %>% select_chromosomes("chr4") +blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") +qc_results_filtered_example_chr_4 <- qc_scATAC(frags, transcripts_filtered_example_chr_4, blacklist) +readr::write_delim( + transcripts_filtered_example_chr_4, file.path("./inst/extdata/transcripts_filtered_example_chr_4.tsv.gz"), + delim = "\t" +) +readr::write_delim( + qc_results_filtered_example_chr_4, file.path("./inst/extdata/qc_results_filtered_example_chr_4.tsv.gz"), + delim = "\t" +) + diff --git a/r/data-raw/mouse_gene_mapping.R b/r/data-raw/mouse_gene_mapping.R index fef4b6e5..2684d0d5 100644 --- a/r/data-raw/mouse_gene_mapping.R +++ b/r/data-raw/mouse_gene_mapping.R @@ -15,14 +15,14 @@ library(dplyr) # A full list of non-withdrawn gene IDs mgi_list <- readr::read_tsv( - "http://www.informatics.jax.org/downloads/reports/MRK_List2.rpt" + "https://www.informatics.jax.org/downloads/reports/MRK_List2.rpt" ) %>% select(mgi_id = "MGI Accession ID", symbol = "Marker Symbol", status = "Status", name = "Marker Name", type = "Marker Type", alias = "Marker Synonyms (pipe-separated)") # Contains some withdrawn IDs, and a superset of the current mgi_list mgi_entrez <- readr::read_tsv( - "http://www.informatics.jax.org/downloads/reports/MGI_EntrezGene.rpt", + "https://www.informatics.jax.org/downloads/reports/MGI_EntrezGene.rpt", col_names = c("mgi_id", "symbol", "status", "name", "cM_pos", "chr", "type", "secondary_ids", "entrez_id", "alias", 
"feature_type", "start", "end", "strand", "biotype") ) %>% select(mgi_id, symbol, status, name, secondary_ids, entrez_id, alias) %>% @@ -44,7 +44,7 @@ mgi_entrez_mapping <- select(mgi_entrez, symbol, alt = alias) %>% stopifnot(all.equal(mgi_list_mapping, mgi_entrez_mapping)) mgi_ensembl <- readr::read_tsv( - "http://www.informatics.jax.org/downloads/reports/MRK_ENSEMBL.rpt", + "https://www.informatics.jax.org/downloads/reports/MRK_ENSEMBL.rpt", col_names = c("mgi_id", "symbol", "name", "cM_pos", "chr", "ensembl_id", "ensembl_transcript", "ensembl_prot", "feature_type", "start", "end", "strand", "biotypes") ) %>% select(mgi_id, symbol, name, ensembl_id) diff --git a/r/inst/extdata/qc_results_filtered_example_chr_4.tsv.gz b/r/inst/extdata/qc_results_filtered_example_chr_4.tsv.gz new file mode 100644 index 00000000..e27a819f Binary files /dev/null and b/r/inst/extdata/qc_results_filtered_example_chr_4.tsv.gz differ diff --git a/r/inst/extdata/transcripts_filtered_example_chr_4.tsv.gz b/r/inst/extdata/transcripts_filtered_example_chr_4.tsv.gz new file mode 100644 index 00000000..f0dad3a1 Binary files /dev/null and b/r/inst/extdata/transcripts_filtered_example_chr_4.tsv.gz differ diff --git a/r/man/IterableFragments-methods.Rd b/r/man/IterableFragments-methods.Rd index 20b89712..a98e68de 100644 --- a/r/man/IterableFragments-methods.Rd +++ b/r/man/IterableFragments-methods.Rd @@ -4,27 +4,52 @@ \alias{IterableFragments-methods} \alias{show,IterableFragments-method} \alias{cellNames} +\alias{cellNames,IterableFragments-method} \alias{cellNames<-} +\alias{cellNames<-,IterableFragments-method} \alias{chrNames} +\alias{chrNames,IterableFragments-method} \alias{chrNames<-} +\alias{chrNames<-,IterableFragments-method} +\alias{as.data.frame,IterableFragments-method} +\alias{c,IterableFragments-method} \title{IterableFragments methods} \usage{ \S4method{show}{IterableFragments}(object) cellNames(x) +\S4method{cellNames}{IterableFragments}(x) + cellNames(x, ...) 
<- value +\S4method{cellNames}{IterableFragments}(x, ...) <- value + chrNames(x) +\S4method{chrNames}{IterableFragments}(x) + chrNames(x, ...) <- value + +\S4method{chrNames}{IterableFragments}(x, ...) <- value + +\S4method{as.data.frame}{IterableFragments}(x, row.names = NULL, optional = FALSE, ...) + +\S4method{c}{IterableFragments}(x, ...) } \arguments{ \item{object}{IterableFragments object} \item{x}{an IterableFragments object} +\item{...}{Additional arguments (not used)} + \item{value}{Character vector of new names} + +\item{row.names}{Optional row names for compatibility with \code{as.data.frame()}. +Ignored by IterableFragments methods.} + +\item{optional}{Logical flag for compatibility with \code{as.data.frame()}; ignored.} } \value{ \itemize{ @@ -53,12 +78,24 @@ Methods for IterableFragments objects \item \code{cellNames()}: Get cell names +\item \code{cellNames(IterableFragments)}: Get cell names for IterableFragments + \item \code{cellNames(x, ...) <- value}: Set cell names +\item \code{cellNames(IterableFragments) <- value}: Set cell names for IterableFragments + \item \code{chrNames()}: Set chromosome names +\item \code{chrNames(IterableFragments)}: Get chromosome names for IterableFragments + \item \code{chrNames(x, ...) 
<- value}: Set chromosome names +\item \code{chrNames(IterableFragments) <- value}: Set chromosome names for IterableFragments + +\item \code{as.data.frame(IterableFragments)}: Coerce IterableFragments to a data.frame + +\item \code{c(IterableFragments)}: Concatenate IterableFragments objects + }} \examples{ ## Prep data diff --git a/r/man/IterableFragments-misc-methods.Rd b/r/man/IterableFragments-misc-methods.Rd new file mode 100644 index 00000000..0992a531 --- /dev/null +++ b/r/man/IterableFragments-misc-methods.Rd @@ -0,0 +1,119 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fragments.R +\docType{methods} +\name{chrNames,FragmentsTsv-method} +\alias{chrNames,FragmentsTsv-method} +\alias{cellNames,FragmentsTsv-method} +\alias{chrNames,UnpackedMemFragments-method} +\alias{cellNames,UnpackedMemFragments-method} +\alias{chrNames,PackedMemFragments-method} +\alias{cellNames,PackedMemFragments-method} +\alias{chrNames,FragmentsDir-method} +\alias{cellNames,FragmentsDir-method} +\alias{chrNames,FragmentsHDF5-method} +\alias{cellNames,FragmentsHDF5-method} +\alias{IterableFragments-misc-methods} +\alias{chrNames,ChrSelectName-method} +\alias{chrNames,ChrSelectIndex-method} +\alias{cellNames,CellSelectName-method} +\alias{cellNames,CellSelectIndex-method} +\alias{cellNames,CellMerge-method} +\alias{chrNames,ChrRename-method} +\alias{cellNames,CellRename-method} +\alias{cellNames,CellPrefix-method} +\alias{chrNames,MergeFragments-method} +\alias{cellNames,MergeFragments-method} +\title{IterableFragments subclass methods} +\usage{ +\S4method{chrNames}{FragmentsTsv}(x) + +\S4method{cellNames}{FragmentsTsv}(x) + +\S4method{chrNames}{UnpackedMemFragments}(x) + +\S4method{cellNames}{UnpackedMemFragments}(x) + +\S4method{chrNames}{PackedMemFragments}(x) + +\S4method{cellNames}{PackedMemFragments}(x) + +\S4method{chrNames}{FragmentsDir}(x) + +\S4method{cellNames}{FragmentsDir}(x) + +\S4method{chrNames}{FragmentsHDF5}(x) + 
+\S4method{cellNames}{FragmentsHDF5}(x) + +\S4method{chrNames}{ChrSelectName}(x) + +\S4method{chrNames}{ChrSelectIndex}(x) + +\S4method{cellNames}{CellSelectName}(x) + +\S4method{cellNames}{CellSelectIndex}(x) + +\S4method{cellNames}{CellMerge}(x) + +\S4method{chrNames}{ChrRename}(x) + +\S4method{cellNames}{CellRename}(x) + +\S4method{cellNames}{CellPrefix}(x) + +\S4method{chrNames}{MergeFragments}(x) + +\S4method{cellNames}{MergeFragments}(x) +} +\arguments{ +\item{x}{An object inheriting from \code{IterableFragments}.} +} +\description{ +Methods defined for classes that extend \code{IterableFragments}, providing access +to metadata or specialised behaviours for storage backends and selection +wrappers. +} +\section{Functions}{ +\itemize{ +\item \code{chrNames(FragmentsTsv)}: Get chromosome names for FragmentsTsv + +\item \code{cellNames(FragmentsTsv)}: Get cell names for FragmentsTsv + +\item \code{chrNames(UnpackedMemFragments)}: Get chromosome names for UnpackedMemFragments + +\item \code{cellNames(UnpackedMemFragments)}: Get cell names for UnpackedMemFragments + +\item \code{chrNames(PackedMemFragments)}: Get chromosome names for PackedMemFragments + +\item \code{cellNames(PackedMemFragments)}: Get cell names for PackedMemFragments + +\item \code{chrNames(FragmentsDir)}: Get chromosome names for FragmentsDir + +\item \code{cellNames(FragmentsDir)}: Get cell names for FragmentsDir + +\item \code{chrNames(FragmentsHDF5)}: Get chromosome names for FragmentsHDF5 + +\item \code{cellNames(FragmentsHDF5)}: Get cell names for FragmentsHDF5 + +\item \code{chrNames(ChrSelectName)}: Get chromosome names for ChrSelectName + +\item \code{chrNames(ChrSelectIndex)}: Get chromosome names for ChrSelectIndex + +\item \code{cellNames(CellSelectName)}: Get cell names for CellSelectName + +\item \code{cellNames(CellSelectIndex)}: Get cell names for CellSelectIndex + +\item \code{cellNames(CellMerge)}: Get cell names for CellMerge + +\item \code{chrNames(ChrRename)}: Get chromosome 
names for ChrRename + +\item \code{cellNames(CellRename)}: Get cell names for CellRename + +\item \code{cellNames(CellPrefix)}: Get cell names for CellPrefix + +\item \code{chrNames(MergeFragments)}: Get chromosome names for MergeFragments + +\item \code{cellNames(MergeFragments)}: Get cell names for MergeFragments + +}} +\keyword{internal} diff --git a/r/man/IterableMatrix-matrixgenerics.Rd b/r/man/IterableMatrix-matrixgenerics.Rd new file mode 100644 index 00000000..ccb3bf94 --- /dev/null +++ b/r/man/IterableMatrix-matrixgenerics.Rd @@ -0,0 +1,336 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix.R, R/matrix_stats.R +\docType{methods} +\name{colVars.default} +\alias{colVars.default} +\alias{colVars.IterableMatrix} +\alias{rowVars.default} +\alias{rowVars.IterableMatrix} +\alias{rowMaxs.default} +\alias{rowMaxs.IterableMatrix} +\alias{colMaxs.default} +\alias{colMaxs.IterableMatrix} +\alias{rowQuantiles.default} +\alias{rowQuantiles.IterableMatrix} +\alias{colQuantiles.default} +\alias{colQuantiles.IterableMatrix} +\alias{IterableMatrix-matrixgenerics} +\alias{rowQuantiles,IterableMatrix-method} +\alias{colQuantiles,IterableMatrix-method} +\alias{rowVars,IterableMatrix-method} +\alias{colVars,IterableMatrix-method} +\alias{rowMaxs,IterableMatrix-method} +\alias{colMaxs,IterableMatrix-method} +\title{MatrixGenerics methods for IterableMatrix} +\usage{ +\method{colVars}{default}( + x, + rows = NULL, + cols = NULL, + na.rm = FALSE, + center = NULL, + ..., + useNames = TRUE +) + +colVars.IterableMatrix( + x, + rows = NULL, + cols = NULL, + na.rm = FALSE, + center = NULL, + ..., + useNames = TRUE +) + +\method{rowVars}{default}( + x, + rows = NULL, + cols = NULL, + na.rm = FALSE, + center = NULL, + ..., + useNames = TRUE +) + +rowVars.IterableMatrix( + x, + rows = NULL, + cols = NULL, + na.rm = FALSE, + center = NULL, + ..., + useNames = TRUE +) + +\method{rowMaxs}{default}(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., 
useNames = TRUE) + +rowMaxs.IterableMatrix( + x, + rows = NULL, + cols = NULL, + na.rm = FALSE, + ..., + useNames = TRUE +) + +\method{colMaxs}{default}(x, rows = NULL, cols = NULL, na.rm = FALSE, ..., useNames = TRUE) + +colMaxs.IterableMatrix( + x, + rows = NULL, + cols = NULL, + na.rm = FALSE, + ..., + useNames = TRUE +) + +\method{rowQuantiles}{default}( + x, + rows = NULL, + cols = NULL, + probs = seq(from = 0, to = 1, by = 0.25), + na.rm = FALSE, + type = 7L, + digits = 7L, + ..., + useNames = TRUE, + drop = TRUE +) + +rowQuantiles.IterableMatrix( + x, + rows = NULL, + cols = NULL, + probs = seq(from = 0, to = 1, by = 0.25), + na.rm = FALSE, + type = 7L, + digits = 7L, + ..., + useNames = TRUE, + drop = TRUE +) + +\method{colQuantiles}{default}( + x, + rows = NULL, + cols = NULL, + probs = seq(from = 0, to = 1, by = 0.25), + na.rm = FALSE, + type = 7L, + digits = 7L, + ..., + useNames = TRUE, + drop = TRUE +) + +colQuantiles.IterableMatrix( + x, + rows = NULL, + cols = NULL, + probs = seq(from = 0, to = 1, by = 0.25), + na.rm = FALSE, + type = 7L, + digits = 7L, + ..., + useNames = TRUE, + drop = TRUE +) +} +\arguments{ +\item{x}{An \code{IterableMatrix}.} + +\item{rows}{(Integer) Optional vector of row indices to operate over.} + +\item{cols}{(Integer) Optional vector of column indices to operate over.} + +\item{na.rm}{(Logical) Should missing values (NA) be removed?} + +\item{center}{Optional center values (vector of length nrow(x) or ncol(x))} + +\item{...}{Passed to the underlying implementation.} + +\item{useNames}{(Logical) Whether to use row and column names in the output.} + +\item{probs}{(Numeric) Quantile value(s) to be computed, between 0 and 1.} + +\item{type}{(Integer) between 4 and 9 selecting which quantile algorithm to use, detailed in \code{matrixStats::rowQuantiles()}} + +\item{digits}{Number of decimal places for quantile calculations} + +\item{drop}{(Logical) If TRUE and only one quantile is requested, the result is coerced to a vector 
(For non-BPCells objects).} +} +\value{ +\itemize{ +\item \verb{colVars()}: vector of col variance +} + +\itemize{ +\item \verb{colVars()}: vector of col variance +} + +\itemize{ +\item \verb{rowVars()}: vector of row variance +} + +\itemize{ +\item \verb{rowVars()}: vector of row variance +} + +\itemize{ +\item \verb{rowMaxs()}: vector of row maxs +} + +\itemize{ +\item \verb{rowMaxs()}: vector of row maxs +} + +\itemize{ +\item \verb{colMaxs()}: vector of col maxs +} + +\itemize{ +\item \verb{colMaxs()}: vector of col maxs +} + +\itemize{ +\item \verb{rowQuantiles()}: If \code{length(probs) == 1}, return a numeric with number of entries equal to the number of rows in the matrix. Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a row in the input matrix. +} + +\itemize{ +\item \verb{rowQuantiles()}: If \code{length(probs) == 1}, return a numeric with number of entries equal to the number of rows in the matrix. Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a row in the input matrix. +} + +\itemize{ +\item \verb{colQuantiles()}: If \code{length(probs) == 1}, return a numeric with number of entries equal to the number of columns in the matrix. Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a col in the input matrix. +} + +\itemize{ +\item \verb{colQuantiles()}: If \code{length(probs) == 1}, return a numeric with number of entries equal to the number of columns in the matrix. Else, return a Matrix of quantile values, with cols representing each quantile, and each row representing a col in the input matrix. +} +} +\description{ +S4 methods enabling MatrixGenerics generics (e.g., \code{rowQuantiles}, +\code{colQuantiles}, \code{rowVars}, \code{colVars}, \code{rowMaxs}, \code{colMaxs}) +to operate on \code{IterableMatrix}. These are registered at runtime only +when MatrixGenerics is available. 
+} +\section{Functions}{ +\itemize{ +\item \code{colVars(default)}: Calculate colVars (replacement for \code{matrixStats::colVars()}) + +\item \code{colVars.IterableMatrix()}: Calculate colVars (replacement for \code{matrixStats::colVars()}) + +\item \code{rowVars(default)}: Calculate rowVars (replacement for \code{matrixStats::rowVars()}) + +\item \code{rowVars.IterableMatrix()}: Calculate rowVars (replacement for \code{matrixStats::rowVars()}) + +\item \code{rowMaxs(default)}: Calculate rowMaxs (replacement for \code{matrixStats::rowMaxs()}) + +\item \code{rowMaxs.IterableMatrix()}: Calculate rowMaxs (replacement for \code{matrixStats::rowMaxs()}) + +\item \code{colMaxs(default)}: Calculate colMaxs (replacement for \code{matrixStats::colMaxs()}) + +\item \code{colMaxs.IterableMatrix()}: Calculate colMaxs (replacement for \code{matrixStats::colMaxs()}) + +\item \code{rowQuantiles(default)}: Calculate rowQuantiles (replacement for \code{matrixStats::rowQuantiles}) + +\item \code{rowQuantiles.IterableMatrix()}: Calculate rowQuantiles (replacement for \code{matrixStats::rowQuantiles}) + +\item \code{colQuantiles(default)}: Calculate colQuantiles (replacement for \code{matrixStats::colQuantiles}) + +\item \code{colQuantiles.IterableMatrix()}: Calculate colQuantiles (replacement for \code{matrixStats::colQuantiles}) + +}} +\section{Availability}{ + +Methods are registered conditionally; if MatrixGenerics is not installed, +nothing is registered and the generics fall back as usual. 
+} + +\examples{ +mat <- matrix(1:25, nrow = 5) \%>\% as("dgCMatrix") +mat +mat <- as(mat, "IterableMatrix") +####################################################################### +## colVars() example +####################################################################### +colVars(mat) + +####################################################################### +## colVars() example +####################################################################### +colVars(mat) + +mat <- matrix(1:25, nrow = 5) \%>\% as("dgCMatrix") +mat +mat <- as(mat, "IterableMatrix") +####################################################################### +## rowVars() example +####################################################################### +rowVars(mat) + +####################################################################### +## rowVars() example +####################################################################### +rowVars(mat) + +mat <- matrix(1:25, nrow = 5) \%>\% as("dgCMatrix") +mat +mat <- as(mat, "IterableMatrix") +####################################################################### +## rowMaxs() example +####################################################################### +rowMaxs(mat) + +####################################################################### +## rowMaxs() example +####################################################################### +rowMaxs(mat) + +mat <- matrix(1:25, nrow = 5) \%>\% as("dgCMatrix") +mat +mat <- as(mat, "IterableMatrix") +####################################################################### +## colMaxs() example +####################################################################### +colMaxs(mat) + +####################################################################### +## colMaxs() example +####################################################################### +colMaxs(mat) + +mat <- matrix(1:25, nrow = 5) \%>\% as("dgCMatrix") +mat +mat <- as(mat, "IterableMatrix") 
+####################################################################### +## rowQuantiles() example +####################################################################### +rowQuantiles(transpose_storage_order(mat)) + +####################################################################### +## rowQuantiles() example +####################################################################### +rowQuantiles(transpose_storage_order(mat)) + +mat <- matrix(1:25, nrow = 5) \%>\% as("dgCMatrix") +mat +mat <- as(mat, "IterableMatrix") +####################################################################### +## colQuantiles() example +####################################################################### +colQuantiles(mat) + +####################################################################### +## colQuantiles() example +####################################################################### +colQuantiles(mat) + +} +\seealso{ +\code{\link{rowQuantiles}}, \code{\link{colQuantiles}}, +\code{\link{rowVars}}, \code{\link{colVars}}, +\code{\link{rowMaxs}}, \code{\link{colMaxs}} +} +\keyword{internal} diff --git a/r/man/IterableMatrix-methods-core.Rd b/r/man/IterableMatrix-methods-core.Rd new file mode 100644 index 00000000..3958b051 --- /dev/null +++ b/r/man/IterableMatrix-methods-core.Rd @@ -0,0 +1,123 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix.R +\name{IterableMatrix-methods-core} +\alias{IterableMatrix-methods-core} +\alias{matrix_type} +\alias{storage_order} +\alias{storage_order,IterableMatrix-method} +\alias{show,IterableMatrix-method} +\alias{t,IterableMatrix-method} +\alias{[,IterableMatrix,ANY,ANY,ANY-method} +\alias{[<-,IterableMatrix,ANY,ANY,ANY-method} +\alias{dimnames<-,IterableMatrix,list-method} +\alias{dimnames<-,IterableMatrix,NULL-method} +\title{IterableMatrix core methods} +\usage{ +matrix_type(x) + +storage_order(x) + +\S4method{storage_order}{IterableMatrix}(x) + +\S4method{show}{IterableMatrix}(object) + 
+\S4method{t}{IterableMatrix}(x) + +\S4method{[}{IterableMatrix,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{[}{IterableMatrix,ANY,ANY,ANY}(x, i, j, ...) <- value + +\S4method{dimnames}{IterableMatrix,list}(x) <- value + +\S4method{dimnames}{IterableMatrix,NULL}(x) <- value +} +\arguments{ +\item{x}{IterableMatrix object} + +\item{object}{IterableMatrix object} + +\item{i}{Row indices or selection helpers.} + +\item{j}{Column indices or selection helpers.} + +\item{...}{Additional arguments passed to methods} + +\item{drop}{Logical indicating whether to drop dimensions when subsetting.} + +\item{value}{New dimnames (list of length 2, or NULL)} +} +\value{ +\itemize{ +\item \code{t()} Transposed object +} +} +\description{ +Core operations for IterableMatrix objects: inspection, transpose, subsetting, and dimnames. +} +\section{Functions}{ +\itemize{ +\item \code{matrix_type()}: Get the matrix data type (mat_uint32_t, mat_float, or mat_double for now) + +\item \code{storage_order()}: Get the matrix storage order ("row" or "col") (generic) + +\item \code{storage_order(IterableMatrix)}: Get the matrix storage order ("row" or "col") + +\item \code{show(IterableMatrix)}: Display an IterableMatrix + +\item \code{t(IterableMatrix)}: Transpose an IterableMatrix + +\item \code{x[i}: Subset an IterableMatrix + +\item \code{`[`(x = IterableMatrix, i = ANY, j = ANY) <- value}: Assign into an IterableMatrix + +\item \code{dimnames(x = IterableMatrix) <- value}: Set dimnames of an IterableMatrix, similar to base R \verb{dimnames<-()} + +\item \code{dimnames(x = IterableMatrix) <- value}: Remove dimnames of an IterableMatrix + +}} +\examples{ +## Prep data +mat <- matrix(1:25, nrow = 5) \%>\% as("dgCMatrix") +mat +mat <- as(mat, "IterableMatrix") +mat + + +####################################################################### +## matrix_type() example +####################################################################### +matrix_type(mat) + + 
+####################################################################### +## storage_order() example +####################################################################### +storage_order(mat) + + +####################################################################### +## show() example +####################################################################### +show(mat) + + +####################################################################### +## t() example +####################################################################### +t(mat) + + +####################################################################### +## x[i, j, ..., drop = TRUE] example +####################################################################### +mat[1:2, 1:2] +####################################################################### +## x[i, j, ...] <- value example +####################################################################### +mat_changed <- mat +new_mat <- matrix(rep(2,4), nrow = 2) \%>\% as("IterableMatrix") +mat_changed[1:2, 1:2] <- new_mat +mat_changed \%>\% as("dgCMatrix") + + +} diff --git a/r/man/IterableMatrix-methods-ops-misc.Rd b/r/man/IterableMatrix-methods-ops-misc.Rd new file mode 100644 index 00000000..1b9ce7a4 --- /dev/null +++ b/r/man/IterableMatrix-methods-ops-misc.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix.R, R/transforms.R +\name{IterableMatrix-methods-ops-misc} +\alias{IterableMatrix-methods-ops-misc} +\alias{<,IterableMatrix,numeric-method} +\alias{>,numeric,IterableMatrix-method} +\alias{<=,IterableMatrix,numeric-method} +\alias{>=,numeric,IterableMatrix-method} +\alias{*,numeric,IterableMatrix-method} +\alias{+,numeric,IterableMatrix-method} +\alias{-,numeric,IterableMatrix-method} +\title{IterableMatrix operations (additional overloads)} +\usage{ +\S4method{<}{IterableMatrix,numeric}(e1, e2) + +\S4method{>}{numeric,IterableMatrix}(e1, e2) + 
+\S4method{<=}{IterableMatrix,numeric}(e1, e2) 
+ +\S4method{>=}{numeric,IterableMatrix}(e1, e2) + +\S4method{*}{numeric,IterableMatrix}(e1, e2) + +\S4method{+}{numeric,IterableMatrix}(e1, e2) + +\S4method{-}{numeric,IterableMatrix}(e1, e2) +} +\description{ +Extra operator overloads documented separately to avoid duplicate entries in the main reference. +} +\section{Functions}{ +\itemize{ +\item \code{e1 < e2}: Perform matrix < numeric comparison (IterableMatrix left) + +\item \code{e1 > e2}: Perform numeric > matrix comparison (numeric left) + +\item \code{e1 <= e2}: Perform matrix <= numeric comparison (IterableMatrix left) + +\item \code{e1 >= e2}: Compare a numeric value to an IterableMatrix using >= (numeric left operand) + +\item \code{e1 * e2}: Multiply an IterableMatrix by a numeric value or row-wise vector (numeric left operand) + +\item \code{e1 + e2}: Add an IterableMatrix to a numeric value or row-wise vector (numeric left operand) + +\item \code{e1 - e2}: Subtract matrix from a numeric constant/vector + +}} +\keyword{internal} diff --git a/r/man/IterableMatrix-methods-ops.Rd b/r/man/IterableMatrix-methods-ops.Rd new file mode 100644 index 00000000..16235c0f --- /dev/null +++ b/r/man/IterableMatrix-methods-ops.Rd @@ -0,0 +1,164 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix.R, R/transforms.R +\name{IterableMatrix-methods-ops} +\alias{IterableMatrix-methods-ops} +\alias{\%*\%,IterableMatrix,matrix-method} +\alias{^,IterableMatrix,numeric-method} +\alias{<,numeric,IterableMatrix-method} +\alias{>,IterableMatrix,numeric-method} +\alias{<=,numeric,IterableMatrix-method} +\alias{>=,IterableMatrix,numeric-method} +\alias{round,IterableMatrix-method} +\alias{*,IterableMatrix,numeric-method} +\alias{+,IterableMatrix,numeric-method} +\alias{/,IterableMatrix,numeric-method} +\alias{-,IterableMatrix,numeric-method} +\title{IterableMatrix operations} +\usage{ +\S4method{\%*\%}{IterableMatrix,matrix}(x, y) + +\S4method{^}{IterableMatrix,numeric}(e1, e2) + 
+\S4method{<}{numeric,IterableMatrix}(e1, e2) + +\S4method{>}{IterableMatrix,numeric}(e1, e2) + +\S4method{<=}{numeric,IterableMatrix}(e1, e2) + +\S4method{>=}{IterableMatrix,numeric}(e1, e2) + +\S4method{round}{IterableMatrix}(x, digits = 0) + +\S4method{*}{IterableMatrix,numeric}(e1, e2) + +\S4method{+}{IterableMatrix,numeric}(e1, e2) + +\S4method{/}{IterableMatrix,numeric}(e1, e2) + +\S4method{-}{IterableMatrix,numeric}(e1, e2) +} +\arguments{ +\item{x}{IterableMatrix object} + +\item{y}{matrix} + +\item{e1}{First element of comparison} + +\item{e2}{Second element of comparison} + +\item{digits}{Number of decimal places for rounding} +} +\value{ +\itemize{ +\item \code{x \%*\% y}: dense matrix result +} +} +\description{ +Matrix multiplication, arithmetic, and comparison operations for IterableMatrix objects. +} +\section{Functions}{ +\itemize{ +\item \code{x \%*\% y}: Multiply by a dense matrix + +\item \code{e1^e2}: Calculate x^y (elementwise; y > 0) + +\item \code{e1 < e2}: Perform numeric < matrix comparison (numeric left) + +\item \code{e1 > e2}: Perform matrix > numeric comparison (IterableMatrix left) + +\item \code{e1 <= e2}: Perform numeric <= matrix comparison (numeric left) + +\item \code{e1 >= e2}: Perform matrix >= numeric comparison (IterableMatrix left) + +\item \code{round(IterableMatrix)}: round to nearest integer (digits must be 0) + +\item \code{e1 * e2}: Multiply by a constant, or multiply rows by a vector length nrow(mat) + +\item \code{e1 + e2}: Add a constant, or row-wise addition with a vector length nrow(mat) + +\item \code{e1 / e2}: Divide by a constant, or divide rows by a vector length nrow(mat) + +\item \code{e1 - e2}: Subtract a constant, or row-wise subtraction with a vector length nrow(mat) + +}} +\examples{ +## Prep data +mat <- matrix(1:25, nrow = 5) \%>\% as("dgCMatrix") +mat +mat <- as(mat, "IterableMatrix") +mat + +####################################################################### +## `x \%*\% y` example 
+####################################################################### +mat \%*\% as(matrix(1:50, nrow = 5), "dgCMatrix") + + +5 < mat + + +####################################################################### +## `e1 > e2` example +####################################################################### +mat > 5 + + +####################################################################### +## `e1 <= e2` example +####################################################################### +5 <= mat + + +####################################################################### +## `e1 >= e2` example +####################################################################### +mat >= 5 + + +####################################################################### +## round() example +####################################################################### +round(mat) + + +####################################################################### +## `e1 * e2` example +####################################################################### +## Multiplying by a constant +mat * 5 + +## Multiplying by a vector of length `nrow(mat)` +mat * 1:nrow(mat) + + +####################################################################### +## `e1 + e2` example +####################################################################### +## Add by a constant +mat + 5 + +## Adding row-wise by a vector of length `nrow(mat)` +mat + 1:nrow(mat) + + +####################################################################### +## `e1 / e2` example +####################################################################### +## Divide by a constant +mat / 5 + +## Divide by a vector of length `nrow(mat)` +mat / 1:nrow(mat) + + +####################################################################### +## `e1 - e2` example +####################################################################### +## Subtracting by a constant +mat - 5 + +## Subtracting by a vector of length `nrow(mat)` +mat - 1:nrow(mat) + + +} diff --git 
a/r/man/IterableMatrix-methods.Rd b/r/man/IterableMatrix-methods-stats.Rd similarity index 54% rename from r/man/IterableMatrix-methods.Rd rename to r/man/IterableMatrix-methods-stats.Rd index f737299c..66c622f3 100644 --- a/r/man/IterableMatrix-methods.Rd +++ b/r/man/IterableMatrix-methods-stats.Rd @@ -1,12 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/matrix.R, R/matrix_stats.R, R/transforms.R -\name{IterableMatrix-methods} -\alias{IterableMatrix-methods} -\alias{matrix_type} -\alias{storage_order} -\alias{show,IterableMatrix-method} -\alias{t,IterableMatrix-method} -\alias{\%*\%,IterableMatrix,matrix-method} +\name{IterableMatrix-methods-stats} +\alias{IterableMatrix-methods-stats} \alias{rowSums,IterableMatrix-method} \alias{colSums,IterableMatrix-method} \alias{rowMeans,IterableMatrix-method} @@ -21,28 +16,8 @@ \alias{log1p_slow} \alias{expm1,IterableMatrix-method} \alias{expm1_slow} -\alias{^,IterableMatrix,numeric-method} -\alias{<,numeric,IterableMatrix-method} -\alias{>,IterableMatrix,numeric-method} -\alias{<=,numeric,IterableMatrix-method} -\alias{>=,IterableMatrix,numeric-method} -\alias{round,IterableMatrix-method} -\alias{*,IterableMatrix,numeric-method} -\alias{+,IterableMatrix,numeric-method} -\alias{/,IterableMatrix,numeric-method} -\alias{-,IterableMatrix,numeric-method} -\title{IterableMatrix methods} +\title{IterableMatrix summary statistics} \usage{ -matrix_type(x) - -storage_order(x) - -\S4method{show}{IterableMatrix}(object) - -\S4method{t}{IterableMatrix}(x) - -\S4method{\%*\%}{IterableMatrix,matrix}(x, y) - \S4method{rowSums}{IterableMatrix}(x) \S4method{colSums}{IterableMatrix}(x) @@ -108,47 +83,31 @@ log1p_slow(x) \S4method{expm1}{IterableMatrix}(x) expm1_slow(x) +} +\arguments{ +\item{x}{IterableMatrix object or a matrix-like object.} -\S4method{^}{IterableMatrix,numeric}(e1, e2) - -\S4method{<}{numeric,IterableMatrix}(e1, e2) - -\S4method{>}{IterableMatrix,numeric}(e1, e2) - 
-\S4method{<=}{numeric,IterableMatrix}(e1, e2) +\item{rows}{(Integer) Optional vector of row indices to operate over.} -\S4method{>=}{IterableMatrix,numeric}(e1, e2) +\item{cols}{(Integer) Optional vector of column indices to operate over.} -\S4method{round}{IterableMatrix}(x, digits = 0) +\item{na.rm}{(Logical) Should missing values (NA) be removed?} -\S4method{*}{IterableMatrix,numeric}(e1, e2) +\item{center}{Optional center values (vector of length nrow(x) or ncol(x))} -\S4method{+}{IterableMatrix,numeric}(e1, e2) +\item{...}{Additional arguments passed to methods} -\S4method{/}{IterableMatrix,numeric}(e1, e2) - -\S4method{-}{IterableMatrix,numeric}(e1, e2) -} -\arguments{ -\item{x}{IterableMatrix object or a matrix-like object.} - -\item{object}{IterableMatrix object} - -\item{y}{matrix} +\item{useNames}{(Logical) Whether to use row and column names in the output.} \item{probs}{(Numeric) Quantile value(s) to be computed, between 0 and 1.} \item{type}{(Integer) between 4 and 9 selecting which quantile algorithm to use, detailed in \code{matrixStats::rowQuantiles()}} -} -\value{ -\itemize{ -\item \code{t()} Transposed object -} -\itemize{ -\item \code{x \%*\% y}: dense matrix result -} +\item{digits}{Number of decimal places for quantile calculations} +\item{drop}{(Logical) If TRUE and only one quantile is requested, the result is coerced to a vector (For non-BPCells objects).} +} +\value{ \itemize{ \item \code{rowSums()}: vector of row sums } @@ -192,20 +151,10 @@ Else, return a Matrix of quantile values, with cols representing each quantile, } } \description{ -Generic methods and built-in functions for IterableMatrix objects +Summaries and reductions for IterableMatrix objects (sums, means, variances, quantiles, extrema). 
} \section{Functions}{ \itemize{ -\item \code{matrix_type()}: Get the matrix data type (mat_uint32_t, mat_float, or mat_double for now) - -\item \code{storage_order()}: Get the matrix storage order ("row" or "col") - -\item \code{show(IterableMatrix)}: Display an IterableMatrix - -\item \code{t(IterableMatrix)}: Transpose an IterableMatrix - -\item \code{x \%*\% y}: Multiply by a dense matrix - \item \code{rowSums(IterableMatrix)}: Calculate rowSums \item \code{colSums(IterableMatrix)}: Calculate colSums @@ -216,9 +165,9 @@ Generic methods and built-in functions for IterableMatrix objects \item \code{colVars()}: Calculate colVars (replacement for \code{matrixStats::colVars()}) -\item \code{rowVars()}: Calculate rowVars (replacement for \code{matrixStats::rowVars()}) +\item \code{rowVars()}: Calculate rowVars (replacement for \code{matrixStats::rowVars()}) (generic) -\item \code{rowMaxs()}: Calculate rowMaxs (replacement for \code{matrixStats::rowMaxs()}) +\item \code{rowMaxs()}: Calculate rowMaxs (replacement for \code{matrixStats::rowMaxs()}) (generic) \item \code{colMaxs()}: Calculate colMax (replacement for \code{matrixStats::colMax()}) @@ -234,26 +183,6 @@ Generic methods and built-in functions for IterableMatrix objects \item \code{expm1_slow()}: Calculate exp(x) - 1 (non-SIMD version) -\item \code{e1^e2}: Calculate x^y (elementwise; y > 0) - -\item \code{e1 < e2}: Binarize matrix according to numeric < matrix comparison - -\item \code{e1 > e2}: Binarize matrix according to matrix > numeric comparison - -\item \code{e1 <= e2}: Binarize matrix according to numeric <= matrix comparison - -\item \code{e1 >= e2}: Binarize matrix according to matrix >= numeric comparison - -\item \code{round(IterableMatrix)}: round to nearest integer (digits must be 0) - -\item \code{e1 * e2}: Multiply by a constant, or multiply rows by a vector length nrow(mat) - -\item \code{e1 + e2}: Add a constant, or row-wise addition with a vector length nrow(mat) - -\item \code{e1 / e2}: 
Divide by a constant, or divide rows by a vector length nrow(mat) - -\item \code{e1 - e2}: Subtract a constant, or row-wise subtraction with a vector length nrow(mat) - }} \examples{ ## Prep data @@ -262,37 +191,6 @@ mat mat <- as(mat, "IterableMatrix") mat - -####################################################################### -## matrix_type() example -####################################################################### -matrix_type(mat) - - -####################################################################### -## storage_order() example -####################################################################### -storage_order(mat) - - -####################################################################### -## show() example -####################################################################### -show(mat) - - -####################################################################### -## t() example -####################################################################### -t(mat) - - -####################################################################### -## `x \%*\% y` example -####################################################################### -mat \%*\% as(matrix(1:50, nrow = 5), "dgCMatrix") - - ####################################################################### ## rowSums() example ####################################################################### @@ -312,7 +210,7 @@ rowMeans(mat) ####################################################################### -## colMeans() example +# colMeans() example ####################################################################### colMeans(mat) @@ -323,6 +221,12 @@ colMeans(mat) colVars(mat) +####################################################################### +## rowVars() example +####################################################################### +rowVars(mat) + + ####################################################################### ## rowMaxs() example 
####################################################################### @@ -371,74 +275,4 @@ expm1(mat) expm1_slow(mat) -####################################################################### -## `e1 < e2` example -####################################################################### -5 < mat - - -####################################################################### -## `e1 > e2` example -####################################################################### -mat > 5 - - -####################################################################### -## `e1 <= e2` example -####################################################################### -5 <= mat - - -####################################################################### -## `e1 >= e2` example -####################################################################### -mat >= 5 - - -####################################################################### -## round() example -####################################################################### -round(mat) - - -####################################################################### -## `e1 * e2` example -####################################################################### -## Multiplying by a constant -mat * 5 - -## Multiplying by a vector of length `nrow(mat)` -mat * 1:nrow(mat) - - -####################################################################### -## `e1 + e2` example -####################################################################### -## Add by a constant -mat + 5 - -## Adding row-wise by a vector of length `nrow(mat)` -mat + 1:nrow(mat) - - -####################################################################### -## `e1 / e2` example -####################################################################### -## Divide by a constant -mat / 5 - -## Divide by a vector of length `nrow(mat)` -mat / 1:nrow(mat) - - -####################################################################### -## `e1 - e2` example 
-####################################################################### -## Subtracting by a constant -mat - 5 - -## Subtracting by a vector of length `nrow(mat)` -mat - 1:nrow(mat) - - } diff --git a/r/man/IterableMatrix-misc-methods.Rd b/r/man/IterableMatrix-misc-methods.Rd new file mode 100644 index 00000000..446f882c --- /dev/null +++ b/r/man/IterableMatrix-misc-methods.Rd @@ -0,0 +1,241 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix.R, R/transforms.R +\docType{methods} +\name{IterableMatrix-misc-methods} +\alias{IterableMatrix-misc-methods} +\alias{\%*\%,matrix,IterableMatrix-method} +\alias{\%*\%,IterableMatrix,numeric-method} +\alias{\%*\%,numeric,IterableMatrix-method} +\alias{matrix_type,MatrixMultiply-method} +\alias{\%*\%,IterableMatrix,IterableMatrix-method} +\alias{\%*\%,IterableMatrix,dgCMatrix-method} +\alias{\%*\%,dgCMatrix,IterableMatrix-method} +\alias{[,MatrixMultiply,ANY,ANY,ANY-method} +\alias{matrix_type,MatrixMask-method} +\alias{matrix_type,MatrixRankTransform-method} +\alias{matrix_type,MatrixSubset-method} +\alias{[,MatrixSubset,ANY,ANY,ANY-method} +\alias{matrix_type,RenameDims-method} +\alias{[,RenameDims,ANY,ANY,ANY-method} +\alias{matrix_type,RowBindMatrices-method} +\alias{matrix_type,ColBindMatrices-method} +\alias{[,RowBindMatrices,ANY,ANY,ANY-method} +\alias{[,ColBindMatrices,ANY,ANY,ANY-method} +\alias{matrix_type,PackedMatrixMem_uint32_t-method} +\alias{matrix_type,PackedMatrixMem_float-method} +\alias{matrix_type,PackedMatrixMem_double-method} +\alias{matrix_type,UnpackedMatrixMem_uint32_t-method} +\alias{matrix_type,UnpackedMatrixMem_float-method} +\alias{matrix_type,UnpackedMatrixMem_double-method} +\alias{matrix_type,MatrixDir-method} +\alias{matrix_type,EXPERIMENTAL_MatrixDirCompressedCol-method} +\alias{matrix_type,MatrixH5-method} +\alias{matrix_type,10xMatrixH5-method} +\alias{matrix_type,AnnDataMatrixH5-method} +\alias{matrix_type,PeakMatrix-method} 
+\alias{[,PeakMatrix,ANY,ANY,ANY-method} +\alias{matrix_type,TileMatrix-method} +\alias{[,TileMatrix,ANY,ANY,ANY-method} +\alias{matrix_type,ConvertMatrixType-method} +\alias{[,ConvertMatrixType,ANY,ANY,ANY-method} +\alias{matrix_type,Iterable_dgCMatrix_wrapper-method} +\alias{matrix_type,TransformedMatrix-method} +\alias{[,TransformedMatrix,ANY,ANY,ANY-method} +\alias{*,TransformScaleShift,numeric-method} +\alias{+,TransformScaleShift,numeric-method} +\alias{*,numeric,TransformScaleShift-method} +\alias{+,numeric,TransformScaleShift-method} +\title{IterableMatrix subclass methods} +\usage{ +\S4method{\%*\%}{matrix,IterableMatrix}(x, y) + +\S4method{\%*\%}{IterableMatrix,numeric}(x, y) + +\S4method{\%*\%}{numeric,IterableMatrix}(x, y) + +\S4method{matrix_type}{MatrixMultiply}(x) + +\S4method{\%*\%}{IterableMatrix,IterableMatrix}(x, y) + +\S4method{\%*\%}{IterableMatrix,dgCMatrix}(x, y) + +\S4method{\%*\%}{dgCMatrix,IterableMatrix}(x, y) + +\S4method{[}{MatrixMultiply,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{matrix_type}{MatrixMask}(x) + +\S4method{matrix_type}{MatrixRankTransform}(x) + +\S4method{matrix_type}{MatrixSubset}(x) + +\S4method{[}{MatrixSubset,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{matrix_type}{RenameDims}(x) + +\S4method{[}{RenameDims,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{matrix_type}{RowBindMatrices}(x) + +\S4method{matrix_type}{ColBindMatrices}(x) + +\S4method{[}{RowBindMatrices,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{[}{ColBindMatrices,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{matrix_type}{PackedMatrixMem_uint32_t}(x) + +\S4method{matrix_type}{PackedMatrixMem_float}(x) + +\S4method{matrix_type}{PackedMatrixMem_double}(x) + +\S4method{matrix_type}{UnpackedMatrixMem_uint32_t}(x) + +\S4method{matrix_type}{UnpackedMatrixMem_float}(x) + +\S4method{matrix_type}{UnpackedMatrixMem_double}(x) + +\S4method{matrix_type}{MatrixDir}(x) + 
+\S4method{matrix_type}{EXPERIMENTAL_MatrixDirCompressedCol}(x) + +\S4method{matrix_type}{MatrixH5}(x) + +\S4method{matrix_type}{10xMatrixH5}(x) + +\S4method{matrix_type}{AnnDataMatrixH5}(x) + +\S4method{matrix_type}{PeakMatrix}(x) + +\S4method{[}{PeakMatrix,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{matrix_type}{TileMatrix}(x) + +\S4method{[}{TileMatrix,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{matrix_type}{ConvertMatrixType}(x) + +\S4method{[}{ConvertMatrixType,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{matrix_type}{Iterable_dgCMatrix_wrapper}(x) + +\S4method{matrix_type}{TransformedMatrix}(x) + +\S4method{[}{TransformedMatrix,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE) + +\S4method{*}{TransformScaleShift,numeric}(e1, e2) + +\S4method{+}{TransformScaleShift,numeric}(e1, e2) + +\S4method{*}{numeric,TransformScaleShift}(e1, e2) + +\S4method{+}{numeric,TransformScaleShift}(e1, e2) +} +\arguments{ +\item{x}{An object inheriting from \code{IterableMatrix}.} + +\item{i}{Row indices or selection helpers.} + +\item{j}{Column indices or selection helpers.} + +\item{...}{Additional arguments passed through the call.} + +\item{drop}{Logical indicating whether to drop dimensions (for subsetting).} + +\item{e1}{Left operand for binary operations.} + +\item{e2}{Right operand for binary operations.} +} +\description{ +Methods for classes that extend \code{IterableMatrix} but are not dispatched +directly on the base class. These are typically helper objects that wrap +another matrix or alter behaviour (e.g., concatenation, on-disk access). 
+} +\section{Functions}{ +\itemize{ +\item \code{x \%*\% y}: Multiply a dense matrix by an IterableMatrix + +\item \code{x \%*\% y}: Multiply an IterableMatrix by a numeric vector + +\item \code{x \%*\% y}: Multiply a numeric row vector by an IterableMatrix + +\item \code{matrix_type(MatrixMultiply)}: Matrix data type for MatrixMultiply objects + +\item \code{x \%*\% y}: Multiply two IterableMatrix objects + +\item \code{x \%*\% y}: Multiply an IterableMatrix by a dgCMatrix + +\item \code{x \%*\% y}: Multiply a dgCMatrix by an IterableMatrix + +\item \code{x[i}: Subset MatrixMultiply results + +\item \code{matrix_type(MatrixMask)}: Matrix data type for MatrixMask objects + +\item \code{matrix_type(MatrixRankTransform)}: Matrix data type for MatrixRankTransform objects + +\item \code{matrix_type(MatrixSubset)}: Matrix data type for MatrixSubset objects + +\item \code{x[i}: Subset MatrixSubset transforms + +\item \code{matrix_type(RenameDims)}: Matrix data type for RenameDims objects + +\item \code{x[i}: Subset RenameDims transforms + +\item \code{matrix_type(RowBindMatrices)}: Matrix data type for RowBindMatrices objects + +\item \code{matrix_type(ColBindMatrices)}: Matrix data type for ColBindMatrices objects + +\item \code{x[i}: Subset RowBindMatrices transforms + +\item \code{x[i}: Subset ColBindMatrices transforms + +\item \code{matrix_type(PackedMatrixMem_uint32_t)}: Matrix data type for PackedMatrixMem_uint32_t objects + +\item \code{matrix_type(PackedMatrixMem_float)}: Matrix data type for PackedMatrixMem_float objects + +\item \code{matrix_type(PackedMatrixMem_double)}: Matrix data type for PackedMatrixMem_double objects + +\item \code{matrix_type(UnpackedMatrixMem_uint32_t)}: Matrix data type for UnpackedMatrixMem_uint32_t objects + +\item \code{matrix_type(UnpackedMatrixMem_float)}: Matrix data type for UnpackedMatrixMem_float objects + +\item \code{matrix_type(UnpackedMatrixMem_double)}: Matrix data type for UnpackedMatrixMem_double objects + +\item 
\code{matrix_type(MatrixDir)}: Matrix data type for MatrixDir objects + +\item \code{matrix_type(EXPERIMENTAL_MatrixDirCompressedCol)}: Matrix data type for EXPERIMENTAL_MatrixDirCompressedCol objects + +\item \code{matrix_type(MatrixH5)}: Matrix data type for MatrixH5 objects + +\item \code{matrix_type(`10xMatrixH5`)}: Matrix data type for 10xMatrixH5 objects + +\item \code{matrix_type(AnnDataMatrixH5)}: Matrix data type for AnnDataMatrixH5 objects + +\item \code{matrix_type(PeakMatrix)}: Matrix data type for PeakMatrix objects + +\item \code{x[i}: Subset a PeakMatrix + +\item \code{matrix_type(TileMatrix)}: Matrix data type for TileMatrix objects + +\item \code{x[i}: Subset a TileMatrix + +\item \code{matrix_type(ConvertMatrixType)}: Matrix data type for ConvertMatrixType objects + +\item \code{x[i}: Subset ConvertMatrixType transforms + +\item \code{matrix_type(Iterable_dgCMatrix_wrapper)}: Matrix data type for Iterable_dgCMatrix_wrapper objects + +\item \code{matrix_type(TransformedMatrix)}: Matrix data type for TransformedMatrix objects + +\item \code{x[i}: Subset TransformedMatrix results + +\item \code{e1 * e2}: Scale TransformScaleShift results by numeric values + +\item \code{e1 + e2}: Shift TransformScaleShift results by numeric values + +\item \code{e1 * e2}: Apply numeric scaling on the left to TransformScaleShift results + +\item \code{e1 + e2}: Add TransformScaleShift results to numeric values (numeric left operand) + +}} +\keyword{internal} diff --git a/r/man/LinearOperator-math.Rd b/r/man/LinearOperator-math.Rd new file mode 100644 index 00000000..e8e18f28 --- /dev/null +++ b/r/man/LinearOperator-math.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix.R +\docType{methods} +\name{LinearOperator-math} +\alias{LinearOperator-math} +\alias{\%*\%,LinearOperator,matrix-method} +\alias{\%*\%,matrix,LinearOperator-method} +\alias{\%*\%,LinearOperator,numeric-method} 
+\alias{\%*\%,numeric,LinearOperator-method} +\title{LinearOperator multiplication helpers} +\usage{ +\S4method{\%*\%}{LinearOperator,matrix}(x, y) + +\S4method{\%*\%}{matrix,LinearOperator}(x, y) + +\S4method{\%*\%}{LinearOperator,numeric}(x, y) + +\S4method{\%*\%}{numeric,LinearOperator}(x, y) +} +\arguments{ +\item{x}{Left operand.} + +\item{y}{Right operand.} +} +\description{ +Methods enabling \verb{\\\%*\%} between \code{LinearOperator} objects and dense matrices or numeric vectors. +} +\section{Functions}{ +\itemize{ +\item \code{x \%*\% y}: Multiply a LinearOperator by a dense matrix + +\item \code{x \%*\% y}: Multiply a dense matrix by a LinearOperator + +\item \code{x \%*\% y}: Multiply a LinearOperator by a numeric vector + +\item \code{x \%*\% y}: Multiply a numeric vector by a LinearOperator + +}} +\keyword{internal} diff --git a/r/man/all_matrix_inputs.Rd b/r/man/all_matrix_inputs.Rd index 235f0cb1..a5110fc8 100644 --- a/r/man/all_matrix_inputs.Rd +++ b/r/man/all_matrix_inputs.Rd @@ -25,3 +25,13 @@ getter and setter functions allow accessing the base-level input matrices as a l changing them. This is useful if you want to re-locate data on disk without losing your transformed BPCells matrix. (Note: experimental API; potentially subject to revisions). 
} +\examples{ +mat <- matrix(1:9, nrow=3) \%>\% as("IterableMatrix") + +# Just returns the matrix itself +all_matrix_inputs(mat) + +# Returns the matrix twice, as they are the inputs to the cbind +all_matrix_inputs(cbind(mat, mat)) + +} diff --git a/r/man/apply_by_row.Rd b/r/man/apply_by_row.Rd index 47bcf076..19470162 100644 --- a/r/man/apply_by_row.Rd +++ b/r/man/apply_by_row.Rd @@ -54,10 +54,12 @@ mat <- mat \%>\% as("dgCMatrix") \%>\% as("IterableMatrix") ## Get mean of every row ## expect an error in the case that col-major matrix is passed -apply_by_row(mat, function(val, row, col) {sum(val) / nrow(mat)}) \%>\% - unlist() - +try( + apply_by_row(mat, function(val, row, col) {sum(val) / nrow(mat)}) \%>\% + unlist() +) ## Need to transpose matrix to make sure it is in row-order + mat_row_order <- transpose_storage_order(mat) ## works as expected for row major diff --git a/r/man/binarize.Rd b/r/man/binarize.Rd index c162a8c8..26a3ee17 100644 --- a/r/man/binarize.Rd +++ b/r/man/binarize.Rd @@ -28,3 +28,15 @@ is set to FALSE, element values greater than or equal to the threshold are set to one. As an alternative, the \code{<}, \code{<=}, \code{>}, and \code{>=} operators are also supported. } +\examples{ +set.seed(12345) +mat <- matrix(rpois(40, lambda = 5), nrow = 4) +rownames(mat) <- paste0("gene", 1:4) +mat <- as(mat, "dgCMatrix") \%>\% as("IterableMatrix") + +####################################################################### +## binarize() example +####################################################################### +binarize(mat, threshold = 4) \%>\% as("dgCMatrix") + +} diff --git a/r/man/call_macs_peaks.Rd b/r/man/call_macs_peaks.Rd index 5ad425c5..4cc3dace 100644 --- a/r/man/call_macs_peaks.Rd +++ b/r/man/call_macs_peaks.Rd @@ -11,4 +11,7 @@ call_macs_peaks(...) 
This function has been renamed to \code{call_peaks_macs()} } +\examples{ +# See ?call_peaks_macs for examples +} \keyword{internal} diff --git a/r/man/call_peaks_macs.Rd b/r/man/call_peaks_macs.Rd index 5deba269..c75a5077 100644 --- a/r/man/call_peaks_macs.Rd +++ b/r/man/call_peaks_macs.Rd @@ -89,6 +89,7 @@ shell scripts generated at \verb{/input/.sh}. Finally, run \code{ca setting \code{step="read-outputs"}. } \examples{ +\dontshow{if (tryCatch({ macs_path_is_valid(); TRUE }, error = function(e) FALSE)) withAutoprint(\{ # examplesIf} macs_files <- file.path(tempdir(), "peaks") frags <- get_demo_frags() @@ -104,4 +105,5 @@ list.files(file.path(macs_files, "output", "all")) ## call_peaks_macs() can also solely perform the output reading step head(call_peaks_macs(frags, macs_files, step = "read-outputs")) +\dontshow{\}) # examplesIf} } diff --git a/r/man/call_peaks_tile.Rd b/r/man/call_peaks_tile.Rd index 6d830a5b..9430f963 100644 --- a/r/man/call_peaks_tile.Rd +++ b/r/man/call_peaks_tile.Rd @@ -87,9 +87,26 @@ frags <- get_demo_frags() ## Remove blacklist regions from fragments blacklist <- read_encode_blacklist(reference_dir, genome="hg38") frags_filter_blacklist <- select_regions(frags, blacklist, invert_selection = TRUE) -chrom_sizes <- read_ucsc_chrom_sizes(reference_dir, genome="hg38") \%>\% dplyr::filter(chr \%in\% c("chr4", "chr11")) +chrom_sizes <- read_ucsc_chrom_sizes(reference_dir, genome="hg38") \%>\% + dplyr::filter(chr \%in\% c("chr4", "chr11")) ## Call peaks +if (interactive()) { call_peaks_tile(frags_filter_blacklist, chrom_sizes, effective_genome_size = 2.8e9) } +#> # A tibble: 73,160 x 7 +#> chr start end group p_val q_val enrichment +#> +#> 1 chr11 65615400 65615600 all 0 0 6764. +#> 2 chr4 2262266 2262466 all 0 0 6422. +#> 3 chr11 119057200 119057400 all 0 0 6188. +#> 4 chr11 695133 695333 all 0 0 6180. +#> 5 chr11 2400400 2400600 all 0 0 6166. +#> 6 chr4 1346933 1347133 all 0 0 6109. +#> 7 chr11 3797600 3797800 all 0 0 6017. 
+#> 8 chr11 64878600 64878800 all 0 0 5948. +#> 9 chr11 57667733 57667933 all 0 0 5946. +#> 10 chr11 83156933 83157133 all 0 0 5913. +#> # i 73,150 more rows +} diff --git a/r/man/checksum.Rd b/r/man/checksum.Rd index 41f1b43e..be92a00f 100644 --- a/r/man/checksum.Rd +++ b/r/man/checksum.Rd @@ -18,7 +18,7 @@ hexidecimal format. } \details{ \code{checksum()} converts the non-zero elements of the sparse input matrix to double -precision, concatenates each element value with the element row and column index words, +precision, concatenates each element value with the element row and column index words, and uses these 16-byte blocks along with the matrix dimensions and row and column names to calculate the checksum. The checksum value depends on the storage order so column- and row-order matrices with the same element values give different checksum @@ -27,8 +27,6 @@ It converts to little-endian order on big-endian architecture although this has been tested. } \examples{ -library(Matrix) -library(BPCells) m1 <- matrix(seq(1,12), nrow=3) m2 <- as(m1, 'dgCMatrix') m3 <- as(m2, 'IterableMatrix') diff --git a/r/man/cluster_cells_graph.Rd b/r/man/cluster_cells_graph.Rd index 0aaaa807..3c203467 100644 --- a/r/man/cluster_cells_graph.Rd +++ b/r/man/cluster_cells_graph.Rd @@ -72,6 +72,13 @@ For cells \code{i} and \code{j}, their similarity score is in \code{adjacency_ma \strong{cluster_graph_method}: First argument is a weighted similarity graph as returned by \code{knn_to_graph_method}. Returns a factor vector of length \code{cells} with a cluster assignment for each cell.
+} +\examples{ +set.seed(123) +mat <- matrix(rnorm(1000 * 10), nrow = 1000) +clusters <- cluster_cells_graph(mat, threads = 1) +table(clusters) + } \seealso{ \code{knn_hnsw()} \code{knn_annoy()} \code{knn_to_graph()} \code{knn_to_snn_graph()} \code{knn_to_geodesic_graph()} \code{cluster_graph_leiden()} \code{cluster_graph_louvain()} \code{cluster_graph_seurat()} diff --git a/r/man/cluster_graph.Rd b/r/man/cluster_graph.Rd index 0794ca6c..989b8bd7 100644 --- a/r/man/cluster_graph.Rd +++ b/r/man/cluster_graph.Rd @@ -46,3 +46,25 @@ good default when \code{objective_function = "modularity"} per the default. \strong{cluster_graph_seurat}: Seurat's clustering algorithm \code{Seurat::FindClusters()} } +\examples{ +set.seed(123) +mat <- matrix(rnorm(1000 * 10), nrow = 1000) +knn <- knn_hnsw(mat, k = 10) +graph <- knn_to_snn_graph(knn) +clusters <- cluster_graph_leiden(graph, resolution = 0.5) + +set.seed(123) +mat <- matrix(rnorm(1000 * 10), nrow = 1000) +knn <- knn_hnsw(mat, k = 10) +graph <- knn_to_snn_graph(knn) +clusters <- cluster_graph_louvain(graph, resolution = 0.5) + +\dontrun{ +set.seed(123) +mat <- matrix(rnorm(1000 * 10), nrow = 1000) +knn <- knn_hnsw(mat, k = 10) +graph <- knn_to_snn_graph(knn) +clusters <- cluster_graph_seurat(graph, resolution = 0.5) +} + +} diff --git a/r/man/collect_features.Rd b/r/man/collect_features.Rd index 7cbc8836..3ebdf89c 100644 --- a/r/man/collect_features.Rd +++ b/r/man/collect_features.Rd @@ -35,3 +35,19 @@ If \code{source} is a data.frame, features will be drawn from the columns. If \code{source} is a matrix object (\code{IterableMatrix}, \code{dgCMatrix}, or \code{matrix}), features will be drawn from rows. 
} +\examples{ +# Collect features from a matrix +mat <- get_demo_mat() +# By ID +features_id <- collect_features(mat, "ENSG00000272602", gene_mapping = NULL) +head(features_id) + +# By Gene Symbol (using default human_gene_mapping) +features_symbol <- collect_features(mat, "MS4A1") +head(features_symbol) + +# Collect features from a data frame +df <- data.frame(a = 1:5, b = 6:10) +features_df <- collect_features(df, c("a", "b")) +head(features_df) +} diff --git a/r/man/draw_trackplot_grid.Rd b/r/man/draw_trackplot_grid.Rd index 216649a6..4e9fc7d0 100644 --- a/r/man/draw_trackplot_grid.Rd +++ b/r/man/draw_trackplot_grid.Rd @@ -40,4 +40,7 @@ by heights. A shared title and x-axis are put at the top. This function has been renamed to \code{trackplot_combine()}. } +\examples{ +# See ?trackplot_combine for examples +} \keyword{internal} diff --git a/r/man/ensure_downloaded.Rd b/r/man/ensure_downloaded.Rd index 0d0cc03c..93787cdf 100644 --- a/r/man/ensure_downloaded.Rd +++ b/r/man/ensure_downloaded.Rd @@ -9,9 +9,9 @@ ensure_downloaded(path, backup_url, timeout) \arguments{ \item{path}{Output path to write file} -\item{timeout}{timeout in seconds} +\item{backup_url}{to download from} -\item{url}{to download from} +\item{timeout}{timeout in seconds} } \description{ Download a file with a custom timeout diff --git a/r/man/extend_ranges.Rd b/r/man/extend_ranges.Rd index 357b1be6..196b4a61 100644 --- a/r/man/extend_ranges.Rd +++ b/r/man/extend_ranges.Rd @@ -25,7 +25,7 @@ extend_ranges( \item{metadata_cols}{Optional list of metadata columns to require & extract} -\item{chromosome_sizes}{(optional) Size of chromosomes as a \link{genomic-ranges} object} +\item{chromosome_sizes}{(optional) Size of chromosomes as a \link{genomic-ranges-like} object} \item{zero_based_coords}{If true, coordinates start and 0 and the end coordinate is not included in the range. 
If false, coordinates start at 1 and the end coordinate is included in the range} diff --git a/r/man/fragment_R_conversion.Rd b/r/man/fragment_R_conversion.Rd index a78dda7e..39df698c 100644 --- a/r/man/fragment_R_conversion.Rd +++ b/r/man/fragment_R_conversion.Rd @@ -1,17 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/fragments.R -\name{convert_to_fragments} +\name{fragment_R_conversion_coercions} +\alias{fragment_R_conversion_coercions} +\alias{as.data.frame.IterableFragments} \alias{convert_to_fragments} \title{Convert between BPCells fragments and R objects.} \usage{ -# Convert from R to BPCells -convert_to_fragments(x, zero_based_coords = !is(x, "GRanges")) -as(x, "IterableFragments") +\method{as.data.frame}{IterableFragments}(x, row.names = NULL, optional = FALSE, ...) -# Convert from BPCells to R -as.data.frame(bpcells_fragments) -as(bpcells_fragments, "data.frame") -as(bpcells_fragments, "GRanges") +convert_to_fragments(x, zero_based_coords = !is(x, "GRanges")) } \arguments{ \item{x}{Fragment coordinates given as GRanges, data.frame, or list. See \code{help("genomic-ranges-like")} for details on format and coordinate systems. Required attributes: @@ -23,7 +20,20 @@ as(bpcells_fragments, "GRanges") \item{zero_based_coords}{Whether to convert the ranges from a 1-based end-inclusive coordinate system to a 0-based end-exclusive coordinate system. 
Defaults to true for GRanges and false for other formats -(see this \href{https://web.archive.org/web/20210920203703/http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} +(see this \href{https://web.archive.org/web/20210920203703/https://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} + +\item{IterableFragments}{BPCells IterableFragments object} + +\item{data.frame}{Data frame with columns chr, start, end, and cell_id} + +\item{GRanges}{GenomicRanges object with metadata column cell_id} + +\item{...}{Additional arguments passed to methods} + +\item{row.names}{Optional row names for compatibility with \code{as.data.frame()}. +Ignored for IterableFragments objects.} + +\item{optional}{Logical flag for compatibility with \code{as.data.frame()}; ignored.} } \value{ \strong{convert_to_fragments()}: IterableFragments object @@ -35,12 +45,19 @@ The main conversion method is R's builtin \code{as()} function, though the GRanges, BPCells assumes a 0-based, end-exclusive coordinate system. (See \link{genomic-ranges-like} reference for details) } +\details{ +Coercions rely on base R's \code{as()}; for example \code{as(frags, "data.frame")} +converts BPCells fragments back to a tabular format, while \code{as(x, "IterableFragments")} +materialises supported R objects as fragment stores. Coercions to and from +\code{GRanges} require the GenomicRanges package to be installed. 
+} \examples{ -frags_table <- tibble::tibble( - chr = paste0("chr", 1:10), - start = 0, - end = 5, - cell_id = "cell1" +frags_table <- tibble::tribble( + ~chr, ~start, ~end, ~cell_id, + "chr1", 0, 5, "cell1", + "chr1", 2, 4, "cell2", + "chr2", 3, 6, "cell1", + "chr3", 7, 9, "cell2" ) frags_table diff --git a/r/man/fragment_r_conversion-misc.Rd b/r/man/fragment_r_conversion-misc.Rd new file mode 100644 index 00000000..b86ccccc --- /dev/null +++ b/r/man/fragment_r_conversion-misc.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fragments.R +\docType{methods} +\name{fragment_r_conversion-misc} +\alias{fragment_r_conversion-misc} +\alias{coerce,data.frame,IterableFragments-method} +\alias{coerce,IterableFragments,data.frame-method} +\alias{coerce,GRanges,IterableFragments-method} +\alias{coerce,IterableFragments,GRanges-method} +\title{Convert between BPCells fragments and R objects misc.} +\usage{ +\S4method{coerce}{data.frame,IterableFragments}(from, to, ...) +\S4method{coerce}{IterableFragments,data.frame}(from, to, ...) +\S4method{coerce}{IterableFragments,GRanges}(from, to, ...) +\S4method{coerce}{GRanges,IterableFragments}(from, to, ...) +} +\arguments{ +\item{from}{Object supplied to \code{methods::coerce()} (typically generated by \code{as()})} + +\item{to}{Target class name for coercion} + +\item{...}{Additional arguments passed to methods} +} +\description{ +See \code{fragment_R_conversion} for main page. +BPCells fragments can be interconverted with GRanges and data.frame R objects. +The main conversion method is R's builtin \code{as()} function, though the convert_to_fragments() helper is also available. +For all R objects except GRanges, BPCells assumes a 0-based, end-exclusive coordinate system.
+(See \link{genomic-ranges-like} reference for details) +} +\keyword{internal} diff --git a/r/man/gene_mapping.Rd b/r/man/gene_mapping.Rd index f57ac819..5004e5a2 100644 --- a/r/man/gene_mapping.Rd +++ b/r/man/gene_mapping.Rd @@ -19,12 +19,12 @@ are the corresponding canonical gene symbol \source{ \strong{human_gene_mapping} -\url{http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/non_alt_loci_set.txt} +\url{https://ftp.ebi.ac.uk/pub/databases/genenames/out_of_date_hgnc/tsv/non_alt_loci_set.txt} \strong{mouse_gene_mapping} -\url{http://www.informatics.jax.org/downloads/reports/MGI_EntrezGene.rpt} -\url{http://www.informatics.jax.org/downloads/reports/MRK_ENSEMBL.rpt} +\url{https://www.informatics.jax.org/downloads/reports/MGI_EntrezGene.rpt} +\url{https://www.informatics.jax.org/downloads/reports/MRK_ENSEMBL.rpt} } \usage{ human_gene_mapping diff --git a/r/man/gene_region.Rd b/r/man/gene_region.Rd index 4855da39..6577167f 100644 --- a/r/man/gene_region.Rd +++ b/r/man/gene_region.Rd @@ -36,13 +36,23 @@ can be used as the \code{region} argument for trackplot functions such as \code{trackplot_coverage()} or \code{trackplot_gene()} } \examples{ +\dontrun{ ## Prep data genes <- read_gencode_transcripts( file.path(tempdir(), "references"), release = "42", annotation_set = "basic", - features = "transcript" + features = "transcript", timeout = 3000 ) ## Get gene region gene_region(genes, "CD19", extend_bp = 1e5) +#> $chr +#> [1] "chr16" +#> +#> $start +#> [1] 28831970 +#> +#> $end +#> [1] 29039342 +} } diff --git a/r/man/gene_score_tiles_archr.Rd b/r/man/gene_score_tiles_archr.Rd index de53094f..2f36e14f 100644 --- a/r/man/gene_score_tiles_archr.Rd +++ b/r/man/gene_score_tiles_archr.Rd @@ -18,7 +18,7 @@ gene_score_tiles_archr( \item \code{strand}: +/- or TRUE/FALSE for positive or negative strand }} -\item{chromosome_sizes}{(optional) Size of chromosomes as a \link{genomic-ranges} object} +\item{chromosome_sizes}{(optional) Size of chromosomes as a
\link{genomic-ranges-like} object} \item{tile_width}{Size of tiles to consider} @@ -48,14 +48,24 @@ beyond a neighboring gene are not considered } } \examples{ +\dontrun{ ## Prep data directory <- file.path(tempdir(), "references") genes <- read_gencode_genes( directory, release = "42", - annotation_set = "basic", + annotation_set = "basic", ) +} +## Use pre-generated data for this example +## Use pre-computed transcripts for chr4 +genes <- readr::read_delim( + file.path( + system.file("extdata", package = "BPCells"), + "transcripts_filtered_example_chr_4.tsv.gz"), + delim = "\t", show_col_types = FALSE +) ## Get gene scores by tile gene_score_tiles_archr( diff --git a/r/man/gene_scores.Rd b/r/man/gene_scores.Rd index df8ccd8e..be34d52d 100644 --- a/r/man/gene_scores.Rd +++ b/r/man/gene_scores.Rd @@ -86,6 +86,7 @@ calculate a weight matrix of dimensions genes x tiles. This matrix can be multiplied with a tile matrix to obtain ArchR-compatible gene activity scores. } \examples{ +\dontrun{ ## Prep data reference_dir <- file.path(tempdir(), "references") frags <- get_demo_frags() @@ -94,8 +95,10 @@ genes <- read_gencode_genes( release="42", annotation_set = "basic", ) \%>\% dplyr::filter(chr \%in\% c("chr4", "chr11")) -blacklist <- read_encode_blacklist(reference_dir, genome="hg38") \%>\% dplyr::filter(chr \%in\% c("chr4", "chr11")) -chrom_sizes <- read_ucsc_chrom_sizes(reference_dir, genome="hg38") \%>\% dplyr::filter(chr \%in\% c("chr4", "chr11")) +blacklist <- read_encode_blacklist(reference_dir, genome="hg38") \%>\% + dplyr::filter(chr \%in\% c("chr4", "chr11")) +chrom_sizes <- read_ucsc_chrom_sizes(reference_dir, genome="hg38") \%>\% + dplyr::filter(chr \%in\% c("chr4", "chr11")) chrom_sizes$tile_width = 500 @@ -112,14 +115,17 @@ tiles <- tile_matrix(frags, chrom_sizes, mode = "fragments") ## Get gene scores per cell gene_score_weights \%*\% tiles +} +\dontrun{ ####################################################################### ## gene_score_archr() example 
####################################################################### ## This is a wrapper that creates both the gene score weight ## matrix and tile matrix together gene_score_archr(frags, genes, chrom_sizes, blacklist) +} } diff --git a/r/man/import_matrix_market.Rd b/r/man/import_matrix_market.Rd index 5b181651..c69536a6 100644 --- a/r/man/import_matrix_market.Rd +++ b/r/man/import_matrix_market.Rd @@ -68,3 +68,31 @@ files will be slower to import due to gzip decompression. When importing from 10x mtx files, the row and column names can be read automatically using the \code{import_matrix_market_10x()} convenience function. } +\examples{ +# Create a dummy matrix market file +matrix_market_path <- file.path(tempdir(), "matrix.mtx") +lines <- c( + "\%\%MatrixMarket matrix coordinate integer general", + "\% This is a comment", + "5 10 3", + "1 1 1", + "2 2 2", + "5 10 3" +) +writeLines(lines, matrix_market_path) + +# Import the matrix market file +mat <- import_matrix_market(matrix_market_path) +mat + +# Clean up +unlink(matrix_market_path) + +\dontrun{ +# Import 10x matrix market directory +mat <- import_matrix_market_10x( + "path/to/10x/matrix_market_dir" +) +} + +} diff --git a/r/man/knn.Rd b/r/man/knn.Rd index 07d97a07..4f655fba 100644 --- a/r/man/knn.Rd +++ b/r/man/knn.Rd @@ -68,3 +68,22 @@ utilizing both data and query. 
\strong{knn_annoy}: Use RcppAnnoy as knn engine } +\examples{ +############################################ +## knn_hnsw() example +############################################ +set.seed(123) +mat <- matrix(rnorm(100 * 10), nrow = 10) +rownames(mat) <- paste0("cell", 1:10) +knn <- knn_hnsw(mat, k = 10) +knn + +############################################ +## knn_annoy() example +############################################ +set.seed(123) +mat <- matrix(rnorm(100 * 10), nrow = 10) +rownames(mat) <- paste0("cell", 1:10) +knn <- knn_annoy(mat, k = 10) +knn +} diff --git a/r/man/knn_graph.Rd b/r/man/knn_graph.Rd index 5d365c41..b7a3f0e6 100644 --- a/r/man/knn_graph.Rd +++ b/r/man/knn_graph.Rd @@ -84,3 +84,15 @@ less memory and return a sparser graph than \code{knn_to_snn_graph} which comput neighbor, results may differ slightly from \code{umap._umap.fuzzy_simplicial_set}, which assumes self is always successfully found in the approximate nearest neighbor search. } +\examples{ +set.seed(123) +mat <- matrix(rnorm(1000 * 10), nrow = 1000) +knn <- knn_hnsw(mat, k = 10) +graph <- knn_to_snn_graph(knn) + +set.seed(123) +mat <- matrix(rnorm(1000 * 10), nrow = 1000) +knn <- knn_hnsw(mat, k = 10) +graph <- knn_to_geodesic_graph(knn) + +} diff --git a/r/man/macs_path_is_valid.Rd b/r/man/macs_path_is_valid.Rd index 829d8190..4aa06566 100644 --- a/r/man/macs_path_is_valid.Rd +++ b/r/man/macs_path_is_valid.Rd @@ -12,7 +12,7 @@ macs_path_is_valid(macs_executable) \item{macs_executable}{(string) Path to either MACS2/3 executable. Default (\code{NULL}) will autodetect from PATH.} } \value{ -MACS executable path. +MACS executable path if valid, otherwise throws an error. } \description{ Test if MACS executable is valid.
diff --git a/r/man/matrix_R_conversion.Rd b/r/man/matrix_R_conversion.Rd index b00d2669..3dca282b 100644 --- a/r/man/matrix_R_conversion.Rd +++ b/r/man/matrix_R_conversion.Rd @@ -1,26 +1,37 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/matrix.R -\name{matrix_R_conversion} +\name{matrix_R_conversion_coercions} +\alias{matrix_R_conversion_coercions} +\alias{as.matrix.IterableMatrix} +\alias{as.matrix,IterableMatrix-method} \alias{matrix_R_conversion} \title{Convert between BPCells matrix and R objects.} \usage{ -# Convert to R from BPCells -as(bpcells_mat, "dgCMatrix") # Sparse matrix conversion -as.matrix(bpcells_mat) # Dense matrix conversion +\method{as.matrix}{IterableMatrix}(x, ...) +} +\arguments{ +\item{x}{Matrix object to convert} -# Convert to BPCells from R -as(dgc_mat, "IterableMatrix") +\item{...}{Additional arguments passed to methods} } \description{ BPCells matrices can be interconverted with Matrix package dgCMatrix sparse matrices, as well as base R dense matrices (though this may result in high memory usage for large matrices) } +\details{ +Use base R's \code{as()} to convert between BPCells matrices and +\code{dgCMatrix}/\code{matrix} representations, while \code{as.matrix()} materialises +dense matrices directly when needed. 
+} \examples{ -mat <- get_demo_mat()[1:2, 1:2] +# setup data +mat <- matrix(1:25, nrow=5, ncol=5) +rownames(mat) <- paste0("gene", seq_len(5)) +colnames(mat) <- paste0("cell", seq_len(5)) +mat <- mat \%>\% as("IterableMatrix") mat - ####################################################################### ## as(bpcells_mat, "dgCMatrix") example ####################################################################### diff --git a/r/man/matrix_io.Rd b/r/man/matrix_io.Rd index 70e6c4c7..44dc0186 100644 --- a/r/man/matrix_io.Rd +++ b/r/man/matrix_io.Rd @@ -34,6 +34,8 @@ write_matrix_hdf5( open_matrix_hdf5(path, group, buffer_size = 16384L) } \arguments{ +\item{mat}{Input matrix, either IterableMatrix or dgCMatrix} + \item{compress}{Whether or not to compress the data.} \item{dir}{Directory to save the data into} @@ -54,8 +56,6 @@ to an existing hdf5 file this group must not already be in use} \item{gzip_level}{Gzip compression level. Default is 0 (no compression). This is recommended when both compression and compatibility with outside programs is required. Otherwise, using compress=TRUE is recommended as it is >10x faster with often similar compression levels.} - -\item{matrix}{Input matrix, either IterableMatrix or dgCMatrix} } \value{ BPCells matrix object diff --git a/r/man/matrix_r_conversion-misc.Rd b/r/man/matrix_r_conversion-misc.Rd new file mode 100644 index 00000000..95065fca --- /dev/null +++ b/r/man/matrix_r_conversion-misc.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix.R +\docType{methods} +\name{matrix_r_conversion-misc} +\alias{matrix_r_conversion-misc} +\alias{coerce,dgCMatrix,IterableMatrix-method} +\alias{coerce,IterableMatrix,dgCMatrix-method} +\alias{coerce,matrix,IterableMatrix-method} +\alias{coerce,IterableMatrix,matrix-method} +\title{Convert between BPCells matrices and R objects misc.} +\usage{ +\S4method{coerce}{dgCMatrix,IterableMatrix}(from, to, ...) 
+\S4method{coerce}{IterableMatrix,dgCMatrix}(from, to, ...) +\S4method{coerce}{matrix,IterableMatrix}(from, to, ...) +\S4method{coerce}{IterableMatrix,matrix}(from, to, ...) +} +\arguments{ +\item{from}{Object supplied to \code{methods::coerce()} (typically generated by \code{as()})} + +\item{to}{Target class name for coercion} + +\item{...}{Additional arguments passed to methods} +} +\description{ +See \code{matrix_R_conversion} for main page. +BPCells matrices can be interconverted with Matrix package \code{dgCMatrix} sparse matrices, +as well as base R dense matrices (though this may result in high memory usage for large matrices). +} +\keyword{internal} diff --git a/r/man/min_elementwise.Rd b/r/man/min_elementwise.Rd index 8d06523d..ae9b9237 100644 --- a/r/man/min_elementwise.Rd +++ b/r/man/min_elementwise.Rd @@ -16,6 +16,8 @@ min_by_col(mat, vals) \item{mat}{IterableMatrix} \item{val}{Single positive numeric value} + +\item{vals}{Numeric vector of positive values, with length equal to the number of rows (min_by_row) or columns (min_by_col)} } \value{ IterableMatrix diff --git a/r/man/normalize_ranges.Rd b/r/man/normalize_ranges.Rd index 38acd248..bb9ca16b 100644 --- a/r/man/normalize_ranges.Rd +++ b/r/man/normalize_ranges.Rd @@ -21,6 +21,8 @@ normalize_ranges( \item{zero_based_coords}{If true, coordinates start and 0 and the end coordinate is not included in the range. If false, coordinates start at 1 and the end coordinate is included in the range} + +\item{n}{How many call frames to go up when printing errors} } \value{ data frame with zero-based coordinates, and elements chr (factor), start (int), and end (int).
@@ -34,10 +36,11 @@ Normalize an object representing genomic ranges } \examples{ ## Prep data +library(S4Vectors) ranges <- GenomicRanges::GRanges( - seqnames = S4Vectors::Rle(c("chr1", "chr2", "chr3"), c(1, 2, 2)), + seqnames = Rle(c("chr1", "chr2", "chr3"), c(1, 2, 2)), ranges = IRanges::IRanges(101:105, end = 111:115, names = head(letters, 5)), - strand = S4Vectors::Rle(GenomicRanges::strand(c("-", "+", "*")), c(1, 2, 2)), + strand = Rle(GenomicRanges::strand(c("-", "+", "*")), c(1, 2, 2)), score = 1:5, GC = seq(1, 0, length=5)) ranges diff --git a/r/man/open_fragments_10x.Rd b/r/man/open_fragments_10x.Rd index 6aa30050..f2df00ed 100644 --- a/r/man/open_fragments_10x.Rd +++ b/r/man/open_fragments_10x.Rd @@ -47,15 +47,25 @@ No disk operations will take place until the fragments are used in a function Fragments will be written to disk immediately, then returned in a readable object. } \examples{ -## Download example fragments from pbmc 500 dataset and save in temp directory +####################################################################### +## write_fragments_10x() example +####################################################################### +## Prep data +frags_table <- tibble::tribble( + ~chr, ~start, ~end, ~cell_id, + "chr1", 0, 5, "cell1", + "chr1", 2, 4, "cell2", + "chr2", 3, 6, "cell1", + "chr3", 7, 9, "cell2" +) +frags_table +frags <- frags_table \%>\% convert_to_fragments() + data_dir <- file.path(tempdir(), "frags_10x") +frags_file <- "demo_10x_frags.tsv.gz" dir.create(data_dir, recursive = TRUE, showWarnings = FALSE) -url_base <- "https://cf.10xgenomics.com/samples/cell-atac/2.0.0/atac_pbmc_500_nextgem/" -frags_file <- "atac_pbmc_500_nextgem_fragments.tsv.gz" -atac_raw_url <- paste0(url_base, frags_file) -if (!file.exists(file.path(data_dir, frags_file))) { - download.file(atac_raw_url, file.path(data_dir, frags_file), mode="wb") -} +write_fragments_10x(frags, file.path(data_dir, frags_file)) + 
####################################################################### ## open_fragments_10x() example @@ -68,19 +78,9 @@ frags <- open_fragments_10x( frags frags \%>\% write_fragments_dir( - file.path(data_dir, "demo_frags_from_h5"), + file.path(data_dir, "new_demo_10x_frags"), overwrite = TRUE ) -####################################################################### -## write_fragments_10x() example -####################################################################### -frags <- write_fragments_10x( - frags, - file.path(data_dir, paste0("new_", frags_file)) -) -frags - - } diff --git a/r/man/open_matrix_anndata_hdf5.Rd b/r/man/open_matrix_anndata_hdf5.Rd index 10c5da95..13be44a5 100644 --- a/r/man/open_matrix_anndata_hdf5.Rd +++ b/r/man/open_matrix_anndata_hdf5.Rd @@ -35,6 +35,8 @@ to an existing hdf5 file this group must not already be in use} \item{buffer_size}{For performance tuning only. The number of items to be buffered in memory before calling writes to disk.} +\item{mat}{IterableMatrix to write to hdf5 file} + \item{chunk_size}{For performance tuning only. The chunk size used for the HDF5 array storage.} \item{gzip_level}{Gzip compression level. 
Default is 0 (no compression)} diff --git a/r/man/palettes.Rd b/r/man/palettes.Rd index 00a01a90..7859e40b 100644 --- a/r/man/palettes.Rd +++ b/r/man/palettes.Rd @@ -27,3 +27,17 @@ large sets of distinguishable colors If the requested number of colors is too large, a new palette will be constructed via interpolation from the requested palette } +\examples{ +####################################################################### +## discrete_palette() example +####################################################################### +discrete_palette("stallion") + + +####################################################################### +## continuous_palette() example +####################################################################### +continuous_palette("bluePurpleDark") + + +} diff --git a/r/man/peak_matrix.Rd b/r/man/peak_matrix.Rd index c8e98fe7..51b0389a 100644 --- a/r/man/peak_matrix.Rd +++ b/r/man/peak_matrix.Rd @@ -25,7 +25,7 @@ peak_matrix( \item{zero_based_coords}{Whether to convert the ranges from a 1-based end-inclusive coordinate system to a 0-based end-exclusive coordinate system. Defaults to true for GRanges and false for other formats -(see this \href{https://web.archive.org/web/20210920203703/http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} +(see this \href{https://web.archive.org/web/20210920203703/https://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} \item{explicit_peak_names}{Boolean for whether to add rownames to the output matrix in format e.g chr1:500-1000, where start and end coords are given in a 0-based coordinate system. @@ -54,18 +54,41 @@ provide the ordering of chromosomes to expect while reading the tsv. 
} \examples{ ## Prep demo data -frags <- get_demo_frags(subset = FALSE) -chrom_sizes <- read_ucsc_chrom_sizes(file.path(tempdir(), "references"), genome="hg38") -blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") -frags_filter_blacklist <- frags \%>\% select_regions(blacklist, invert_selection = TRUE) -peaks <- call_peaks_tile( - frags_filter_blacklist, - chrom_sizes, - effective_genome_size = 2.8e9 +frags <- tibble::tribble( + ~chr, ~start, ~end, ~cell_id, + "chr1", 0, 5, "cell1", + "chr1", 2, 4, "cell2", + "chr2", 3, 6, "cell1", + "chr3", 7, 9, "cell2" +) \%>\% convert_to_fragments() +frags + +# Note: this is how we would normally call peaks given this data +# We use a toy example here +# chrom_sizes <- read_ucsc_chrom_sizes(file.path(tempdir(), "references"), genome="hg38") +# blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") +# frags_filter_blacklist <- frags \%>\% select_regions(blacklist, invert_selection = TRUE) +# peaks <- call_peaks_tile( +# frags_filter_blacklist, +# chrom_sizes, +# effective_genome_size = 2.8e9 +# ) +peaks <- tibble::tribble( + ~chr, ~start, ~end, ~group, ~p_val, ~q_val, ~enrichment, + "chr1", 1, 4, "all", 0, 0, 767, + "chr2", 2, 8, "all", 0, 0, 766, + "chr3", 5, 10, "all", 0, 0, 645 ) -top_peaks <- head(peaks, 5000) -top_peaks <- top_peaks[order_ranges(top_peaks, chrNames(frags)),] +peaks + +# We would normally select the top peaks like this: +# peaks <- head(peaks, 5000) +# peaks <- peaks[order_ranges(peaks, chrNames(frags)),] ## Get peak matrix -peak_matrix(frags_filter_blacklist, top_peaks, mode="insertions") +peak_mat <- peak_matrix(frags, peaks, mode="insertions") +peak_mat + +peak_mat \%>\% as("dgCMatrix") + } diff --git a/r/man/plot_dot.Rd b/r/man/plot_dot.Rd index e92da530..6b9e99af 100644 --- a/r/man/plot_dot.Rd +++ b/r/man/plot_dot.Rd @@ -44,9 +44,6 @@ mat <- get_demo_mat() cell_types <- paste("Group", rep(1:3, length.out = length(colnames(mat)))) ## Plot dot -plot <- plot_dot(mat,
c("MS4A1", "CD3E"), cell_types) - -BPCells:::render_plot_from_storage( - plot, width = 4, height = 5 -) +scale_next_plot_height(0.8) +plot_dot(mat, c("MS4A1", "CD3E"), cell_types) } diff --git a/r/man/plot_embedding.Rd b/r/man/plot_embedding.Rd index 749ed763..a400b41a 100644 --- a/r/man/plot_embedding.Rd +++ b/r/man/plot_embedding.Rd @@ -97,13 +97,15 @@ value stays the same. } } \examples{ +## Prep data set.seed(123) -mat <- get_demo_mat() +mat <- get_demo_mat()[,sample(1:ncol(get_demo_mat()), 200)] ## Normalize matrix -mat_norm <- log1p(multiply_cols(mat, 1/colSums(mat)) * 10000) \%>\% write_matrix_memory(compress = FALSE) +mat_norm <- log1p(multiply_cols(mat, 1/colSums(mat)) * 10000) \%>\% + write_matrix_memory(compress = FALSE) ## Get variable genes stats <- matrix_stats(mat, row_stats = "variance") -variable_genes <- order(stats$row_stats["variance",], decreasing=TRUE) \%>\% +variable_genes <- order(stats$row_stats["variance",], decreasing=TRUE) \%>\% head(1000) \%>\% sort() # Z score normalize genes @@ -136,5 +138,4 @@ plot_embedding(clusts, umap) # umap, # features = c("MS4A1", "CD3E"), #) - } diff --git a/r/man/plot_fragment_length.Rd b/r/man/plot_fragment_length.Rd index 0931b560..54add516 100644 --- a/r/man/plot_fragment_length.Rd +++ b/r/man/plot_fragment_length.Rd @@ -30,6 +30,6 @@ x-axis, and proportion of fragments on the y-axis. Typical plots will show width (about 150bp). 
} \examples{ -frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE) +frags <- get_demo_frags() plot_fragment_length(frags) } diff --git a/r/man/plot_tf_footprint.Rd b/r/man/plot_tf_footprint.Rd index 83dfc346..01d51fc8 100644 --- a/r/man/plot_tf_footprint.Rd +++ b/r/man/plot_tf_footprint.Rd @@ -10,7 +10,7 @@ plot_tf_footprint( cell_groups = rlang::rep_along(cellNames(fragments), "all"), flank = 250L, smooth = 0L, - zero_based_coords = !is(genes, "GRanges"), + zero_based_coords = !is(motif_positions, "GRanges"), colors = discrete_palette("stallion"), return_data = FALSE, apply_styling = TRUE @@ -33,6 +33,8 @@ weights for smoothing.} \item{zero_based_coords}{If true, coordinates start and 0 and the end coordinate is not included in the range. If false, coordinates start at 1 and the end coordinate is included in the range} +\item{colors}{Discrete color palette to use for cell groups} + \item{return_data}{If true, return data from just before plotting rather than a plot.} \item{apply_styling}{If false, return a plot without pretty styling applied} @@ -40,6 +42,20 @@ If false, coordinates start at 1 and the end coordinate is included in the range \description{ Plot the footprinting around TF motif sites } +\examples{ +\dontrun{ + +plot_tf_footprint( + frags, + motif_positions$CEBPA, + cell_groups = cell_types, + flank = 250, + smooth = 2 +) + + ggplot2::labs(title="CEBPA") +} +# See example in vignette Getting Started +} \seealso{ \code{footprint()}, \code{plot_tss_profile()} } diff --git a/r/man/plot_tss_profile.Rd b/r/man/plot_tss_profile.Rd index 08a5334f..d4d7471a 100644 --- a/r/man/plot_tss_profile.Rd +++ b/r/man/plot_tss_profile.Rd @@ -31,6 +31,8 @@ plot_tss_profile( \item{zero_based_coords}{If true, coordinates start and 0 and the end coordinate is not included in the range. 
If false, coordinates start at 1 and the end coordinate is included in the range} +\item{colors}{Discrete color palette to use for cell groups} + \item{return_data}{If true, return data from just before plotting rather than a plot.} \item{apply_styling}{If false, return a plot without pretty styling applied} @@ -41,16 +43,29 @@ Typically, this plot shows strong enrichment of insertions near a TSS, and a small bump downstream around 220bp downstream of the TSS for the +1 nucleosome. } \examples{ + ## Prep data -frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE) +frags <- get_demo_frags() +\dontrun{ genes <- read_gencode_transcripts( file.path(tempdir(), "references"), release = "42", annotation_set = "basic", - features = "transcript" + features = "transcript", timeout = 3000 ) +} + +## Use pre-computed transcripts for chr4 +genes <- readr::read_delim( + file.path( + system.file("extdata", package = "BPCells"), + "transcripts_filtered_example_chr_4.tsv.gz"), + delim = "\t", show_col_types = FALSE +) + ## Plot tss profile plot_tss_profile(frags, genes) + } \seealso{ \code{footprint()}, \code{plot_tf_footprint()} diff --git a/r/man/plot_tss_scatter.Rd b/r/man/plot_tss_scatter.Rd index 9c75ffd4..5cc96671 100644 --- a/r/man/plot_tss_scatter.Rd +++ b/r/man/plot_tss_scatter.Rd @@ -29,17 +29,27 @@ enrichment on the y-axis. 
This plot is most useful to select which cell barcodes in an experiment correspond to high-quality cells } \examples{ +\dontrun{ ## Prep data frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE) genes <- read_gencode_transcripts( file.path(tempdir(), "references"), release = "42", annotation_set = "basic", - features = "transcript" + features = "transcript", timeout = 3000 ) blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") atac_qc <- qc_scATAC(frags, genes, blacklist) +} +## Use pre-computed QC metrics for chr4 +atac_qc <- readr::read_delim( + file.path( + system.file("extdata", package = "BPCells"), + "qc_results_filtered_example_chr_4.tsv.gz"), + delim = "\t", show_col_types = FALSE +) ## Render tss enrichment vs fragment plot plot_tss_scatter(atac_qc, min_frags = 1000, min_tss = 10) + } diff --git a/r/man/qc_scATAC.Rd b/r/man/qc_scATAC.Rd index 476b9a7b..6eb8370c 100644 --- a/r/man/qc_scATAC.Rd +++ b/r/man/qc_scATAC.Rd @@ -50,9 +50,9 @@ re-calculate the ArchR QC stats. ArchR's \code{PromoterRatio} and \code{BlacklistRatio} are not included in the output, as they can be easily calculated from \code{ReadsInPromoter / nFrags} and \code{ReadsInBlacklist / nFrags}. Similarly, ArchR's \code{NucleosomeRatio} can be calculated -as \code{(monoNucleosomal + multiNucleosomal) / subNucleosomal}. 
} \examples{ +\dontrun{ ## Prep data frags <- get_demo_frags(subset = FALSE) reference_dir <- file.path(tempdir(), "references") @@ -69,3 +69,13 @@ blacklist <- read_encode_blacklist(reference_dir, genome = "hg38") ## Run qc head(qc_scATAC(frags, genes, blacklist)) } + +## Read precomputed +head(readr::read_delim( + file.path( + system.file("extdata", package = "BPCells"), + "qc_results_filtered_example_chr_4.tsv.gz" + ), + delim = "\t", show_col_types = FALSE +)) +} diff --git a/r/man/ragg_wrap.Rd b/r/man/ragg_wrap.Rd new file mode 100644 index 00000000..9d84a85b --- /dev/null +++ b/r/man/ragg_wrap.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackplots.R +\name{ragg_wrap} +\alias{ragg_wrap} +\title{Wrap \code{ragg::agg_png()} with optional one-shot height scaling} +\usage{ +ragg_wrap( + filename = "Rplot\%03d.png", + width = 480, + height = 480, + units = "px", + pointsize = 12, + background = "white", + res = 72, + scaling = 1, + snap_rect = TRUE, + bitsize = 8, + bg +) +} +\arguments{ +\item{filename}{The name of the file. Follows the same semantics as the file naming in \code{grDevices::png()}, +meaning that you can provide a sprintf() compliant string format to name multiple plots (such as the default value)} + +\item{width, height}{The dimensions of the device} + +\item{units}{The unit width and height is measured in, +in either pixels \code{('px')}, inches \code{('in')}, millimeters \code{('mm')}, or centimeter \code{('cm')}.} + +\item{pointsize}{The default pointsize of the device in pt. +This will in general not have any effect on grid graphics (including ggplot2) as text size is always set explicitly there.} + +\item{background}{The background color of the device} + +\item{res}{The resolution of the device. +This setting will govern how device dimensions given in inches, centimeters, or millimeters will be converted to pixels. 
+Further, it will be used to scale text sizes and linewidths} + +\item{scaling}{A scaling factor to apply to the rendered line width and text size. Useful for getting the right dimensions +at the resolution that you need. If e.g. you need to render a plot at 4000x3000 pixels for it to fit into a layout, +but you find that the result appears too small, you can increase the \code{scaling} argument to make everything appear bigger at the same resolution.} + +\item{snap_rect}{Should axis-aligned rectangles drawn with only fill snap to the pixel grid. +This will prevent anti-aliasing artifacts when two rectangles are touching at their border.} + +\item{bitsize}{Should the device record colour as 8 or 16bit} + +\item{bg}{Same as \code{background} for compatibility with old graphic device APIs} +} +\value{ +A graphics device as returned by \code{ragg::agg_png()}. +} +\description{ +Use \code{scale_next_plot_height()} to temporarily adjust the height of the \emph{next} +call to \code{ragg_wrap()}. This is handy when a downstream plot (e.g., produced +by \code{trackplot_combine()}) renders too tall/short in a pipeline and you want a +quick scaling tweak without touching the plot code itself. The scaling factor +is applied once and then cleared. +} +\examples{ +# Create a simple plot using ragg_wrap +tmp_file <- tempfile(fileext = ".png") +ragg_wrap(tmp_file, width = 400, height = 300) +plot(1:10, 1:10) +dev.off() + +# Use scale_next_plot_height to adjust height +scale_next_plot_height(2) +tmp_file_scaled <- tempfile(fileext = ".png") +# Height will be effectively 600 +ragg_wrap(tmp_file_scaled, width = 400, height = 300) +plot(1:10, 1:10) +dev.off() + +# Clean up +unlink(c(tmp_file, tmp_file_scaled)) +} diff --git a/r/man/read_bed.Rd b/r/man/read_bed.Rd index 6e90d4d8..86530709 100644 --- a/r/man/read_bed.Rd +++ b/r/man/read_bed.Rd @@ -45,27 +45,28 @@ Read a bed file from disk or a url. 
\strong{read_encode_blacklist} -Downloads the Boyle Lab blacklist, as described in \url{https://doi.org/10.1038/s41598-019-45839-z} +Downloads the Boyle Lab blacklist, as described in \doi{10.1038/s41598-019-45839-z} } \examples{ ## Dummy bed file creation +file_name <- tempfile(fileext = ".bed") data.frame( chrom = rep("chr1", 6), start = seq(20, 121, 20), end = seq(39, 140, 20) -) \%>\% write.table("./references/example.bed", row.names = FALSE, col.names = FALSE, sep = "\t") +) \%>\% write.table(file_name, row.names = FALSE, col.names = FALSE, sep = "\t") ####################################################################### ## read_bed() example ####################################################################### -read_bed("./references/example.bed") +read_bed(file_name) ####################################################################### ## read_encode_blacklist() example ####################################################################### -read_encode_blacklist("./reference") +read_encode_blacklist(file.path(tempdir(), "references")) } diff --git a/r/man/read_gtf.Rd b/r/man/read_gtf.Rd index d7e6a5d6..7597375b 100644 --- a/r/man/read_gtf.Rd +++ b/r/man/read_gtf.Rd @@ -111,21 +111,50 @@ Read transcript models from GENCODE, for use with trackplot_gene() ####################################################################### ## read_gtf() example ####################################################################### +if (interactive()) { + species <- "Saccharomyces_cerevisiae" version <- "GCF_000146045.2_R64" head(read_gtf( - path = sprintf("./reference/\%s_genomic.gtf.gz", version), + path = sprintf("\%s/\%s_genomic.gtf.gz", file.path(tempdir(), "references"), version), backup_url = sprintf( "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/fungi/\%s/reference/\%s/\%s_genomic.gtf.gz", species, version, version ) )) +#> # A tibble: 6 x 9 +#> chr source feature start end score strand frame gene_id +#> +#> 1 NC_001133.9 RefSeq gene 1806 2169 . - . 
YAL068C +#> 2 NC_001133.9 RefSeq gene 2479 2707 . + . YAL067W-A +#> 3 NC_001133.9 RefSeq gene 7234 9016 . - . YAL067C +#> 4 NC_001133.9 RefSeq gene 11564 11951 . - . YAL065C +#> 5 NC_001133.9 RefSeq gene 12045 12426 . + . YAL064W-B +#> 6 NC_001133.9 RefSeq gene 13362 13743 . - . YAL064C-A +} ####################################################################### ## read_gencode_genes() example ####################################################################### -read_gencode_genes("./references", release = "42") +if (interactive()) { +read_gencode_genes(file.path(tempdir(), "references"), release = "42", timeout = 3000) +#> # A tibble: 39,319 x 11 +#> chr source feature start end score strand frame gene_id gene_type +#> +#> 1 chr1 HAVANA gene 11868 14409 . + . ENSG00000290... lncRNA +#> 2 chr1 HAVANA gene 29553 31109 . + . ENSG00000243... lncRNA +#> 3 chr1 HAVANA gene 34553 36081 . - . ENSG00000237... lncRNA +#> 4 chr1 HAVANA gene 57597 64116 . + . ENSG00000290... lncRNA +#> 5 chr1 HAVANA gene 65418 71585 . + . ENSG00000186... protein_... +#> 6 chr1 HAVANA gene 89294 133723 . - . ENSG00000238... lncRNA +#> 7 chr1 HAVANA gene 89550 91105 . - . ENSG00000239... lncRNA +#> 8 chr1 HAVANA gene 139789 140339 . - . ENSG00000239... lncRNA +#> 9 chr1 HAVANA gene 141473 173862 . - . ENSG00000241... lncRNA +#> 10 chr1 HAVANA gene 160445 161525 . + . ENSG00000241... 
lncRNA +#> # i 39,309 more rows +#> # i 1 more variable: gene_name +} ####################################################################### @@ -133,9 +162,24 @@ read_gencode_genes("./references", release = "42") ####################################################################### ## If read_gencode_genes() was already ran on the same release, ## will reuse previously downloaded annotations -read_gencode_transcripts("./references", release = "42") - - +if (interactive()) { +read_gencode_transcripts(file.path(tempdir(), "references"), release = "42", timeout = 3000) +#> # A tibble: 220,296 x 13 +#> chr source feature start end score strand frame gene_id gene_type +#> +#> 1 chr1 HAVANA transcript 65418 71585 . + . ENSG00000... protein_... +#> 2 chr1 HAVANA exon 65418 65433 . + . ENSG00000... protein_... +#> 3 chr1 HAVANA exon 65519 65573 . + . ENSG00000... protein_... +#> 4 chr1 HAVANA exon 69036 71585 . + . ENSG00000... protein_... +#> 5 chr1 HAVANA transcript 450739 451678 . - . ENSG00000... protein_... +#> 6 chr1 HAVANA exon 450739 451678 . - . ENSG00000... protein_... +#> 7 chr1 HAVANA transcript 685715 686654 . - . ENSG00000... protein_... +#> 8 chr1 HAVANA exon 685715 686654 . - . ENSG00000... protein_... +#> 9 chr1 HAVANA transcript 923922 944574 . + . ENSG00000... protein_... +#> 10 chr1 HAVANA exon 923922 924948 . + . ENSG00000... protein_... +#> # i 220,286 more rows +#> # i 3 more variables: gene_name , transcript_id , MANE_Select +} } \seealso{ \code{\link[=read_bed]{read_bed()}}, \code{\link[=read_encode_blacklist]{read_encode_blacklist()}} diff --git a/r/man/read_ucsc_chrom_sizes.Rd b/r/man/read_ucsc_chrom_sizes.Rd index 646e4488..669f7091 100644 --- a/r/man/read_ucsc_chrom_sizes.Rd +++ b/r/man/read_ucsc_chrom_sizes.Rd @@ -11,11 +11,21 @@ read_ucsc_chrom_sizes( timeout = 300 ) } +\arguments{ +\item{dir}{Output directory to cache the downloaded chrom sizes file} + +\item{genome}{Genome name. 
Defaults to hg38} + +\item{keep_chromosomes}{Regular expression with which chromosomes to keep. +Defaults to standard chromosomes (chr1-22, chrX, chrY)} + +\item{timeout}{Maximum time in seconds to wait for download from UCSC} +} \description{ Read chromosome sizes from UCSC and return as a tibble with one row per chromosome. The underlying data is pulled from here: \url{https://hgdownload.soe.ucsc.edu/downloads.html} } \examples{ -read_ucsc_chrom_sizes("./reference") +read_ucsc_chrom_sizes(file.path(tempdir(), "references")) } diff --git a/r/man/regress_out.Rd b/r/man/regress_out.Rd index 8d8e7148..b2a8422c 100644 --- a/r/man/regress_out.Rd +++ b/r/man/regress_out.Rd @@ -40,3 +40,19 @@ Generally, \code{n_features == ncol(latent_data)}, but for categorical variables category will be expanded into its own indicator variable. Memory usage will therefore be higher when using categorical input variables with many (i.e. >100) distinct values. } +\examples{ +set.seed(12345) +mat <- matrix(rnorm(1000 * 100), nrow = 1000, ncol = 100) +rownames(mat) <- paste0("gene", 1:1000) +colnames(mat) <- paste0("cell", 1:100) +mat <- as(mat, "dgCMatrix") \%>\% as("IterableMatrix") + +latent_data <- data.frame( + batch = sample(c("A", "B"), 100, replace = TRUE), + age = rnorm(100, mean = 30, sd = 10) +) + +# Regress out batch and age +res <- regress_out(mat, latent_data, prediction_axis = "row") + +} diff --git a/r/man/render_plot_from_storage.Rd b/r/man/render_plot_from_storage.Rd deleted file mode 100644 index bfcea1f6..00000000 --- a/r/man/render_plot_from_storage.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/trackplots.R -\name{render_plot_from_storage} -\alias{render_plot_from_storage} -\title{Render a plot with intermediate disk storage step} -\usage{ -render_plot_from_storage(plot, width, height) -} -\arguments{ -\item{plot}{(ggplot) ggplot output from a plotting function} - -\item{width}{(numeric) width of 
rendered plot} - -\item{height}{(numeric) height of rendered plot} -} -\description{ -Take a plotting object and save in temp storage, so it can be outputted with exact dimensions. -Primarily used to allow for adjusting plot dimensions within function reference examples. -} -\keyword{internal} diff --git a/r/man/rotate_x_labels.Rd b/r/man/rotate_x_labels.Rd index 64dfc634..b508760c 100644 --- a/r/man/rotate_x_labels.Rd +++ b/r/man/rotate_x_labels.Rd @@ -12,3 +12,14 @@ rotate_x_labels(degrees = 45) \description{ Rotate ggplot x axis labels } +\examples{ +mat <- get_demo_mat() +cell_types <- paste("Group", rep(1:3, length.out = length(colnames(mat)))) + +## Plot dot +scale_next_plot_height(0.8) +plot_dot(mat, c("MS4A1", "CD3E"), cell_types) + +scale_next_plot_height(0.8) +plot_dot(mat, c("MS4A1", "CD3E"), cell_types) + rotate_x_labels(90) +} diff --git a/r/man/scale_next_plot_height.Rd b/r/man/scale_next_plot_height.Rd new file mode 100644 index 00000000..f20b1f10 --- /dev/null +++ b/r/man/scale_next_plot_height.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/trackplots.R +\name{scale_next_plot_height} +\alias{scale_next_plot_height} +\title{Temporarily scale the height of the next plot device} +\usage{ +scale_next_plot_height(scale) +} +\arguments{ +\item{scale}{Numeric multiplier applied to the \code{height} argument the next +time \code{ragg_wrap()} is called.} +} +\value{ +Returns the previous option value (as returned by \code{options()}). +} +\description{ +Stores a scaling factor that \code{ragg_wrap()} consumes once, letting you tweak +the rendered height of the next PNG device without modifying the plot code. 
+} +\examples{ +## Prep data +## Peaks generated from demo frags, as input into `call_peaks_tile()` +peaks <- tibble::tibble( + chr = factor(rep("chr4", 16)), + start = c(3041400, 3041733, 3037400, 3041933, 3040466, 3041200, + 3038200, 3038000, 3040266, 3037733, 3040800, 3042133, + 3038466, 3037200, 3043333, 3040066), + end = c(3041600, 3041933, 3037600, 3042133, 3040666, 3041400, + 3038400, 3038200, 3040466, 3037933, 3041000, 3042333, + 3038666, 3037400, 3043533, 3040266), + enrichment = c(46.4, 43.5, 28.4, 27.3, 17.3, 11.7, + 10.5, 7.95, 7.22, 6.86, 6.32, 6.14, + 5.96, 5.06, 4.51, 3.43) +) +region <- "chr4:3034877-3044877" + +## Plot peaks +scale_next_plot_height(0.3) +trackplot_genome_annotation(peaks, region, color_by = "enrichment") +} diff --git a/r/man/sctransform_pearson.Rd b/r/man/sctransform_pearson.Rd index 093bcd78..c300aab6 100644 --- a/r/man/sctransform_pearson.Rd +++ b/r/man/sctransform_pearson.Rd @@ -47,3 +47,18 @@ log-scale to represent the cell_read_counts and gene_beta variables. It also does not support the addition of arbitrary cell metadata (e.g. batch) to add to the negative binomial regression. } +\examples{ +set.seed(12345) +mat <- matrix(rpois(1000 * 100, lambda = 0.5), nrow = 1000, ncol = 100) +rownames(mat) <- paste0("gene", 1:1000) +colnames(mat) <- paste0("cell", 1:100) +mat <- as(mat, "dgCMatrix") \%>\% as("IterableMatrix") + +# Calculate dummy parameters +gene_theta <- runif(1000, 0.1, 10) +gene_beta <- runif(1000, 0.1, 10) +cell_read_counts <- runif(100, 1000, 10000) + +res <- sctransform_pearson(mat, gene_theta, gene_beta, cell_read_counts) + +} diff --git a/r/man/select_regions.Rd b/r/man/select_regions.Rd index 30b62c18..8d7e79b0 100644 --- a/r/man/select_regions.Rd +++ b/r/man/select_regions.Rd @@ -25,7 +25,7 @@ instead of only fragments overlapping the selected regions.} \item{zero_based_coords}{Whether to convert the ranges from a 1-based end-inclusive coordinate system to a 0-based end-exclusive coordinate system. 
Defaults to true for GRanges and false for other formats -(see this \href{https://web.archive.org/web/20210920203703/http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} +(see this \href{https://web.archive.org/web/20210920203703/https://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} } \value{ Fragments object filtered according to the selected regions diff --git a/r/man/svds-internal.Rd b/r/man/svds-internal.Rd new file mode 100644 index 00000000..8d060296 --- /dev/null +++ b/r/man/svds-internal.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matrix.R +\name{svds-internal} +\alias{svds-internal} +\alias{svds.default} +\alias{svds.IterableMatrix} +\title{Internal svds methods} +\usage{ +\method{svds}{default}(A, k, nu = k, nv = k, opts = list(), ...) + +\method{svds}{IterableMatrix}(A, k, nu = k, nv = k, opts = list(), threads = 0, ...) +} +\description{ +Internal svds methods +} +\keyword{internal} diff --git a/r/man/svds.Rd b/r/man/svds.Rd index 3121210d..9822a7a4 100644 --- a/r/man/svds.Rd +++ b/r/man/svds.Rd @@ -1,72 +1,44 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/matrix-svds-docs.R +% Please edit documentation in R/matrix.R \name{svds} \alias{svds} \title{Calculate svds} \usage{ -svds(A, k, nu = k, nv = k, opts = list(), threads=0L, ...) +svds(A, k, nu = k, nv = k, opts = list(), threads = 0L, ...) } \arguments{ \item{A}{The matrix whose truncated SVD is to be computed.} \item{k}{Number of singular values requested.} -\item{nu}{Number of right singular vectors to be computed. This must be between 0 and 'k'. (Must be equal to 'k' for BPCells IterableMatrix)} +\item{nu}{Number of left singular vectors to compute (must equal \code{k} for IterableMatrix).} -\item{opts}{Control parameters related to computing algorithm. 
See \emph{Details} below} +\item{nv}{Number of right singular vectors to compute (must equal \code{k} for IterableMatrix).} -\item{threads}{Control threads to use calculating mat-vec producs (BPCells specific)} +\item{opts}{Control parameters related to the computing algorithm (see Details).} + +\item{threads}{Threads to use for matrix-vector products (BPCells specific).} + +\item{...}{Additional arguments passed to \code{RSpectra::svds()} when running on non-BPCells matrices.} } \value{ -A list with the following components: -\item{d}{A vector of the computed singular values.} -\item{u}{An \code{m} by \code{nu} matrix whose columns contain -the left singular vectors. If \code{nu == 0}, \code{NULL} -will be returned.} -\item{v}{An \code{n} by \code{nv} matrix whose columns contain -the right singular vectors. If \code{nv == 0}, \code{NULL} -will be returned.} -\item{nconv}{Number of converged singular values.} -\item{niter}{Number of iterations used.} -\item{nops}{Number of matrix-vector multiplications used.} +A list with components \code{d}, \code{u}, \code{v}, \code{nconv}, \code{niter}, and \code{nops}. } \description{ -Use the C++ Spectra solver (same as RSpectra package), in order to -compute the largest k values and corresponding singular vectors. -Empirically, memory usage is much lower than using \code{irlba::irlba()}, likely -due to avoiding R garbage creation while solving due to the pure-C++ solver. -This documentation is a slightly-edited version of the \code{RSpectra::svds()} -documentation. +Use the C++ Spectra solver (same as RSpectra package) to compute the largest +\code{k} singular values and corresponding singular vectors. Empirically, memory +use is much lower than \code{irlba::irlba()} because the solver runs entirely in C++. } \details{ -When RSpectra is installed, this function will just add a method to -\code{RSpectra::svds()} for the \code{IterableMatrix} class. 
- -The \code{opts} argument is a list that can supply any of the -following parameters: +When RSpectra is installed, this adds a method to \code{RSpectra::svds()} for +\code{IterableMatrix} objects; otherwise it falls back to the bundled implementation. -\describe{ -\item{\code{ncv}}{Number of Lanzcos basis vectors to use. More vectors -will result in faster convergence, but with greater -memory use. \code{ncv} must be satisfy -\eqn{k < ncv \le p}{k < ncv <= p} where -\code{p = min(m, n)}. -Default is \code{min(p, max(2*k+1, 20))}.} -\item{\code{tol}}{Precision parameter. Default is 1e-10.} -\item{\code{maxitr}}{Maximum number of iterations. Default is 1000.} -\item{\code{center}}{Either a logical value (\code{TRUE}/\code{FALSE}), or a numeric -vector of length \eqn{n}. If a vector \eqn{c} is supplied, then -SVD is computed on the matrix \eqn{A - 1c'}{A - 1 * c'}, -in an implicit way without actually forming this matrix. -\code{center = TRUE} has the same effect as -\code{center = colMeans(A)}. Default is \code{FALSE}. Ignored in BPCells} -\item{\code{scale}}{Either a logical value (\code{TRUE}/\code{FALSE}), or a numeric -vector of length \eqn{n}. If a vector \eqn{s} is supplied, then -SVD is computed on the matrix \eqn{(A - 1c')S}{(A - 1 * c')S}, -where \eqn{c} is the centering vector and \eqn{S = diag(1/s)}. -If \code{scale = TRUE}, then the vector \eqn{s} is computed as -the column norm of \eqn{A - 1c'}{A - 1 * c'}. -Default is \code{FALSE}. Ignored in BPCells} +The \code{opts} list may include: +\itemize{ +\item \code{ncv}: Number of Lanczos basis vectors (\verb{k < ncv <= min(m, n)}). Default \code{min(p, max(2*k+1, 20))}. +\item \code{tol}: Precision parameter (default \code{1e-10}). +\item \code{maxitr}: Maximum iterations (default \code{1000}). +\item \code{center}, \code{scale}: Centering/scaling options (ignored for BPCells). 
} } \examples{ @@ -76,17 +48,15 @@ colnames(mat) <- paste0("cell", seq_len(10)) mat <- mat \%>\% as("dgCMatrix") \%>\% as("IterableMatrix") svd_res <- svds(mat, k = 5) - names(svd_res) - svd_res$d - dim(svd_res$u) - dim(svd_res$v) -# Can also pass in values directly into RSpectra::svds -svd_res <- svds(mat, k = 5, opts=c(maxitr = 500)) + +# Can also pass values directly into RSpectra::svds +svd_res <- svds(mat, k = 5, opts = c(maxitr = 500)) + } \references{ -Qiu Y, Mei J (2022). \emph{RSpectra: Solvers for Large-Scale Eigenvalue and SVD Problems}. R package version 0.16-1, \url{https://CRAN.R-project.org/package=RSpectra}. +Qiu Y, Mei J (2022). RSpectra: Solvers for Large-Scale Eigenvalue and SVD Problems. R package version 0.16-1. } diff --git a/r/man/tile_matrix.Rd b/r/man/tile_matrix.Rd index 1e9c5f90..fdf4187b 100644 --- a/r/man/tile_matrix.Rd +++ b/r/man/tile_matrix.Rd @@ -29,7 +29,7 @@ Must be non-overlapping and sorted by \item{zero_based_coords}{Whether to convert the ranges from a 1-based end-inclusive coordinate system to a 0-based end-exclusive coordinate system. Defaults to true for GRanges and false for other formats -(see this \href{https://web.archive.org/web/20210920203703/http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} +(see this \href{https://web.archive.org/web/20210920203703/https://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} \item{explicit_tile_names}{Boolean for whether to add rownames to the output matrix in format e.g chr1:500-1000, where start and end coords are given in a 0-based coordinate system. 
For diff --git a/r/man/trackplot_bulk.Rd b/r/man/trackplot_bulk.Rd index a65a1a01..e69ef606 100644 --- a/r/man/trackplot_bulk.Rd +++ b/r/man/trackplot_bulk.Rd @@ -24,7 +24,7 @@ trackplot_bulk( \item{fragments}{Fragments object} \item{region}{GRanges of length 1 with region to plot, or list/data.frame with -one entry each for chr, start, end. See \code{gene_region()} or \link{genomic-ranges} for details} +one entry each for chr, start, end. See \code{gene_region()} or \link{genomic-ranges-like} for details} \item{groups}{Vector with one entry per cell, specifying the cell's group} @@ -44,7 +44,7 @@ just the most extreme outliers across the region. NULL to disable clipping} \item{zero_based_coords}{Whether to convert the ranges from a 1-based end-inclusive coordinate system to a 0-based end-exclusive coordinate system. Defaults to true for GRanges and false for other formats -(see this \href{https://web.archive.org/web/20210920203703/http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} +(see this \href{https://web.archive.org/web/20210920203703/https://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} \item{return_data}{If true, return data from just before plotting rather than a plot.} @@ -67,4 +67,7 @@ This function has been renamed to \code{trackplot_coverage()} Plot a pseudobulk genome track, showing the number of fragment insertions across a region. 
} +\examples{ +# See ?trackplot_coverage for examples +} \keyword{internal} diff --git a/r/man/trackplot_calculate_segment_height.Rd b/r/man/trackplot_calculate_segment_height.Rd index 7d785c2f..41e299d8 100644 --- a/r/man/trackplot_calculate_segment_height.Rd +++ b/r/man/trackplot_calculate_segment_height.Rd @@ -2,14 +2,7 @@ % Please edit documentation in R/trackplots.R \name{trackplot_calculate_segment_height} \alias{trackplot_calculate_segment_height} -\title{Calculate y positions for trackplot segments to avoid overlap -Steps: -\enumerate{ -\item Calculate the maximum overlap depth of transcripts -\item Iterate through start/end of segments in sorted order -\item Randomly assign each segment a y-coordinate between 1 and max overlap depth, -with the restriction that a segment can't have the same y-coordinate as an overlapping segment -}} +\title{Calculate y positions for trackplot segments to avoid overlap} \usage{ trackplot_calculate_segment_height(data) } @@ -17,16 +10,15 @@ trackplot_calculate_segment_height(data) \item{data}{tibble of genome ranges with start and end columns, assumed to be on same chromosome.} } \value{ -Vector of y coordinates, one per input row, such that no ranges at the same y coordinate overlap +Vector of y coordinates, one per input row, such that no ranges at the same y coordinate overlap. 
} \description{ -Calculate y positions for trackplot segments to avoid overlap Steps: \enumerate{ \item Calculate the maximum overlap depth of transcripts \item Iterate through start/end of segments in sorted order \item Randomly assign each segment a y-coordinate between 1 and max overlap depth, -with the restriction that a segment can't have the same y-coordinate as an overlapping segment +with the restriction that a segment cannot have the same y-coordinate as an overlapping segment } } \keyword{internal} diff --git a/r/man/trackplot_combine.Rd b/r/man/trackplot_combine.Rd index a18d0b34..f6774fc4 100644 --- a/r/man/trackplot_combine.Rd +++ b/r/man/trackplot_combine.Rd @@ -33,6 +33,7 @@ Combines multiple track plots of the same region into a single grid. Uses the \code{patchwork} package to perform the alignment. } \examples{ +\dontrun{ ## Prep data frags <- get_demo_frags() @@ -40,31 +41,48 @@ frags <- get_demo_frags() genes <- read_gencode_transcripts( file.path(tempdir(), "references"), release = "42", annotation_set = "basic", - features = "transcript" + features = "transcript", timeout = 3000 ) blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") read_counts <- qc_scATAC(frags, genes, blacklist)$nFrags +} + +## We use pre-generated data for this example +frags <- get_demo_frags() region <- "chr4:3034877-4034877" cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags)))) -transcripts <- read_gencode_transcripts( - file.path(tempdir(), "references"), release = "42", - annotation_set = "basic" +genes <- readr::read_delim( + file.path( + system.file("extdata", package = "BPCells"), + "transcripts_filtered_example_chr_4.tsv.gz"), + delim = "\t", show_col_types = FALSE ) +read_counts <- readr::read_delim( + file.path( + system.file("extdata", package = "BPCells"), + "qc_results_filtered_example_chr_4.tsv.gz"), + delim = "\t", show_col_types = FALSE +)$nFrags region <- "chr4:3034877-4034877" ## Get all trackplots 
and scalebars to combine plot_scalebar <- trackplot_scalebar(region) -plot_gene <- trackplot_gene(transcripts, region) -plot_coverage <- trackplot_coverage(frags, region, groups = cell_types, cell_read_counts = read_counts) - +plot_gene <- trackplot_gene(genes, region) +plot_coverage <- trackplot_coverage( + frags, + region, + groups = cell_types, + cell_read_counts = read_counts +) ## Combine trackplots and render ## Also remove colors from gene track -plot <- trackplot_combine( +scale_next_plot_height(0.6) +trackplot_combine( list(plot_scalebar, plot_coverage, plot_gene + ggplot2::guides(color = "none")) ) -BPCells:::render_plot_from_storage(plot, width = 6, height = 4) + } \seealso{ \code{trackplot_coverage()}, \code{trackplot_gene()}, \code{trackplot_loop()}, \code{trackplot_scalebar()} diff --git a/r/man/trackplot_coverage.Rd b/r/man/trackplot_coverage.Rd index 9abf0fe1..6516453b 100644 --- a/r/man/trackplot_coverage.Rd +++ b/r/man/trackplot_coverage.Rd @@ -42,11 +42,9 @@ just the most extreme outliers across the region. NULL to disable clipping} \item{zero_based_coords}{Whether to convert the ranges from a 1-based end-inclusive coordinate system to a 0-based end-exclusive coordinate system. Defaults to true for GRanges and false for other formats -(see this \href{https://web.archive.org/web/20210920203703/http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} +(see this \href{https://web.archive.org/web/20210920203703/https://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/}{archived UCSC blogpost})} \item{return_data}{If true, return data from just before plotting rather than a plot.} - -\item{scale_bar}{Whether to include a scale bar in the top track (\code{TRUE} or \code{FALSE})} } \value{ Returns a combined plot of pseudobulk genome tracks. 
For compatability with @@ -59,23 +57,35 @@ Plot a pseudobulk genome track, showing the number of fragment insertions across a region for each cell type or group. } \examples{ +## Prep data frags <- get_demo_frags() +cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags)))) +\dontrun{ ## Use genes and blacklist to determine proper number of reads per cell genes <- read_gencode_transcripts( file.path(tempdir(), "references"), release = "42", annotation_set = "basic", - features = "transcript" + features = "transcript", timeout = 3000 ) blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") + read_counts <- qc_scATAC(frags, genes, blacklist)$nFrags +} region <- "chr4:3034877-4034877" -cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags)))) + +## We use pre-generated data for this example +read_counts <- readr::read_delim( + file.path(system.file("extdata", package = "BPCells"), + "qc_results_filtered_example_chr_4.tsv.gz"), + delim = "\t", show_col_types = FALSE +)$nFrags -BPCells:::render_plot_from_storage( - trackplot_coverage(frags, region, groups = cell_types, cell_read_counts = read_counts), - width = 6, height = 3 +scale_next_plot_height(0.5) +trackplot_coverage( + frags, region, groups = cell_types, + cell_read_counts = read_counts ) } \seealso{ diff --git a/r/man/trackplot_gene.Rd b/r/man/trackplot_gene.Rd index 96285492..fa51a562 100644 --- a/r/man/trackplot_gene.Rd +++ b/r/man/trackplot_gene.Rd @@ -35,11 +35,9 @@ or list/data.frame/GRanges of length 1 specifying chr, start, end. See \code{hel \item{label_size}{size for transcript labels in units of mm} -\item{return_data}{If true, return data from just before plotting rather than a plot.} - -\item{labels}{Character vector with labels for each item in transcripts. 
NA for items that should not be labeled} +\item{track_label}{Label to put on the side of the track} -\item{transcript_size}{size for transcript lines in units of mm} +\item{return_data}{If true, return data from just before plotting rather than a plot.} } \value{ Plot of gene locations @@ -48,17 +46,24 @@ Plot of gene locations Plot transcript models } \examples{ +\dontrun{ ## Prep data transcripts <- read_gencode_transcripts( file.path(tempdir(), "references"), release = "42", - annotation_set = "basic", features = "transcript" + annotation_set = "basic", features = "transcript", timeout = 3000 ) -region <- "chr4:3034877-4034877" +} + +## We use pre-generated data for this example +transcripts <- readr::read_delim( + file.path(system.file("extdata", package = "BPCells"), + "transcripts_filtered_example_chr_4.tsv.gz")) +region <- "chr4:3264877-3634877" ## Plot gene trackplot -plot <- trackplot_gene(transcripts, region) -BPCells:::render_plot_from_storage(plot, width = 6, height = 1) +scale_next_plot_height(0.3) +trackplot_gene(transcripts, region) } \seealso{ \code{trackplot_combine()}, \code{trackplot_coverage()}, \code{trackplot_loop()}, \code{trackplot_scalebar()} diff --git a/r/man/trackplot_genome_annotation.Rd b/r/man/trackplot_genome_annotation.Rd index f5bf7129..8e9f66ff 100644 --- a/r/man/trackplot_genome_annotation.Rd +++ b/r/man/trackplot_genome_annotation.Rd @@ -39,6 +39,8 @@ otherwise it is interpreted as a discrete color palette in \code{ggplot2::scale_ \item{annotation_size}{size for annotation lines in mm} +\item{track_label}{Label to put on the side of the track} + \item{return_data}{If true, return data from just before plotting rather than a plot.} } \value{ @@ -65,10 +67,8 @@ peaks <- tibble::tibble( region <- "chr4:3034877-3044877" ## Plot peaks -BPCells:::render_plot_from_storage( - trackplot_genome_annotation(peaks, region, color_by = "enrichment"), - width = 6, height = 1 -) +scale_next_plot_height(0.3) +trackplot_genome_annotation(peaks, 
region, color_by = "enrichment") } \seealso{ \code{trackplot_combine()}, \code{trackplot_coverage()}, \code{trackplot_loop()}, \code{trackplot_scalebar()}, \code{trackplot_gene()} diff --git a/r/man/trackplot_loop.Rd b/r/man/trackplot_loop.Rd index 5ebbb94b..9f87c832 100644 --- a/r/man/trackplot_loop.Rd +++ b/r/man/trackplot_loop.Rd @@ -33,6 +33,8 @@ otherwise it is interpreted as a discrete color palette in \code{ggplot2::scale_ \item{curvature}{Curvature value between 0 and 1. 1 is a 180-degree arc, and 0 is flat lines.} +\item{track_label}{Label to put on the side of the track} + \item{return_data}{If true, return data from just before plotting rather than a plot.} } \value{ @@ -52,8 +54,8 @@ loops <- tibble::tibble( region <- "chr4:3034877-4034877" ## Plot loops -plot <- trackplot_loop(loops, region, color_by = "score") -BPCells:::render_plot_from_storage(plot, width = 6, height = 1.5) +scale_next_plot_height(0.3) +trackplot_loop(loops, region, color_by = "score") } \seealso{ \code{trackplot_combine()}, \code{trackplot_coverage()}, \code{trackplot_gene()}, \code{trackplot_scalebar()}, \code{trackplot_genome_annotation()} diff --git a/r/man/trackplot_scalebar.Rd b/r/man/trackplot_scalebar.Rd index f52f663e..f958f423 100644 --- a/r/man/trackplot_scalebar.Rd +++ b/r/man/trackplot_scalebar.Rd @@ -20,9 +20,8 @@ Plots a human-readable scale bar and coordinates of the region being plotted } \examples{ region <- "chr4:3034877-3044877" -BPCells:::render_plot_from_storage( - trackplot_scalebar(region), width = 6, height = 1 -) +scale_next_plot_height(0.3) +trackplot_scalebar(region) } \seealso{ \code{trackplot_combine()}, \code{trackplot_coverage()}, \code{trackplot_gene()}, \code{trackplot_loop()} diff --git a/r/man/trackplot_utils.Rd b/r/man/trackplot_utils.Rd index 5a4cb1c7..66bf4351 100644 --- a/r/man/trackplot_utils.Rd +++ b/r/man/trackplot_utils.Rd @@ -31,3 +31,11 @@ get_trackplot_height(plot) Adjust labels and heights on trackplots. 
Labels are set as facet labels in ggplot2, and heights are additional properties read by \code{trackplot_combine()} to determine relative height of input plots. } +\examples{ +region <- "chr4:3034877-3044877" +plot <- trackplot_scalebar(region) +plot_labeled <- set_trackplot_label(plot, "Scalebar") + +plot_height_set <- set_trackplot_height(plot, 2) +get_trackplot_height(plot_height_set) +} diff --git a/r/man/transpose_storage_order.Rd b/r/man/transpose_storage_order.Rd index 29b4021e..cd4b8348 100644 --- a/r/man/transpose_storage_order.Rd +++ b/r/man/transpose_storage_order.Rd @@ -47,7 +47,8 @@ mat ## A regular transpose operation switches a user's rows and cols t(mat) -## Running `transpose_storage_order()` instead changes whether the storage is in row-major or col-major, +## Running `transpose_storage_order()` instead changes whether +## the storage is in row-major or col-major, ## but does not switch the rows and cols transpose_storage_order(mat) } diff --git a/r/man/write_insertion_bedgraph.Rd b/r/man/write_insertion_bedgraph.Rd index c8767023..38b5e96a 100644 --- a/r/man/write_insertion_bedgraph.Rd +++ b/r/man/write_insertion_bedgraph.Rd @@ -63,6 +63,7 @@ Beds only hold chrom, start, and end data, while bedGraphs also provide a score This reports the total number of insertions at each basepair for each group listed in \code{cell_groups}. 
} \examples{ +\dontrun{ ## Prep data frags <- get_demo_frags() bedgraph_outputs <- file.path(tempdir(), "bedgraph_outputs") @@ -75,7 +76,7 @@ write_insertion_bedgraph(frags, file.path(bedgraph_outputs, "all.tar.gz")) list.files(bedgraph_outputs) # With tiling -chrom_sizes <- read_ucsc_chrom_sizes("./reference", genome="hg38") \%>\% +chrom_sizes <- read_ucsc_chrom_sizes(file.path(tempdir(), "references"), genome="hg38") \%>\% dplyr::filter(chr \%in\% c("chr4", "chr11")) write_insertion_bedgraph(frags, file.path(bedgraph_outputs, "all_tiled.bedGraph"), chrom_sizes = chrom_sizes, normalization_method = "cpm", tile_width = 100) @@ -83,12 +84,20 @@ reads <- readr::read_tsv(file.path(bedgraph_outputs, "all_tiled.bedGraph"), col_names = c("chr", "start", "end", "score"), show_col_types = FALSE) head(reads) - - +#> # A tibble: 6 × 4 +#> chr start end score +#> +#> 1 chr4 10000 10100 1.45 +#> 2 chr4 10100 10200 0.869 +#> 3 chr4 10300 10400 0.290 +#> 4 chr4 10400 10500 0.145 +#> 5 chr4 10600 10700 0.434 +#> 6 chr4 11100 11200 0.145 +} ###################################################### ## `write_insertion_bed()` examples ###################################################### - +\dontrun{ # We utilize two groups this time bed_outputs <- file.path(tempdir(), "bed_outputs") cell_groups <- rep(c("A", "B"), length.out = length(cellNames(frags))) @@ -98,9 +107,27 @@ write_insertion_bed( frags, path = bed_paths, cell_groups = cell_groups, verbose = TRUE ) +#> 2026-01-08 21:19:41 Writing bed file for cluster: A +#> 2026-01-08 21:19:41 Bed file for cluster: A written to: +#> /tmp/RtmpgF9rbP/bed_outputs/A.bed +#> 2026-01-08 21:19:41 Writing bed file for cluster: B +#> 2026-01-08 21:19:42 Bed file for cluster: B written to: +#> /tmp/RtmpgF9rbP/bed_outputs/B.bed +#> 2026-01-08 21:19:42 Finished writing bed files list.files(bed_outputs) +#> [1] "A.bed" "B.bed" head(readr::read_tsv( file.path(bed_outputs, "A.bed"), col_names = c("chr", "start", "end"), show_col_types = FALSE) ) +#> # A
tibble: 6 × 3 +#> chr start end +#> +#> 1 chr4 10035 10036 +#> 2 chr4 10045 10046 +#> 3 chr4 10045 10046 +#> 4 chr4 10046 10047 +#> 5 chr4 10046 10047 +#> 6 chr4 10066 10067 +} } diff --git a/r/pkgdown/_pkgdown.yml b/r/pkgdown/_pkgdown.yml index a8b2f2e9..7ac4a3ea 100644 --- a/r/pkgdown/_pkgdown.yml +++ b/r/pkgdown/_pkgdown.yml @@ -139,8 +139,11 @@ reference: - checksum - apply_by_row - regress_out - - IterableMatrix-methods - pseudobulk_matrix + - IterableMatrix-methods-core + - IterableMatrix-methods-ops + - IterableMatrix-methods-stats + - title: "Reference Annotations" - contents: @@ -189,7 +192,12 @@ reference: - discrete_palette - collect_features - rotate_x_labels + - ragg_wrap + - scale_next_plot_height - title: "Data" - contents: - get_demo_mat + +figures: + dev: BPCells::ragg_wrap diff --git a/r/src/Makevars.in b/r/src/Makevars.in index c816cc37..ef312ca9 100644 --- a/r/src/Makevars.in +++ b/r/src/Makevars.in @@ -1,9 +1,10 @@ -# CXX_STD = CXX17 - -PKG_CXXFLAGS = %HDF5_CFLAGS% %HWY_CFLAGS% %ENV_CFLAGS% -Ivendor -std=c++17 -DRCPP_EIGEN -DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS -Wno-ignored-attributes -Wno-unknown-pragmas # -Wall -Wextra -Wpedantic +CXX_STD = CXX17 +PKG_CXXFLAGS = %HDF5_CFLAGS% %HWY_CFLAGS% %ENV_CFLAGS% -Ivendor -DRCPP_EIGEN -DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS # -Wall -Wextra -Wpedantic PKG_LIBS = -lz %HDF5_LIBS% %CXX_FS_FLAG% %HWY_LIBS% %ENV_LDFLAGS% -# PKG_CXXFLAGS = -Wno-deprecated-declarations -Wno-unused-but-set-variable -fsanitize=undefined %HDF5_CFLAGS% %HWY_CFLAGS% -std=c++17 -DRCPP_EIGEN -DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS -Wno-ignored-attributes -Wno-unknown-pragmas # -Wall -Wextra -Wpedantic + + +# PKG_CXXFLAGS = -Wno-deprecated-declarations -Wno-unused-but-set-variable -fsanitize=undefined %HDF5_CFLAGS% %HWY_CFLAGS% -DRCPP_EIGEN -DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS # -Wall -Wextra -Wpedantic # PKG_LIBS = -fsanitize=undefined -lz %HDF5_LIBS% %CXX_FS_FLAG% %HWY_LIBS% OBJECTS=\ @@ -90,3 +91,5 @@ 
vendor/md5/md5.o \ # matrixIterators/MatrixOps.o \ # matrixIterators/PackedMatrix.o \ # matrixIterators/UnpackedMatrix.o \ + + diff --git a/r/src/vendor/highway/hwy/base.h b/r/src/vendor/highway/hwy/base.h index 9d74f2b7..654140e8 100644 --- a/r/src/vendor/highway/hwy/base.h +++ b/r/src/vendor/highway/hwy/base.h @@ -214,9 +214,24 @@ namespace hwy { // 4 instances of a given literal value, useful as input to LoadDup128. #define HWY_REP4(literal) literal, literal, literal, literal +#ifndef HWY_NO_ABORT HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4) Abort(const char* file, int line, const char* format, ...); +#endif + +#ifdef HWY_NO_ABORT +#define HWY_ABORT(format, ...) \ + do { \ + (void)(format); \ + (void)(__FILE__); \ + (void)(__LINE__); \ + } while (0) +#define HWY_ASSERT(condition) \ + do { \ + (void)(condition); \ + } while (0) +#else #define HWY_ABORT(format, ...) \ ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__) @@ -227,6 +242,7 @@ HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4) HWY_ABORT("Assert %s", #condition); \ } \ } while (0) +#endif #if HWY_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER) #define HWY_IS_MSAN 1 diff --git a/r/src/vendor/highway/hwy/targets.cc b/r/src/vendor/highway/hwy/targets.cc index e68f754d..a98d1192 100644 --- a/r/src/vendor/highway/hwy/targets.cc +++ b/r/src/vendor/highway/hwy/targets.cc @@ -469,12 +469,13 @@ int64_t DetectTargets() { if ((bits & HWY_ENABLED_BASELINE) != HWY_ENABLED_BASELINE) { const uint64_t bits_u = static_cast(bits); const uint64_t enabled = static_cast(HWY_ENABLED_BASELINE); - fprintf(stderr, + (void)bits_u; (void)enabled; + /*fprintf(stderr, "WARNING: CPU supports 0x%08x%08x, software requires 0x%08x%08x\n", static_cast(bits_u >> 32), static_cast(bits_u & 0xFFFFFFFF), static_cast(enabled >> 32), - static_cast(enabled & 0xFFFFFFFF)); + static_cast(enabled & 0xFFFFFFFF));*/ } return bits; @@ -482,6 +483,7 @@ int64_t DetectTargets() { } // namespace +#ifndef HWY_NO_ABORT HWY_DLLEXPORT 
HWY_NORETURN void HWY_FORMAT(3, 4) Abort(const char* file, int line, const char* format, ...) { char buf[800]; @@ -509,6 +511,7 @@ HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4) abort(); // Compile error without this due to HWY_NORETURN. #endif } +#endif // HWY_NO_ABORT HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets) { supported_mask_ = static_cast(~disabled_targets); diff --git a/r/src/vendor/highway/manual-build/README.md b/r/src/vendor/highway/manual-build/README.md index 9d98c694..02d4b11c 100644 --- a/r/src/vendor/highway/manual-build/README.md +++ b/r/src/vendor/highway/manual-build/README.md @@ -1,2 +1,21 @@ -This folder is not part of the original Highway distribution, and is instead provided to simplify integration -with non-cmake build systems \ No newline at end of file +# Highway Build Customization for BPCells + +This directory contains scripts and configurations for building the `highway` SIMD library, vendored within the BPCells project. + +The original `highway` library was modified to address CRAN check warnings related to `abort()` calls and `stderr` output. These modifications ensure compliance with CRAN policies, which disallow compiled code from terminating R or writing directly to standard error/output streams. + +## Modifications: + +1. **`src/vendor/highway/manual-build/build_highway.sh`**: + * The `HWY_FLAGS` were updated to include `-DHWY_NO_ABORT`. This preprocessor definition prevents the `highway` library from including `abort()` calls in its compiled output when the relevant code is conditionally compiled. + * `hwy/nanobenchmark.cc` and `hwy/print.cc` were removed from `HWY_SOURCES`. These files contained `fprintf(stderr, ...)` and `printf(...)` calls, which are not permitted by CRAN. These components are not essential for BPCells. + +2. **`src/vendor/highway/hwy/targets.cc`**: + * The `Abort` function definition was wrapped with `#ifndef HWY_NO_ABORT ... #endif`. 
This ensures that the `Abort` function (which calls `abort()` and `fprintf(stderr, ...)`) is not compiled into `libhwy.a` when `-DHWY_NO_ABORT` is defined. + * The `fprintf(stderr, ...)` warning message within the `DetectTargets` function was commented out to prevent `stderr` output during runtime target detection. + +3. **`src/vendor/highway/hwy/base.h`**: + * Added conditional compilation logic (`#ifndef HWY_NO_ABORT`) around the `Abort` function declaration. + * Added alternative definitions for `HWY_ABORT` and `HWY_ASSERT` macros when `HWY_NO_ABORT` is defined, making them no-ops. This allows the code to compile without linking to the `Abort` function when the flag is set. + +These changes are necessary to pass CRAN checks and should be reapplied if the vendored `highway` version is updated. \ No newline at end of file diff --git a/r/src/vendor/highway/manual-build/build_highway.sh b/r/src/vendor/highway/manual-build/build_highway.sh index 1291c138..420cdc64 100644 --- a/r/src/vendor/highway/manual-build/build_highway.sh +++ b/r/src/vendor/highway/manual-build/build_highway.sh @@ -1,6 +1,20 @@ #!/usr/bin/env bash -set -euo pipefail +if [ -z "${BASH_VERSION:-}" ]; then + if command -v bash >/dev/null 2>&1; then + exec bash "$0" "$@" + else + echo "Error: bash is required to run this script" >&2 + exit 1 + fi +fi + +set -eu +if command -v bash >/dev/null 2>&1 && [ -n "$BASH_VERSION" ]; then + set -o pipefail +else + (set -o pipefail >/dev/null 2>&1) && set -o pipefail || true +fi # This is a fairly direct translation of the google/highway CMakeLists.txt from version 1.0.5 # https://github.com/google/highway/blob/1.0.5/CMakeLists.txt @@ -23,14 +37,13 @@ HWY_FLAGS=( -O2 -g -DNDEBUG + -DHWY_NO_ABORT ) # Skip the CONTRIB, since we don't need sorting or image libraries HWY_SOURCES=( hwy/aligned_allocator.cc - hwy/nanobenchmark.cc hwy/per_target.cc - hwy/print.cc hwy/targets.cc ) diff --git a/r/tests/old_compiler_envs/Dockerfile b/r/tests/old_compiler_envs/Dockerfile 
index 1d839fba..59d64077 100644 --- a/r/tests/old_compiler_envs/Dockerfile +++ b/r/tests/old_compiler_envs/Dockerfile @@ -25,7 +25,7 @@ ENV LANG en_US.UTF-8 ## Install backported R 4.0 RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-key '95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7' && \ - echo "deb http://cloud.r-project.org/bin/linux/debian buster-cran40/" >> /etc/apt/sources.list + echo "deb https://cloud.r-project.org/bin/linux/debian buster-cran40/" >> /etc/apt/sources.list ENV R_BASE_VERSION=4.2.2 diff --git a/r/vignettes/pbmc3k.Rmd b/r/vignettes/pbmc3k.Rmd index c2b98bd2..b897e538 100644 --- a/r/vignettes/pbmc3k.Rmd +++ b/r/vignettes/pbmc3k.Rmd @@ -239,7 +239,7 @@ the name of a directory to save the files in. This also allows us to skip re-dow the same files next time. ```{r} genes <- read_gencode_transcripts( - "./references", + file.path(tempdir(), "references"), release="42", transcript_choice="MANE_Select", annotation_set = "basic", @@ -249,12 +249,12 @@ head(genes) ``` ```{r} -blacklist <- read_encode_blacklist("./references", genome="hg38") +blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") head(blacklist) ``` ```{r} -chrom_sizes <- read_ucsc_chrom_sizes("./references", genome="hg38") +chrom_sizes <- read_ucsc_chrom_sizes(file.path(tempdir(), "references"), genome="hg38") head(chrom_sizes) ``` @@ -778,7 +778,7 @@ coverage_plot This is much more useful with a gene annotation track added in. For this we'll get a set of canonical transcripts (one per gene) from Gencode ```{r} -transcripts <- read_gencode_transcripts("./references", release="42") +transcripts <- read_gencode_transcripts(file.path(tempdir(), "references"), release="42") head(transcripts) ```