Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions r/R/genomeAnnotations.R
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,16 @@ canonical_gene_symbol <- function(query, gene_mapping = human_gene_mapping) {
#' @param extend_bp Bases to extend region upstream and downstream of gene. If length 1, extension is
#' symmetric. If length 2, provide upstream extension then downstream extension as positive distances.
#' @return List of chr, start, end positions for use with trackplot functions.
#' @examples
#' ## Prep data
#' genes <- read_gencode_transcripts(
#' file.path(tempdir(), "references"), release = "42",
#' annotation_set = "basic",
#' features = "transcript"
#' )
#'
#' ## Get gene region
#' gene_region(genes, "CD19", extend_bp = 1e5)
#' @export
gene_region <- function(genes, gene_symbol, extend_bp = c(1e4, 1e4), gene_mapping = human_gene_mapping) {
genes <- normalize_ranges(genes, metadata_cols = c("strand", "gene_name"))
Expand Down
88 changes: 88 additions & 0 deletions r/R/plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ continuous_palette <- function(name) {
#' @param return_data If true, return data from just before plotting rather than a plot.
#' @param apply_styling If false, return a plot without pretty styling applied
#' @return ggplot2 plot object
#' @examples
#' ## Prep data
#' mat <- get_demo_mat(filter_qc = FALSE, subset = FALSE)
#' reads_per_cell <- colSums(mat)
#'
#' # Render knee plot
#' plot_read_count_knee(reads_per_cell, cutoff = 1e3)
#' @export
plot_read_count_knee <- function(read_counts, cutoff = NULL, return_data = FALSE, apply_styling = TRUE) {
# Keep ~1k cells per order of magnitude to speed up plotting
Expand Down Expand Up @@ -178,6 +185,20 @@ plot_read_count_knee <- function(read_counts, cutoff = NULL, return_data = FALSE
#' @param min_tss Minimum TSS Enrichment cutoff
#' @param bins Number of bins for density calculation
#' @inheritParams plot_embedding
#' @examples
#' ## Prep data
#' frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE)
#' genes <- read_gencode_transcripts(
#' file.path(tempdir(), "references"), release = "42",
#' annotation_set = "basic",
#' features = "transcript"
#' )
#' blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38")
#' atac_qc <- qc_scATAC(frags, genes, blacklist)
#'
#'
#' ## Render tss enrichment vs fragment plot
#' plot_tss_scatter(atac_qc, min_frags = 1000, min_tss = 10)
#' @export
plot_tss_scatter <- function(atac_qc, min_frags = NULL, min_tss = NULL, bins = 100, apply_styling = TRUE) {
assert_is(atac_qc, "data.frame")
Expand Down Expand Up @@ -255,6 +276,9 @@ plot_tss_scatter <- function(atac_qc, min_frags = NULL, min_tss = NULL, bins = 1
#' @param max_length Maximum length to show on the plot
#' @inheritParams plot_embedding
#' @return Numeric vector where index i contans the number of length-i fragments
#' @examples
#' frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE)
#' plot_fragment_length(frags)
#' @export
plot_fragment_length <- function(fragments, max_length = 500, return_data = FALSE, apply_styling = TRUE) {
assert_is(fragments, "IterableFragments")
Expand Down Expand Up @@ -297,6 +321,17 @@ plot_fragment_length <- function(fragments, max_length = 500, return_data = FALS
#' @param genes Coordinate ranges for genes (must include strand)
#' @param smooth Number of bases to smooth over (rolling average)
#' @seealso `footprint()`, `plot_tf_footprint()`
#' @examples
#' ## Prep data
#' frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE)
#' genes <- read_gencode_transcripts(
#' file.path(tempdir(), "references"), release = "42",
#' annotation_set = "basic",
#' features = "transcript"
#' )
#'
#' ## Plot tss profile
#' plot_tss_profile(frags, genes)
#' @export
plot_tss_profile <- function(fragments, genes, cell_groups = rlang::rep_along(cellNames(fragments), "all"),
flank = 2000L, smooth = 0L, zero_based_coords = !is(genes, "GRanges"),
Expand Down Expand Up @@ -485,6 +520,48 @@ collect_features <- function(source, features = NULL, gene_mapping = human_gene_
#' @return By default, returns a ggplot2 object with all the requested features plotted
#' in a grid. If `return_data` or `return_plot_list` is called, the return value will
#' match that argument.
#' @examples
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm rendering two plots, once by just the clusters, and one with the features. Not really understanding why this is erroring out, when running this code in console runs completely fine.

## Prep data
#' set.seed(123)
#' mat <- get_demo_mat()
#' ## Normalize matrix
#' mat_norm <- log1p(multiply_cols(mat, 1/colSums(mat)) * 10000) %>% write_matrix_memory(compress = FALSE)
#' ## Get variable genes
#' stats <- matrix_stats(mat, row_stats = "variance")
#' variable_genes <- order(stats$row_stats["variance",], decreasing=TRUE) %>%
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really liking doing all of this setup where I'm doing PCA and clustering. I think this can be vastly shortened when we have the #189 feature added in + a PCA DimReduction. But should we just do it with a fake matrix created by manually concatting multiple different distributions?

#' head(1000) %>%
#' sort()
#' # Z score normalize genes
#' mat_norm <- mat[variable_genes, ]
#' gene_means <- stats$row_stats['mean', variable_genes]
#' gene_vars <- stats$row_stats['variance', variable_genes]
#' mat_norm <- (mat_norm - gene_means) / gene_vars
#' ## Save matrix to memory
#' mat_norm <- mat_norm %>% write_matrix_memory(compress = FALSE)
#' ## Run SVD
#' svd <- BPCells::svds(mat_norm, k = 10)
#' pca <- multiply_cols(svd$v, svd$d)
#' ## Get UMAP
#' umap <- uwot::umap(pca)
#' ## Get clusters
#' clusts <- knn_hnsw(pca, ef = 500) %>%
#' knn_to_snn_graph() %>%
#' cluster_graph_louvain()
#'
#'
#' ## Plot embedding
#' print(length(clusts))
#'
#' plot_embedding(clusts, umap)
#'
#'
#' ### Can also plot by features
#' #plot_embedding(
#' # source = mat,
#' # umap,
#' # features = c("MS4A1", "CD3E"),
#' #)
#'
#' @export
plot_embedding <- function(source, embedding, features = NULL,
quantile_range = c(0.01, 0.99),
Expand Down Expand Up @@ -741,6 +818,17 @@ rotate_x_labels <- function(degrees = 45) {
#' @inheritParams plot_embedding
#' @param gene_mapping An optional vector for gene name matching with match_gene_symbol().
#' @param colors Color scale for plot
#' @examples
#' ## Prep data
#' mat <- get_demo_mat()
#' cell_types <- paste("Group", rep(1:3, length.out = length(colnames(mat))))
#'
#' ## Plot dot
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue with the y label where it's not being rendered properly. I think it is again with the argument_name() navigating up the call stack.

#' plot <- plot_dot(mat, c("MS4A1", "CD3E"), cell_types)
#'
#' BPCells:::render_plot_from_storage(
#' plot, width = 4, height = 5
#' )
#' @export
plot_dot <- function(source, features, groups, group_order = NULL, gene_mapping = human_gene_mapping,
colors = c("lightgrey", "#4682B4"),
Expand Down
121 changes: 121 additions & 0 deletions r/R/trackplots.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,22 @@ trackplot_normalize_ranges_with_metadata <- function(data, metadata) {
return(data)
}

#' Render a plot with intermediate disk storage step
#'
#' Take a plotting object and save in temp storage, so it can be outputted with exact dimensions.
#' Primarily used to allow for adjusting plot dimensions within function reference examples.
#' @param plot (ggplot) ggplot output from a plotting function
#' @param width (numeric) width of rendered plot
#' @param height (numeric) height of rendered plot
#' @keywords internal
render_plot_from_storage <- function(plot, width, height) {
assert_is(plot, "ggplot")
image_path <- tempfile(fileext = ".png")
ggplot2::ggsave(image_path, plot, width = width, height = height)
img <- png::readPNG(image_path)
grid::grid.raster(img)
}

#' Combine track plots
#'
#' Combines multiple track plots of the same region into a single grid.
Expand All @@ -268,6 +284,39 @@ trackplot_normalize_ranges_with_metadata <- function(data, metadata) {
#' the text label, y-axis, and plot body. The relative height of each row is given
#' by heights. A shared title and x-axis are put at the top.
#' @seealso `trackplot_coverage()`, `trackplot_gene()`, `trackplot_loop()`, `trackplot_scalebar()`
#' @examples
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we expand the trackplot_combine() section, or is this good for now?

#' ## Prep data
#' frags <- get_demo_frags()
#'
#' ## Use genes and blacklist to determine proper number of reads per cell
#' genes <- read_gencode_transcripts(
#' file.path(tempdir(), "references"), release = "42",
#' annotation_set = "basic",
#' features = "transcript"
#' )
#' blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38")
#' read_counts <- qc_scATAC(frags, genes, blacklist)$nFrags
#' region <- "chr4:3034877-4034877"
#' cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags))))
#' transcripts <- read_gencode_transcripts(
#' file.path(tempdir(), "references"), release = "42",
#' annotation_set = "basic"
#' )
#' region <- "chr4:3034877-4034877"
#'
#'
#' ## Get all trackplots and scalebars to combine
#' plot_scalebar <- trackplot_scalebar(region)
#' plot_gene <- trackplot_gene(transcripts, region)
#' plot_coverage <- trackplot_coverage(frags, region, groups = cell_types, cell_read_counts = read_counts)
#'
#'
#' ## Combine trackplots and render
#' ## Also remove colors from gene track
#' plot <- trackplot_combine(
#' list(plot_scalebar, plot_coverage, plot_gene + ggplot2::guides(color = "none"))
#' )
#' BPCells:::render_plot_from_storage(plot, width = 6, height = 4)
#' @export
trackplot_combine <- function(tracks, side_plot = NULL, title = NULL, side_plot_width = 0.3) {
for (plot in tracks) {
Expand Down Expand Up @@ -407,6 +456,26 @@ trackplot_combine <- function(tracks, side_plot = NULL, title = NULL, side_plot_
#' specify the labels for each track. If `return_data` or `return_plot_list` is
#' `TRUE`, the return value will be modified accordingly.
#' @seealso `trackplot_combine()`, `trackplot_gene()`, `trackplot_loop()`, `trackplot_scalebar()`
#' @examples
## Prep data
#' frags <- get_demo_frags()
#'
#' ## Use genes and blacklist to determine proper number of reads per cell
#' genes <- read_gencode_transcripts(
#' file.path(tempdir(), "references"), release = "42",
#' annotation_set = "basic",
#' features = "transcript"
#' )
#' blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38")
#' read_counts <- qc_scATAC(frags, genes, blacklist)$nFrags
#' region <- "chr4:3034877-4034877"
#' cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags))))
#'
#'
#' BPCells:::render_plot_from_storage(
#' trackplot_coverage(frags, region, groups = cell_types, cell_read_counts = read_counts),
#' width = 6, height = 3
#' )
#' @export
trackplot_coverage <- function(fragments, region, groups,
cell_read_counts,
Expand Down Expand Up @@ -510,6 +579,18 @@ trackplot_coverage <- function(fragments, region, groups,
#' @param label_size size for transcript labels in units of mm
#' @return Plot of gene locations
#' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_loop()`, `trackplot_scalebar()`
#' @examples
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Side thought but maybe it would be good to split into to y-axis levels even when there isn't direct intersection of two genes. It is a little bit hard to distinguish the genes in this example.

#' ## Prep data
#' transcripts <- read_gencode_transcripts(
#' file.path(tempdir(), "references"), release = "42",
#' annotation_set = "basic", features = "transcript"
#' )
#' region <- "chr4:3034877-4034877"
#'
#'
#' ## Plot gene trackplot
#' plot <- trackplot_gene(transcripts, region)
#' BPCells:::render_plot_from_storage(plot, width = 6, height = 1)
#' @export
trackplot_gene <- function(transcripts, region, exon_size = 2.5, gene_size = 0.5, label_size = 11*.8/ggplot2::.pt, track_label="Genes", return_data = FALSE) {
region <- normalize_ranges(region)
Expand Down Expand Up @@ -607,6 +688,28 @@ trackplot_gene <- function(transcripts, region, exon_size = 2.5, gene_size = 0.5
#' @param show_strand If TRUE, show strand direction as arrows
#' @return Plot of genomic loci if return_data is FALSE, otherwise returns the data frame used to generate the plot
#' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_loop()`, `trackplot_scalebar()`, `trackplot_gene()`
#' @examples
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The peaks here don't look great, should I use a different example?

#' ## Prep data
#' ## Peaks generated from demo frags, as input into `call_peaks_tile()`
#' peaks <- tibble::tibble(
#' chr = factor(rep("chr4", 16)),
#' start = c(3041400, 3041733, 3037400, 3041933, 3040466, 3041200,
#' 3038200, 3038000, 3040266, 3037733, 3040800, 3042133,
#' 3038466, 3037200, 3043333, 3040066),
#' end = c(3041600, 3041933, 3037600, 3042133, 3040666, 3041400,
#' 3038400, 3038200, 3040466, 3037933, 3041000, 3042333,
#' 3038666, 3037400, 3043533, 3040266),
#' enrichment = c(46.4, 43.5, 28.4, 27.3, 17.3, 11.7,
#' 10.5, 7.95, 7.22, 6.86, 6.32, 6.14,
#' 5.96, 5.06, 4.51, 3.43)
#' )
#' region <- "chr4:3034877-3044877"
#'
#' ## Plot peaks
#' BPCells:::render_plot_from_storage(
#' trackplot_genome_annotation(peaks, region, color_by = "enrichment"),
#' width = 6, height = 1
#' )
#' @export
trackplot_genome_annotation <- function(loci, region, color_by = NULL, colors = NULL, label_by = NULL, label_size = 11*.8/ggplot2::.pt, show_strand=FALSE,
annotation_size = 2.5, track_label="Peaks", return_data = FALSE) {
Expand Down Expand Up @@ -729,6 +832,19 @@ trackplot_genome_annotation <- function(loci, region, color_by = NULL, colors =
#'
#' @return Plot of loops connecting genomic coordinates
#' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_gene()`, `trackplot_scalebar()`, `trackplot_genome_annotation()`
#' @examples
#' peaks <- c(3054877, 3334877, 3534877, 3634877, 3734877)
#' loops <- tibble::tibble(
#' chr = "chr4",
#' start = peaks[c(1,1,2,3)],
#' end = peaks[c(2,3,4,5)],
#' score = c(4,1,3,2)
#' )
#' region <- "chr4:3034877-4034877"
#'
#' ## Plot loops
#' plot <- trackplot_loop(loops, region, color_by = "score")
#' BPCells:::render_plot_from_storage(plot, width = 6, height = 1.5)
#' @export
trackplot_loop <- function(loops, region, color_by=NULL, colors=NULL, allow_truncated=TRUE, curvature=0.75, track_label="Links", return_data = FALSE) {
region <- normalize_ranges(region)
Expand Down Expand Up @@ -826,6 +942,11 @@ trackplot_loop <- function(loops, region, color_by=NULL, colors=NULL, allow_trun
#'
#' @return Plot with coordinates and scalebar for plotted genomic region
#' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_gene()`, `trackplot_loop()`
#' @examples
#' region <- "chr4:3034877-3044877"
#' BPCells:::render_plot_from_storage(
#' trackplot_scalebar(region), width = 6, height = 1
#' )
#' @export
trackplot_scalebar <- function(region, font_pt=11) {
region <- normalize_ranges(region)
Expand Down
11 changes: 11 additions & 0 deletions r/man/gene_region.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions r/man/plot_dot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions r/man/plot_embedding.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading