-
Notifications
You must be signed in to change notification settings - Fork 30
[r] Add examples for single cell plots and trackplots #233
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9824d30
f685ad8
e1ec1cf
4b748b9
8311ca8
c96db54
af1eba7
c6872dc
a321917
4d71059
0911202
6a850de
20bf3ec
920007b
14407a1
dbbb491
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -108,6 +108,13 @@ continuous_palette <- function(name) { | |
| #' @param return_data If true, return data from just before plotting rather than a plot. | ||
| #' @param apply_styling If false, return a plot without pretty styling applied | ||
| #' @return ggplot2 plot object | ||
| #' @examples | ||
| #' ## Prep data | ||
| #' mat <- get_demo_mat(filter_qc = FALSE, subset = FALSE) | ||
| #' reads_per_cell <- colSums(mat) | ||
| #' | ||
| #' # Render knee plot | ||
| #' plot_read_count_knee(reads_per_cell, cutoff = 1e3) | ||
| #' @export | ||
| plot_read_count_knee <- function(read_counts, cutoff = NULL, return_data = FALSE, apply_styling = TRUE) { | ||
| # Keep ~1k cells per order of magnitude to speed up plotting | ||
|
|
@@ -178,6 +185,20 @@ plot_read_count_knee <- function(read_counts, cutoff = NULL, return_data = FALSE | |
| #' @param min_tss Minimum TSS Enrichment cutoff | ||
| #' @param bins Number of bins for density calculation | ||
| #' @inheritParams plot_embedding | ||
| #' @examples | ||
| #' ## Prep data | ||
| #' frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE) | ||
| #' genes <- read_gencode_transcripts( | ||
| #' file.path(tempdir(), "references"), release = "42", | ||
| #' annotation_set = "basic", | ||
| #' features = "transcript" | ||
| #' ) | ||
| #' blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") | ||
| #' atac_qc <- qc_scATAC(frags, genes, blacklist) | ||
| #' | ||
| #' | ||
| #' ## Render tss enrichment vs fragment plot | ||
| #' plot_tss_scatter(atac_qc, min_frags = 1000, min_tss = 10) | ||
| #' @export | ||
| plot_tss_scatter <- function(atac_qc, min_frags = NULL, min_tss = NULL, bins = 100, apply_styling = TRUE) { | ||
| assert_is(atac_qc, "data.frame") | ||
|
|
@@ -255,6 +276,9 @@ plot_tss_scatter <- function(atac_qc, min_frags = NULL, min_tss = NULL, bins = 1 | |
| #' @param max_length Maximum length to show on the plot | ||
| #' @inheritParams plot_embedding | ||
| #' @return Numeric vector where index i contans the number of length-i fragments | ||
| #' @examples | ||
| #' frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE) | ||
| #' plot_fragment_length(frags) | ||
| #' @export | ||
| plot_fragment_length <- function(fragments, max_length = 500, return_data = FALSE, apply_styling = TRUE) { | ||
| assert_is(fragments, "IterableFragments") | ||
|
|
@@ -297,6 +321,17 @@ plot_fragment_length <- function(fragments, max_length = 500, return_data = FALS | |
| #' @param genes Coordinate ranges for genes (must include strand) | ||
| #' @param smooth Number of bases to smooth over (rolling average) | ||
| #' @seealso `footprint()`, `plot_tf_footprint()` | ||
| #' @examples | ||
| #' ## Prep data | ||
| #' frags <- get_demo_frags(filter_qc = FALSE, subset = FALSE) | ||
| #' genes <- read_gencode_transcripts( | ||
| #' file.path(tempdir(), "references"), release = "42", | ||
| #' annotation_set = "basic", | ||
| #' features = "transcript" | ||
| #' ) | ||
| #' | ||
| #' ## Plot tss profile | ||
| #' plot_tss_profile(frags, genes) | ||
| #' @export | ||
| plot_tss_profile <- function(fragments, genes, cell_groups = rlang::rep_along(cellNames(fragments), "all"), | ||
| flank = 2000L, smooth = 0L, zero_based_coords = !is(genes, "GRanges"), | ||
|
|
@@ -485,6 +520,48 @@ collect_features <- function(source, features = NULL, gene_mapping = human_gene_ | |
| #' @return By default, returns a ggplot2 object with all the requested features plotted | ||
| #' in a grid. If `return_data` or `return_plot_list` is called, the return value will | ||
| #' match that argument. | ||
| #' @examples | ||
| ## Prep data | ||
| #' set.seed(123) | ||
| #' mat <- get_demo_mat() | ||
| #' ## Normalize matrix | ||
| #' mat_norm <- log1p(multiply_cols(mat, 1/colSums(mat)) * 10000) %>% write_matrix_memory(compress = FALSE) | ||
| #' ## Get variable genes | ||
| #' stats <- matrix_stats(mat, row_stats = "variance") | ||
| #' variable_genes <- order(stats$row_stats["variance",], decreasing=TRUE) %>% | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really liking doing all of this setup where I'm doing PCA and clustering. I think this can be vastly shortened when we have the #189 feature added in + a PCA |
||
| #' head(1000) %>% | ||
| #' sort() | ||
| #' # Z score normalize genes | ||
| #' mat_norm <- mat[variable_genes, ] | ||
| #' gene_means <- stats$row_stats['mean', variable_genes] | ||
| #' gene_vars <- stats$row_stats['variance', variable_genes] | ||
| #' mat_norm <- (mat_norm - gene_means) / gene_vars | ||
| #' ## Save matrix to memory | ||
| #' mat_norm <- mat_norm %>% write_matrix_memory(compress = FALSE) | ||
| #' ## Run SVD | ||
| #' svd <- BPCells::svds(mat_norm, k = 10) | ||
| #' pca <- multiply_cols(svd$v, svd$d) | ||
| #' ## Get UMAP | ||
| #' umap <- uwot::umap(pca) | ||
| #' ## Get clusters | ||
| #' clusts <- knn_hnsw(pca, ef = 500) %>% | ||
| #' knn_to_snn_graph() %>% | ||
| #' cluster_graph_louvain() | ||
| #' | ||
| #' | ||
| #' ## Plot embedding | ||
| #' print(length(clusts)) | ||
| #' | ||
| #' plot_embedding(clusts, umap) | ||
| #' | ||
| #' | ||
| #' ### Can also plot by features | ||
| #' #plot_embedding( | ||
| #' # source = mat, | ||
| #' # umap, | ||
| #' # features = c("MS4A1", "CD3E"), | ||
| #' #) | ||
| #' | ||
| #' @export | ||
| plot_embedding <- function(source, embedding, features = NULL, | ||
| quantile_range = c(0.01, 0.99), | ||
|
|
@@ -741,6 +818,17 @@ rotate_x_labels <- function(degrees = 45) { | |
| #' @inheritParams plot_embedding | ||
| #' @param gene_mapping An optional vector for gene name matching with match_gene_symbol(). | ||
| #' @param colors Color scale for plot | ||
| #' @examples | ||
| #' ## Prep data | ||
| #' mat <- get_demo_mat() | ||
| #' cell_types <- paste("Group", rep(1:3, length.out = length(colnames(mat)))) | ||
| #' | ||
| #' ## Plot dot | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same issue with the y label where it's not being rendered properly. I think it is again with the |
||
| #' plot <- plot_dot(mat, c("MS4A1", "CD3E"), cell_types) | ||
| #' | ||
| #' BPCells:::render_plot_from_storage( | ||
| #' plot, width = 4, height = 5 | ||
| #' ) | ||
| #' @export | ||
| plot_dot <- function(source, features, groups, group_order = NULL, gene_mapping = human_gene_mapping, | ||
| colors = c("lightgrey", "#4682B4"), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -252,6 +252,22 @@ trackplot_normalize_ranges_with_metadata <- function(data, metadata) { | |
| return(data) | ||
| } | ||
|
|
||
| #' Render a plot with intermediate disk storage step | ||
| #' | ||
| #' Take a plotting object and save in temp storage, so it can be outputted with exact dimensions. | ||
| #' Primarily used to allow for adjusting plot dimensions within function reference examples. | ||
| #' @param plot (ggplot) ggplot output from a plotting function | ||
| #' @param width (numeric) width of rendered plot | ||
| #' @param height (numeric) height of rendered plot | ||
| #' @keywords internal | ||
| render_plot_from_storage <- function(plot, width, height) { | ||
| assert_is(plot, "ggplot") | ||
| image_path <- tempfile(fileext = ".png") | ||
| ggplot2::ggsave(image_path, plot, width = width, height = height) | ||
| img <- png::readPNG(image_path) | ||
| grid::grid.raster(img) | ||
| } | ||
|
|
||
| #' Combine track plots | ||
| #' | ||
| #' Combines multiple track plots of the same region into a single grid. | ||
|
|
@@ -268,6 +284,39 @@ trackplot_normalize_ranges_with_metadata <- function(data, metadata) { | |
| #' the text label, y-axis, and plot body. The relative height of each row is given | ||
| #' by heights. A shared title and x-axis are put at the top. | ||
| #' @seealso `trackplot_coverage()`, `trackplot_gene()`, `trackplot_loop()`, `trackplot_scalebar()` | ||
| #' @examples | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we expand the trackplot_combine() section, or is this good for now? |
||
| #' ## Prep data | ||
| #' frags <- get_demo_frags() | ||
| #' | ||
| #' ## Use genes and blacklist to determine proper number of reads per cell | ||
| #' genes <- read_gencode_transcripts( | ||
| #' file.path(tempdir(), "references"), release = "42", | ||
| #' annotation_set = "basic", | ||
| #' features = "transcript" | ||
| #' ) | ||
| #' blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") | ||
| #' read_counts <- qc_scATAC(frags, genes, blacklist)$nFrags | ||
| #' region <- "chr4:3034877-4034877" | ||
| #' cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags)))) | ||
| #' transcripts <- read_gencode_transcripts( | ||
| #' file.path(tempdir(), "references"), release = "42", | ||
| #' annotation_set = "basic" | ||
| #' ) | ||
| #' region <- "chr4:3034877-4034877" | ||
| #' | ||
| #' | ||
| #' ## Get all trackplots and scalebars to combine | ||
| #' plot_scalebar <- trackplot_scalebar(region) | ||
| #' plot_gene <- trackplot_gene(transcripts, region) | ||
| #' plot_coverage <- trackplot_coverage(frags, region, groups = cell_types, cell_read_counts = read_counts) | ||
| #' | ||
| #' | ||
| #' ## Combine trackplots and render | ||
| #' ## Also remove colors from gene track | ||
| #' plot <- trackplot_combine( | ||
| #' list(plot_scalebar, plot_coverage, plot_gene + ggplot2::guides(color = "none")) | ||
| #' ) | ||
| #' BPCells:::render_plot_from_storage(plot, width = 6, height = 4) | ||
| #' @export | ||
| trackplot_combine <- function(tracks, side_plot = NULL, title = NULL, side_plot_width = 0.3) { | ||
| for (plot in tracks) { | ||
|
|
@@ -407,6 +456,26 @@ trackplot_combine <- function(tracks, side_plot = NULL, title = NULL, side_plot_ | |
| #' specify the labels for each track. If `return_data` or `return_plot_list` is | ||
| #' `TRUE`, the return value will be modified accordingly. | ||
| #' @seealso `trackplot_combine()`, `trackplot_gene()`, `trackplot_loop()`, `trackplot_scalebar()` | ||
| #' @examples | ||
| ## Prep data | ||
| #' frags <- get_demo_frags() | ||
| #' | ||
| #' ## Use genes and blacklist to determine proper number of reads per cell | ||
| #' genes <- read_gencode_transcripts( | ||
| #' file.path(tempdir(), "references"), release = "42", | ||
| #' annotation_set = "basic", | ||
| #' features = "transcript" | ||
| #' ) | ||
| #' blacklist <- read_encode_blacklist(file.path(tempdir(), "references"), genome="hg38") | ||
| #' read_counts <- qc_scATAC(frags, genes, blacklist)$nFrags | ||
| #' region <- "chr4:3034877-4034877" | ||
| #' cell_types <- paste("Group", rep(1:3, length.out = length(cellNames(frags)))) | ||
| #' | ||
| #' | ||
| #' BPCells:::render_plot_from_storage( | ||
| #' trackplot_coverage(frags, region, groups = cell_types, cell_read_counts = read_counts), | ||
| #' width = 6, height = 3 | ||
| #' ) | ||
| #' @export | ||
| trackplot_coverage <- function(fragments, region, groups, | ||
| cell_read_counts, | ||
|
|
@@ -510,6 +579,18 @@ trackplot_coverage <- function(fragments, region, groups, | |
| #' @param label_size size for transcript labels in units of mm | ||
| #' @return Plot of gene locations | ||
| #' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_loop()`, `trackplot_scalebar()` | ||
| #' @examples | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Side thought but maybe it would be good to split into to y-axis levels even when there isn't direct intersection of two genes. It is a little bit hard to distinguish the genes in this example. |
||
| #' ## Prep data | ||
| #' transcripts <- read_gencode_transcripts( | ||
| #' file.path(tempdir(), "references"), release = "42", | ||
| #' annotation_set = "basic", features = "transcript" | ||
| #' ) | ||
| #' region <- "chr4:3034877-4034877" | ||
| #' | ||
| #' | ||
| #' ## Plot gene trackplot | ||
| #' plot <- trackplot_gene(transcripts, region) | ||
| #' BPCells:::render_plot_from_storage(plot, width = 6, height = 1) | ||
| #' @export | ||
| trackplot_gene <- function(transcripts, region, exon_size = 2.5, gene_size = 0.5, label_size = 11*.8/ggplot2::.pt, track_label="Genes", return_data = FALSE) { | ||
| region <- normalize_ranges(region) | ||
|
|
@@ -607,6 +688,28 @@ trackplot_gene <- function(transcripts, region, exon_size = 2.5, gene_size = 0.5 | |
| #' @param show_strand If TRUE, show strand direction as arrows | ||
| #' @return Plot of genomic loci if return_data is FALSE, otherwise returns the data frame used to generate the plot | ||
| #' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_loop()`, `trackplot_scalebar()`, `trackplot_gene()` | ||
| #' @examples | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The peaks here don't look great, should I use a different example? |
||
| #' ## Prep data | ||
| #' ## Peaks generated from demo frags, as input into `call_peaks_tile()` | ||
| #' peaks <- tibble::tibble( | ||
| #' chr = factor(rep("chr4", 16)), | ||
| #' start = c(3041400, 3041733, 3037400, 3041933, 3040466, 3041200, | ||
| #' 3038200, 3038000, 3040266, 3037733, 3040800, 3042133, | ||
| #' 3038466, 3037200, 3043333, 3040066), | ||
| #' end = c(3041600, 3041933, 3037600, 3042133, 3040666, 3041400, | ||
| #' 3038400, 3038200, 3040466, 3037933, 3041000, 3042333, | ||
| #' 3038666, 3037400, 3043533, 3040266), | ||
| #' enrichment = c(46.4, 43.5, 28.4, 27.3, 17.3, 11.7, | ||
| #' 10.5, 7.95, 7.22, 6.86, 6.32, 6.14, | ||
| #' 5.96, 5.06, 4.51, 3.43) | ||
| #' ) | ||
| #' region <- "chr4:3034877-3044877" | ||
| #' | ||
| #' ## Plot peaks | ||
| #' BPCells:::render_plot_from_storage( | ||
| #' trackplot_genome_annotation(peaks, region, color_by = "enrichment"), | ||
| #' width = 6, height = 1 | ||
| #' ) | ||
| #' @export | ||
| trackplot_genome_annotation <- function(loci, region, color_by = NULL, colors = NULL, label_by = NULL, label_size = 11*.8/ggplot2::.pt, show_strand=FALSE, | ||
| annotation_size = 2.5, track_label="Peaks", return_data = FALSE) { | ||
|
|
@@ -729,6 +832,19 @@ trackplot_genome_annotation <- function(loci, region, color_by = NULL, colors = | |
| #' | ||
| #' @return Plot of loops connecting genomic coordinates | ||
| #' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_gene()`, `trackplot_scalebar()`, `trackplot_genome_annotation()` | ||
| #' @examples | ||
| #' peaks <- c(3054877, 3334877, 3534877, 3634877, 3734877) | ||
| #' loops <- tibble::tibble( | ||
| #' chr = "chr4", | ||
| #' start = peaks[c(1,1,2,3)], | ||
| #' end = peaks[c(2,3,4,5)], | ||
| #' score = c(4,1,3,2) | ||
| #' ) | ||
| #' region <- "chr4:3034877-4034877" | ||
| #' | ||
| #' ## Plot loops | ||
| #' plot <- trackplot_loop(loops, region, color_by = "score") | ||
| #' BPCells:::render_plot_from_storage(plot, width = 6, height = 1.5) | ||
| #' @export | ||
| trackplot_loop <- function(loops, region, color_by=NULL, colors=NULL, allow_truncated=TRUE, curvature=0.75, track_label="Links", return_data = FALSE) { | ||
| region <- normalize_ranges(region) | ||
|
|
@@ -826,6 +942,11 @@ trackplot_loop <- function(loops, region, color_by=NULL, colors=NULL, allow_trun | |
| #' | ||
| #' @return Plot with coordinates and scalebar for plotted genomic region | ||
| #' @seealso `trackplot_combine()`, `trackplot_coverage()`, `trackplot_gene()`, `trackplot_loop()` | ||
| #' @examples | ||
| #' region <- "chr4:3034877-3044877" | ||
| #' BPCells:::render_plot_from_storage( | ||
| #' trackplot_scalebar(region), width = 6, height = 1 | ||
| #' ) | ||
| #' @export | ||
| trackplot_scalebar <- function(region, font_pt=11) { | ||
| region <- normalize_ranges(region) | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm rendering two plots, once by just the clusters, and one with the features. Not really understanding why this is erroring out, when running this code in console runs completely fine.