Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@ Type: Package
Title: A Toolbox for Spatial Gene Expression Analysis
Version: 0.1.0
Authors@R: as.person(c(
"Jan Kueckelhaus <jan.kueckelhaus@uniklinik-freiburg.de> [aut, cre]",
"Jan Kueckelhaus <jan.kueckelhaus@uniklinik-freiburg.de> [aut, cre]",
"Dieter-Henrik Heiland <dr.dieter.henrik.heiland@uniklinik-freiburg.de> [aut]"
))
Description: This package provides a framework of functions and shiny-applications to make it as easy and intuitive as possible to work with spatial gene expression data.
Description: This package provides a framework of functions and shiny-applications to make it as easy and intuitive as possible to work with spatial gene expression data.
Encoding: UTF-8
LazyData: true
License: GPL-3
BugReports: themilolab-spata@gmx.de
RoxygenNote: 7.1.1
Imports:
RoxygenNote: 7.2.3
Imports:
biomaRt,
broom,
concaveman,
colorspace,
Expand All @@ -33,19 +34,23 @@ Imports:
rlang,
readr,
reticulate,
Seurat,
shiny,
shinyWidgets,
shinybusy,
shinydashboard,
SingleCellExperiment,
sp,
stringr,
stringi,
SummarizedExperiment,
tibble,
tidyr,
tidytext,
viridis,
umap
Collate:
Collate:
'GetPositions.R'
'S4-documentation.R'
'S4-generic-functions.R'
'S4-programming-aid.R'
Expand Down Expand Up @@ -109,6 +114,6 @@ Collate:
'update-spata-object.R'
'valid-input-options.R'
'validation.R'
Depends:
Depends:
R (>= 2.10)
URL: https://themilolab.com/
98 changes: 98 additions & 0 deletions R/GetPositions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#' Recode non-autosomal chromosomes and drop duplicated genes in a gene positions data frame
#'
#' Cleans a gene positions data frame (as returned by \code{biomaRt::getBM()})
#' so that every gene appears once and every chromosome has a numeric code.
#' Despite the function name, no row is dropped because of its chromosome;
#' the chromosome names are recoded instead:
#' \itemize{
#'   \item \code{"X"} becomes \code{"23"} and \code{"Y"} becomes \code{"24"}.
#'   \item \code{"MT"} and every name longer than two characters
#'     (alternative loci, patches, scaffolds) become \code{"0"}.
#' }
#' If a gene is reported at several loci, only one row is kept. A locus with a
#' regular chromosome name (at most two characters) is always preferred over an
#' alternative locus, which also holds for genes located on X, Y or MT.
#' The result is sorted by the numeric chromosome code.
#'
#' @param gene_positions_df A data frame containing gene positions. Columns are
#'   addressed by position: column 2 must contain the gene symbol and column 3
#'   the chromosome name.
#' @return The input data frame with duplicated genes removed, the chromosome
#'   column (column 3) recoded as described above and stored as character, and
#'   the rows sorted by chromosome code in increasing numeric order.
#' @examples
#' gene_positions <- data.frame(
#'   ensembl_gene_id = c("ENSG00000261846", "ENSG00000197953", "ENSG00000262466"),
#'   hgnc_symbol = c("AADACL2", "AADACL2", "AADACL2-AS1"),
#'   chromosome_name = c("CHR_HSCHR3_1_CTG2_1", "3", "CHR_HSCHR3_1_CTG2_1"),
#'   start_position = c(151744454, 151733916, 151761981),
#'   end_position = c(151770036, 151761339, 151765669)
#' )
#' ignoreAlternative(gene_positions)
#'
#' @export
ignoreAlternative <- function(gene_positions_df) {

  # Work on a plain character copy of the chromosome column so that factor
  # columns and tibbles (where `df[, 3]` is not a vector) behave the same.
  chr <- as.character(gene_positions_df[[3]])

  # Sort the data frame by chromosome name.
  sort_idx <- order(chr, decreasing = FALSE)
  gene_positions_df <- gene_positions_df[sort_idx, ]
  chr <- chr[sort_idx]

  # Names longer than two characters denote alternative loci / scaffolds.
  is_alt <- !is.na(chr) & nchar(chr) > 2

  # Remove duplicated genes. A plain `duplicated()` on the sorted rows would
  # keep an alternative locus for genes on X, Y or MT, because names such as
  # "CHR_HSCHRX_..." sort before "X". Therefore the first occurrence is
  # determined in an order that visits all regular loci first (order() is
  # stable, so the sorted order is kept within both groups), while the rows
  # themselves stay in their sorted positions.
  preference <- order(is_alt)
  first_hit <- preference[!duplicated(gene_positions_df[[2]][preference])]
  keep <- logical(nrow(gene_positions_df))
  keep[first_hit] <- TRUE

  gene_positions_df <- gene_positions_df[keep, ]
  chr <- chr[keep]
  is_alt <- is_alt[keep]

  # Replace non-numeric chromosome names with numeric codes.
  # `%in%` never yields NA, so missing chromosome names are left untouched.
  chr[chr %in% "X"] <- "23"
  chr[chr %in% "Y"] <- "24"
  chr[chr %in% "MT" | is_alt] <- "0"
  gene_positions_df[[3]] <- chr

  # Sort the data frame by chromosome code in numeric order.
  gene_positions_df <-
    gene_positions_df[order(as.numeric(chr), decreasing = FALSE), ]

  gene_positions_df
}

#' Receive genomic coordinates of a gene list
#'
#' Queries the ENSEMBL BioMart via \code{biomaRt} for the genomic positions of
#' a vector of gene symbols (HGNC symbols for human, MGI symbols for mouse).
#'
#' @param gene_names A character vector of gene symbols. If empty (the
#'   default), all symbols available in the selected dataset are queried.
#' @param ensembl_version Host URL of the ENSEMBL archive to query, passed to
#'   \code{biomaRt::useMart()} as \code{host}. Defaults to
#'   \code{"https://feb2023.archive.ensembl.org"}. Use
#'   \code{biomaRt::listEnsemblArchives()} to check the available versions.
#' @param species Character value. Either \code{"human"} (dataset
#'   \code{hsapiens_gene_ensembl}) or \code{"mouse"} (dataset
#'   \code{mmusculus_gene_ensembl}). Any other value raises an error.
#' @param ignoreAlt If set to TRUE: Ignore if multiple loci are reported for a
#'   gene by passing the result through \code{ignoreAlternative()}.
#' @return A data frame with the columns \emph{ensembl_gene_id}, the symbol
#'   column (\emph{hgnc_symbol} or \emph{mgi_symbol}), \emph{chromosome_name},
#'   \emph{start_position} and \emph{end_position}.
#' @keywords Chromosomal positions
#' @export
#' @examples
#' getGenePositions(gene_names = c("EGFR", "PDGFRA"))
getGenePositions <- function(gene_names = character(0),
                             ensembl_version = "https://feb2023.archive.ensembl.org",
                             species = "human",
                             ignoreAlt = FALSE) {

  # Resolve the species-specific dataset and symbol attribute up front so that
  # unsupported input fails before any request is sent.
  if (!is.character(species) || length(species) != 1L || is.na(species)) {
    stop("Species other than human and mouse are not supported.")
  }

  species_config <- switch(
    species,
    human = list(dataset = "hsapiens_gene_ensembl", symbol = "hgnc_symbol"),
    mouse = list(dataset = "mmusculus_gene_ensembl", symbol = "mgi_symbol"),
    stop("Species other than human and mouse are not supported.")
  )
  symbol_attr <- species_config[["symbol"]]

  ensembl <- biomaRt::useMart(
    biomart = "ENSEMBL_MART_ENSEMBL",
    dataset = species_config[["dataset"]],
    host = ensembl_version # use biomaRt::listEnsemblArchives() to check the versions
  )

  # Without explicit gene names, fall back to every symbol in the dataset.
  if (length(gene_names) == 0) {
    gene_names <-
      biomaRt::getBM(attributes = symbol_attr, mart = ensembl)[[symbol_attr]]
  }

  gene_positions <- biomaRt::getBM(
    attributes = c(
      "ensembl_gene_id",
      symbol_attr,
      "chromosome_name",
      "start_position",
      "end_position"
    ),
    filters = symbol_attr,
    values = gene_names,
    mart = ensembl
  )

  # isTRUE() treats NA / NULL / non-scalar input as "do not post-process"
  # instead of failing inside `if`.
  if (isTRUE(ignoreAlt)) {
    gene_positions <- ignoreAlternative(gene_positions)
  }

  gene_positions
}
60 changes: 37 additions & 23 deletions R/cnv-analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,17 @@ hlpr_run_cnva_pca <- function(object, n_pcs = 30, of_sample = NA, ...){
#' character variables \emph{ensembl_gene_id}, \emph{hgnc_symbol}, \emph{chromosome_name}
#' and two numeric variables \emph{start_position} and \emph{end_position}.
#'
#' If NULL the data.frame is created via \code{CONICsmat::getGenePositions()} using
#' all gene names that appear in the count matrix and in the reference matrix.
#' If NULL, the data.frame is created via custom function \code{getGenePositions()}
#' adapted from \code{CONICsmat::getGenePositions()}
#' using all gene names retrieved from ensembl.
#'
#' Defaults to the SPATA2 intern data.frame \code{SPATA2::gene_pos_df}.
#'
#' @param remove_alternative_chr either TRUE or FALSE.
#' If TRUE, remove the chromosome of 0 (mitochondria and contigs), 23 (X), and 24 (Y).
#' If FALSE, keep the chromosome of 0 (mitochondria and contigs), 23 (X), and 24 (Y).
#' Defaults to TRUE to reduce confusion of the user.
#'
#' @param cnv_prefix Character value. Denotes the string with which the
#' the feature variables in which the information about the chromosomal gains and
#' losses are stored are prefixed.
Expand Down Expand Up @@ -253,6 +259,7 @@ runCnvAnalysis <- function(object,
ref_mtr = cnv_ref[["mtr"]], # reference data set of healthy tissue
ref_regions = cnv_ref[["regions"]], # chromosome positions
gene_pos_df = SPATA2::gene_pos_df,
remove_alternative_chr = TRUE,
directory_cnv_folder = "data-development/cnv-results", # output folder
directory_regions_df = NA, # deprecated (chromosome positions)
n_pcs = 30,
Expand Down Expand Up @@ -401,28 +408,43 @@ runCnvAnalysis <- function(object,

}

if(base::is.data.frame(gene_pos_df)){
if (base::is.data.frame(gene_pos_df)){
base::message(
"Default or user-provided dataframe is used."
)

confuns::check_data_frame(
df = gene_pos_df,
var.class = list(
ensembl_gene_id = "character",
hgnc_symbol = "character",
chromosome_name = "character",
start_position = "integer",
end_position = "integer"
)
} else if (base::is.null(gene_pos_df)){
base::message(
"The function getGenePositions() will be use to extract the gene position dataframe."
)

# custom getGenePositions() is adapted from CONICSmat::getGenePositions()
gene_pos_df <- getGenePositions(ignoreAlt = T)
# usethis::use_data(gene_pos_df, overwrite = T) # how to make the built-in rda.

} else {
base::stop("No other options for gene position dataframe.")
}

gene_pos_df <-
CONICSmat::getGenePositions(gene_names = base::rownames(expr_inter))
# Validate the column type of the gene_pos_df.
confuns::check_data_frame(
df = gene_pos_df,
var.class = list(
ensembl_gene_id = "character",
hgnc_symbol = "character",
chromosome_name = "character",
start_position = "integer",
end_position = "integer"
)
)

# Remove the chromosome of 0 (mitochondria and contigs), 23 (X), and 24 (Y)
if (remove_alternative_chr == TRUE) {
gene_pos_df <- dplyr::filter(gene_pos_df, !(chromosome_name %in% c("0", "23", "24")))
}



# -----


Expand Down Expand Up @@ -687,7 +709,7 @@ runCnvAnalysis <- function(object,
result_dir <-
stringr::str_c(directory_cnv_folder, "/", plot_cnv$output_filename, ".observations.txt")

results <- utils::read.table(result_dir)
results <- utils::read.table(result_dir, check.names = FALSE)

bcs_object <-
getFeatureDf(object) %>%
Expand Down Expand Up @@ -724,7 +746,6 @@ runCnvAnalysis <- function(object,
base::as.data.frame() %>%
tibble::rownames_to_column(var = "barcodes") %>%
magrittr::set_colnames(value = cnames) %>%
dplyr::mutate(barcodes = stringr::str_replace_all(string = barcodes, pattern = "\\.", replacement = "-")) %>%
dplyr::mutate(dplyr::across(dplyr::starts_with(match = cnv_prefix), .fns = base::as.numeric)) %>%
tibble::as_tibble()

Expand All @@ -740,13 +761,6 @@ runCnvAnalysis <- function(object,
)

# cnv matrix
base::colnames(results) <-
stringr::str_replace_all(
string = base::colnames(results),
pattern = "\\.",
replacement = "-"
)

cnv_mtr <- base::as.matrix(results)

# cnv list
Expand Down
Binary file modified data/gene_pos_df.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/gene_pos_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/getGenePositions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/ignoreAlternative.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.