Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Docker_README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#build:
docker build -t gemli-r:v0 -f Dockerfile_v0 .

#push to registry
docker login #user and pass from dockerhub here
docker tag gemli-r:v0 fenix07/gemli-r:v0
docker push fenix07/gemli-r:v0

#pull with docker
docker pull fenix07/gemli-r:v0
docker run -it --rm --privileged fenix07/gemli-r:v0
27 changes: 27 additions & 0 deletions Dockerfile_v0
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use the official R base image
FROM rocker/r-ver:4.3.3

# Install necessary system dependencies for R packages
RUN apt-get update && apt-get install -y \
libcurl4-openssl-dev \
libssl-dev \
libxml2-dev \
git \
libnetcdf-dev \
netcdf-bin \
libpng-dev \
libfontconfig1-dev \
libfreetype6-dev \
&& rm -rf /var/lib/apt/lists/*

# Install remotes package to enable installation from GitHub
RUN R -e "install.packages('remotes', repos='https://cloud.r-project.org/')"

# Install the GEMLI package from the specified subdirectory on GitHub
RUN R -e "remotes::install_github('UPSUTER/GEMLI', subdir = 'GEMLI_package_v0')"

# Verify the installation
RUN R -e "library(GEMLI)"

# Set the command to R console
CMD ["R"]
34 changes: 34 additions & 0 deletions Dockerfile_v1
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Use the official R base image
FROM rocker/r-ver:4.4.1

# Install necessary system dependencies for R packages and LaTeX in smaller batches
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libcurl4-openssl-dev \
libssl-dev \
libxml2-dev \
libnetcdf-dev \
netcdf-bin \
libpng-dev \
libfontconfig1-dev \
libfreetype6-dev \
texlive-latex-base \
texlive-fonts-recommended && \
apt-get clean && rm -rf /var/lib/apt/lists/*# Install necessary system dependencies for R packages

# Install remotes package to enable installation from GitHub
RUN R -e "install.packages('remotes', repos='https://cloud.r-project.org/')"
RUN R -e "install.packages('Seurat')"

# Copy the local GEMLI package directory to the Docker image and install the GEMLI package from the local directory
COPY GEMLI_package_v1 /opt/GEMLI_package_v1
RUN R -e "remotes::install_local('/opt/GEMLI_package_v1')"

# Install the GEMLI package from the specified subdirectory on GitHub
#RUN R -e "remotes::install_github('UPSUTER/GEMLI', subdir = 'GEMLI_package_v1')"

# Verify the installation
RUN R -e "library(GEMLI)"

# Set the command to R console
CMD ["R"]
27 changes: 27 additions & 0 deletions GEMLI_package_v1/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Package: GEMLI
Type: Package
Title: Gene expression memory based lineage inference
Version: 0.2.0
Author: Marcel Tarbier and Almut Eisele
Maintainer: The package maintainer <almut.eisele@epfl.ch>
Description: Uses general characteristics of genes with lineage-specific expression
to predict cell lineages in scRNA-seq datasets.
URL: https://github.com/UPSUTER/GEMLI
Depends: R (>= 4.3.3)
Imports:
Matrix,
matrixStats,
fastcluster,
ggrepel,
methods,
igraph (>= 2.0.3),
reshape (>= 0.8.9),
dplyr (>= 1.1.2),
ggplot2 (>= 3.4.2),
UpSetR (>= 1.4.0),
tidyr (>= 1.3.0)
Suggests:
Seurat (>= 4.3.0)
License: GPL-3
Encoding: UTF-8
LazyData: true
20 changes: 20 additions & 0 deletions GEMLI_package_v1/GEMLI.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
32 changes: 32 additions & 0 deletions GEMLI_package_v1/NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
importFrom(igraph, "clusters", "plot.igraph", "edge_attr", "layout.fruchterman.reingold", "layout.kamada.kawai", "layout.grid")
importFrom(graphics, "axis", "layout", "legend", "mtext", "par", "title")
importFrom(methods, "hasArg")
importFrom(stats, "as.dist", "cutree", "lm", "loess", "loess.control", "quantile", "residuals", "sd")
importFrom(UpSetR, "upset", "fromList")
importFrom(ggplot2, "ggplot", "aes", "geom_line", "geom_point", "theme_classic", "theme", "theme_bw", "scale_colour_manual", "geom_vline", "geom_hline", "labs", "xlim", "element_blank", "element_text")
importFrom(ggrepel, "geom_text_repel")
importFrom(tidyr, "spread", "gather")
importFrom(reshape, "cast")
importFrom(Matrix, "Matrix", "t", "crossprod", "colMeans", "colSums", "Diagonal")
importFrom(fastcluster, hclust)
importFrom(matrixStats, rowMeans2, rowSums2)
importFrom("stats", "na.omit")
import(dplyr)

export(cell_fate_DEG_calling)
export(cell_type_composition_plot)
export(cluster_stability_plot)
export(DEG_volcano_plot)
export(extract_cell_fate_lineages)
export(memory_gene_calling)
export(predict_lineages)
export(predict_lineages_multiple_sizes)
export(predict_lineages_with_known_markers)
export(prediction_to_lineage_information)
export(quantify_clusters_iterative)
export(suggest_network_trimming_to_size)
export(test_lineages)
export(trim_network_to_size)
export(visualize_as_network)
exportMethods(addBarcodes, addPrediction, addTestingResults)
export(GEMLI)
18 changes: 18 additions & 0 deletions GEMLI_package_v1/R/DEG_volcano_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
DEG_volcano_plot<-function(GEMLI_items, name1, name2){
DEG<-GEMLI_items[['DEG']]
DEG$change = ifelse(DEG$p_val_adj <= 0.05 & abs(DEG$avg_log2FC) >= 0.5, ifelse(DEG$avg_log2FC> 0.5 ,name1,name2),'Stable')
DEG$label=rownames(DEG)
plt<-ggplot(data = DEG, aes(x = avg_log2FC , y = -log10(p_val_adj), colour=change, label=label)) +
ggplot2::geom_point(alpha=0.4, size=3.5)+
ggplot2::xlim(c(-4.5, 4.5)) +
ggplot2::scale_color_manual(values=c("#5386BD", "darkred","grey"))+
ggplot2::geom_vline(xintercept=c(-0.5,0.5),lty=4,col="black",lwd=0.8) +
ggplot2::geom_hline(yintercept = 1.301,lty=4,col="black",lwd=0.8)+
ggplot2::labs(x="log2(fold change)",y="-log10 (p-value)", title=paste0("DEG"," ",name1," vs ",name2)) +
ggplot2::theme_bw()+
ggplot2::theme(plot.title = element_text(hjust = 0.5),
legend.position="right",
legend.title = element_blank()) +
ggrepel::geom_text_repel(data = subset(DEG, avg_log2FC >= 0.5 | avg_log2FC < -0.5), aes(label = label), max.overlaps = 15)
suppressWarnings(print(plt))
}
11 changes: 11 additions & 0 deletions GEMLI_package_v1/R/calculate_correlations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
calculate_correlations <- function(data)
{
data <- t(as.matrix(data))
tmp = t(apply(data, 1, rank))
tmp = as.matrix(tmp)
tmp = tmp - matrixStats::rowMeans2(tmp)
tmp = tmp / sqrt(matrixStats::rowSums2(tmp^2))
r = tcrossprod(tmp)
diag(r) <- 0
return(r)
}
28 changes: 28 additions & 0 deletions GEMLI_package_v1/R/cell_fate_DEG_calling.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
cell_fate_DEG_calling<-function(GEMLI_items, ident1, ident2, min.pct=0.05, logfc.threshold=0.1)
{
if (!requireNamespace("Seurat", quietly = TRUE)) {
stop("Seurat is required for this function. Please install and load it before using cell_fate_DEG_calling")
}
if (class(GEMLI_items)=='list') {
gene_expression = Matrix::Matrix(GEMLI_items[['gene_expression']])
} else if (class(GEMLI_items)=='GEMLI') {
gene_expression = Matrix::Matrix(GEMLI_items@gene_expression)
} else {
stop('Object GEMLI_items should be either of class list or GEMLI')
}
GEMLI_Seurat<-Seurat::CreateSeuratObject(gene_expression, project = "SeuratProject", assay = "RNA")
Metadata<-GEMLI_items[['cell_fate_analysis']]
Metadata$ident<-NA
Metadata$ident[Metadata$cell.fate %in% ident1]<-"ident1"
Metadata$ident[Metadata$cell.fate%in%ident2]<-"ident2"
Meta<-as.data.frame(Metadata[,c(5)])
rownames(Meta)<-Metadata$cell.ID
colnames(Meta)<-c("cell.fate")
GEMLI_Seurat<-Seurat::AddMetaData(GEMLI_Seurat, Meta, col.name = NULL)
Seurat::DefaultAssay(object = GEMLI_Seurat) <- "RNA"
Seurat::Idents(GEMLI_Seurat) <- GEMLI_Seurat$cell.fate
GEMLI_Seurat <- Seurat::NormalizeData(GEMLI_Seurat, normalization.method = "LogNormalize", scale.factor = 10000)
DEG <- Seurat::FindMarkers(object = GEMLI_Seurat, ident.1 = "ident1", ident.2 = "ident2", min.pct = min.pct, logfc.threshold = logfc.threshold, assay='RNA')
GEMLI_items[['DEG']]<-DEG
return(GEMLI_items)
}
49 changes: 49 additions & 0 deletions GEMLI_package_v1/R/cell_type_composition_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
cell_type_composition_plot <- function(GEMLI_items, ground_truth=F, cell_type_colors=F, type, intersections=NULL)
{

if (class(GEMLI_items)=='list') {
cell_type = GEMLI_items[['cell_type']]
barcodes = GEMLI_items[['barcodes']]
predicted_lineage_table = GEMLI_items[['predicted_lineage_table']]
} else if (class(GEMLI_items)=='GEMLI') {
cell_type = GEMLI_items@cell_type
barcodes = GEMLI_items@barcodes
predicted_lineage_table = GEMLI_items[['predicted_lineage_table']]
} else {
stop('Object GEMLI_items should be either of class list or GEMLI')
}
base_colors = rep(c('#a50026','#d73027','#f46d43','#fdae61','#fee090','#e0f3f8','#abd9e9','#74add1','#4575b4','#313695','#40004b','#762a83','#9970ab','#c2a5cf','#e7d4e8','#d9f0d3','#a6dba0','#5aae61','#1b7837','#00441b','#543005','#8c510a','#bf812d','#dfc27d','#f6e8c3','#e0e0e0','#bababa','#878787','#4d4d4d','#1a1a1a'), 100)
if (cell_type_colors==F){
cell.type<-unique(cell_type$cell.type)
color<-base_colors[rank(unique(cell_type$cell.type))]
GEMLI_items[['cell_type_color']] = data.frame(cell.type, color)
}
if (class(GEMLI_items)=='list') {
cell_type = GEMLI_items[['cell_type']]
} else if (class(GEMLI_items)=='GEMLI') {
cell_type = GEMLI_items@cell_type
}

if (ground_truth){cell.ID<-names(barcodes); clone.ID<-unname(barcodes); GT<-as.data.frame(cbind(clone.ID,cell.ID));
Lookup<-merge(GT, cell_type, by="cell.ID", all=TRUE)} else {
Lookup<-merge(as.data.frame(predicted_lineage_table), cell_type, by="cell.ID", all=TRUE)
}

if (type == "bubble"){
Lookup <- Lookup %>% dplyr::group_by(clone.ID, cell.type) %>% dplyr::summarise(cnt = dplyr::n()) %>% dplyr::mutate(freq = round(cnt / sum(cnt), 3)); Lookup <- reshape::cast(Lookup, clone.ID~cell.type, value="freq");
base_colors = GEMLI_items[['cell_type_color']]$color[match(colnames(Lookup[,2:length(Lookup)]),GEMLI_items[['cell_type_color']]$cell.type)]
p<-Lookup %>% gather(cell.type, percentage, -clone.ID)%>% ggplot(group=cell.type) + ggplot2::geom_point(aes(x = cell.type, y = clone.ID, size = percentage, col= cell.type))+ ggplot2::theme_classic()+ scale_colour_manual(values = base_colors)}

if (type == "upsetR"){
Lookup_list <- split(Lookup$clone.ID, Lookup$cell.type)
p<-UpSetR::upset(fromList(Lookup_list), order.by = "freq", nsets = length(Lookup_list),
sets.x.label = "Lineages in cell type", mainbar.y.label = "Number of lineages",
nintersects = NA, intersections = NULL, point.size=5, mb.ratio = c(0.5, 0.5), text.scale = 2,
set_size.show = TRUE, set_size.numbers_size = 7, set_size.scale_max = length(unique(Lookup$clone.ID)))}

if (type == "plain"){
Lookup<-unique(Lookup[,-c(1)])
p<-Lookup %>% dplyr::group_by(clone.ID) %>% dplyr::arrange(clone.ID, cell.type) %>% dplyr::summarize(combi = paste0(cell.type, collapse = "__"), .groups = "drop") %>% dplyr::count(combi)}

return(p)
}
100 changes: 100 additions & 0 deletions GEMLI_package_v1/R/classes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
setClass(
"GEMLI",
slots = list(
gene_expression = "dgCMatrix", # Assuming gene_expression is a matrix
barcodes = "character", # Optional, vector of barcodes
prediction = "matrix", # Optional, data frame for prediction results
testing_results = "matrix" # Optional, data frame for testing results
),
prototype = list( # Default values for optional slots
barcodes = character(0),
prediction = matrix(),
testing_results = matrix()
)
)

GEMLI <- function(gene_expression, barcodes=character(0), prediction=NULL, testing_results=NULL) {
if (!is.matrix(gene_expression) & class(gene_expression)[1]!='Seurat' & class(gene_expression)[1]!='dgCMatrix' & class(gene_expression)[1]!='dgeMatrix') {
stop("gene_expression must be a matrix, a Seurat object or a dgCMatrix")
}
if (!is.null(barcodes) && !is.character(barcodes)) {
stop("barcodes must be a character vector")
}
if (!is.null(prediction) && !is.matrix(prediction)) {
stop("prediction must be a data frame")
}
if (!is.null(testing_results) && !is.matrix(testing_results)) {
stop("testing_results must be a data frame")
}

# Provide default empty data frames if NULL
prediction <- if (is.null(prediction)) matrix() else prediction
testing_results <- if (is.null(testing_results)) matrix() else testing_results

if (class(gene_expression)[1]=='Seurat') {
if (!requireNamespace("Seurat", quietly = TRUE)) {
stop("Seurat is required for this function. Please install and load it before using cell_fate_DEG_calling")
} else {
gene_expression <- Matrix::Matrix(GetAssayData(object=gene_expression, layer="counts"))
}
}
if (class(gene_expression)[1]=='dgeMatrix' | is.matrix(gene_expression)) {
gene_expression <- Matrix::Matrix(gene_expression, sparse=T)
}

new("GEMLI", gene_expression = gene_expression, barcodes = barcodes, prediction = prediction,
testing_results = testing_results)
}

# Define the custom show method for the GEMLI class
setMethod("show", "GEMLI", function(object) {
cat(paste0("GEMLI object with ", nrow(object@gene_expression), " genes and ", ncol(object@gene_expression), " cells\n\n"))

if(length(object@barcodes) > 1) {
cat("Barcodes: Provided\n")
} else {
cat("Barcodes: Not provided\n")
}

if(nrow(object@prediction) > 1) {
cat("Prediction: Available\n")
} else {
cat("Prediction: Not available\n")
}

if(nrow(object@testing_results) > 1) {
cat("Testing Results: Available\n")
} else {
cat("Testing Results: Not available\n")
}
})

# Method to add or update barcodes
setGeneric("addBarcodes", function(object, barcodes) standardGeneric("addBarcodes"))
setMethod("addBarcodes", "GEMLI", function(object, barcodes) {
if (!is.character(barcodes)) {
stop("barcodes must be a character vector")
}
validObject(object@barcodes <- barcodes)
return(object)
})

# Method to add or update prediction
setGeneric("addPrediction", function(object, prediction) standardGeneric("addPrediction"))
setMethod("addPrediction", "GEMLI", function(object, prediction) {
if (!is.matrix(prediction)) {
stop("prediction must be a matrix")
}
validObject(object@prediction <- prediction)
return(object)
})

# Method to add or update testing results
setGeneric("addTestingResults", function(object, testing_results) standardGeneric("addTestingResults"))
setMethod("addTestingResults", "GEMLI", function(object, testing_results) {
if (!is.matrix(testing_results)) {
stop("testing results must be a matrix")
}
validObject(object@testing_results <- testing_results)
return(object)
})
7 changes: 7 additions & 0 deletions GEMLI_package_v1/R/cluster_stability_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
cluster_stability_plot <- function(GEMLI_items) # check
{
data_matrix<-GEMLI_items[['prediction_multiple_sizes']]
clustree<-clustree(data_matrix, prefix = "K")
clustree<-clustree[["data"]][which(clustree[["data"]]$size != 1),]
plot(clustree$K, clustree$sc3_stability, xlab="lineage size", ylab="clustree_stability_index")
}
19 changes: 19 additions & 0 deletions GEMLI_package_v1/R/extract_cell_fate_lineages.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
extract_cell_fate_lineages<- function(GEMLI, selection, unique=FALSE, threshold)
{
Lookup<-merge(as.data.frame(GEMLI[['predicted_lineage_table']]), GEMLI[['cell_type']], by="cell.ID", all=TRUE)
if (unique){
Lookup$cell.fate<-NA
Lookup<-Lookup %>% dplyr::group_by(clone.ID) %>% dplyr::mutate(cell.fate=dplyr::case_when((n_distinct(cell.type)==length(selection)& all(cell.type %in% selection)& is.na(clone.ID)==FALSE)~ "asym", (n_distinct(cell.type)==1& all(cell.type %in% selection) & n_distinct(cell.ID)>1& is.na(clone.ID)==FALSE)~"sym"))
} else {
Lookup2<-Lookup[Lookup$cell.type %in% selection, ]
Lookup2<-Lookup2 %>% dplyr::group_by(clone.ID) %>% dplyr::mutate(cell.fate=dplyr::case_when((n_distinct(cell.type)==length(selection)& all(cell.type %in% selection)& is.na(clone.ID)==FALSE)~ "asym", (n_distinct(cell.type)==1& all(cell.type %in% selection) & n_distinct(cell.ID)>1& is.na(clone.ID)==FALSE)~"sym"))
Lookup2<-Lookup2[,c(1,4)]; Lookup<-merge(Lookup, Lookup2, by=c("cell.ID" ), all=TRUE)
}
# filter by threshold
Lookup <- Lookup %>% dplyr::group_by(cell.fate, clone.ID) %>% dplyr::mutate(cnt = dplyr::n()); Lookup<-Lookup%>% dplyr::group_by(cell.fate, clone.ID, cell.type) %>% dplyr::mutate(per= (n()/cnt)*100)
for(i in 1:length(selection)){Lookup <- Lookup %>% dplyr::group_by(cell.fate, clone.ID) %>% dplyr::mutate(cell.fate=dplyr::case_when(((cell.fate=="asym") & (cell.type==selection[i] ) & (per < threshold[i]))~"filtered", TRUE~cell.fate))}
Lookup<-Lookup %>% dplyr::group_by(clone.ID) %>% dplyr::mutate(cell.fate=dplyr::case_when(any(cell.fate=="filtered")~NA, TRUE~cell.fate))
Lookup$cell.fate <- paste(Lookup$cell.fate,Lookup$cell.type,sep = "_")
GEMLI[['cell_fate_analysis']]<-Lookup[,1:4]
return(GEMLI)
}
Loading