-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpathway_enrichment.Rmd
More file actions
95 lines (76 loc) · 2.52 KB
/
pathway_enrichment.Rmd
File metadata and controls
95 lines (76 loc) · 2.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
---
title: "Untitled"
author: "Reuben"
date: "2024-12-11"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
packages
```{r}
library(MOFA2)
library(clusterProfiler)
library(org.Hs.eg.db)
```
# load mofa model
```{r}
outfile <- file.path(getwd(), paste0("MOFA_model_15.hdf5"))
model <- load_model(outfile)
```
#pathway enrichment analysis
```{r}
#extract the top 100 features in each factor of intrest
get_feature <- function(factor_list, view, n_features,model){
feature_list <- c()
#extract weights
weights <- get_weights(model, views = "all", factors = "all", abs = TRUE)
# subset to the RNA omics and the factors of interest
weights <- (as.data.frame(weights[[view]]))
weights <- (as.data.frame(weights[,factor_list]))
# loop over each factor
for (x in 1:length(factor_list)){
# order the weights
weights <- arrange(weights, desc(weights[,x]))
# extract a set number of the top weights
feature_list <- c(feature_list, rownames(weights[1:n_features,]))
}
return (feature_list)
}
#features to extract from
factor_list <- c(2,4,5,12)
#extract top 100 features from each factor correlated with response, in the RNA dataset
top_genes <- get_feature(factor_list, "RNA", 100,model)
top_genes <- unique(top_genes)
#convert the gene names from SYMBOL to ENTREZID
genes_names <- bitr(top_genes, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
genes <- genes_names$ENTREZID
#enrichment analysis with go
#molecular function (MF), biological process (BP), and cellular component (CC).
go_enrichment <- function(gene_list, ontology, n_categories){
enrichment_go <- enrichGO(
gene = gene_list,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = ontology,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
qvalueCutoff = 0.2,
minGSSize = 10,
maxGSSize = 500,
readable = FALSE,
pool = FALSE
)
print(dotplot(enrichment_go, showCategory = n_categories))
return(summary(enrichment_go))}
#enrichmentgo plots
bp <- go_enrichment(genes, "BP", 5)
bp <- bp[,c(2,3,4,5,6,9)]
mf <- go_enrichment(genes, "MF", 5)
mf <- mf[,c(2,3,4,5,6,9)]
cc <- go_enrichment(genes, "CC", 5)
cc <- cc[,c(2,3,4,5,6,9)]
write.csv(bp,file = "biological_process_pathway_enrichment.csv", row.names = FALSE)
write.csv(mf,file = "molecular_function_pathway_enrichment.csv", row.names = FALSE)
write.csv(cc,file = "cellular_component_pathway_enrichment.csv", row.names = FALSE)
```