-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMOFA_models.Rmd
More file actions
664 lines (569 loc) · 22.1 KB
/
MOFA_models.Rmd
File metadata and controls
664 lines (569 loc) · 22.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
---
title: "Untitled"
author: "Reuben"
date: "2024-12-04"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
#packages
```{r}
library(data.table)
library(plotly)
library(MOFA2)
library(ggplot2)
library(dplyr)
library(reshape2)
library(tidyverse)
library(RColorBrewer)
```
# Load metdata
```{r}
patient_data <- read.delim("ws3_grampian_patient_data.txt")
patient_data <- patient_data[-c(1, 2), ]
# Where are all of the cancers located?
unique(patient_data$Location.of.Tumour) # all rectal cancers
```
# barplots of treatment and treatment response distribution
```{r}
ggplot(patient_data, mapping = aes(x = Response.to.Treatment,fill = Response.to.Treatment)) +
geom_bar(color = "black")+
geom_text(stat='count', aes(label=..count..), vjust=-1)+
scale_y_continuous(limits=c(0, 120))+
ggtitle("Distribution of Response to Treatment")
ggplot(patient_data, mapping = aes(x = Treatment.Arm,fill = Treatment.Arm)) +
geom_bar(color = "black")+
geom_text(stat='count', aes(label=..count..), vjust=-1)+
scale_y_continuous(limits=c(0, 150))+
ggtitle("Distribution of Treatment Type")
```
# Load preprocessed datasets
```{r}
rna <- readRDS("rna_unsupervised_preprocessed")
methylation <- readRDS("methylation_unsupervised_preprocessed")
cna <- readRDS("cna_unsupervised_preprocessed")
mutation<- readRDS("mutation_unsupervised_preprocessed")
```
######################################################################################
# MOFA
# Make sure all samples are in the same order for all the datasets to put into mofa
```{r}
add_missing_colnames <- function(df, df2) {
#get column names of df and df2
df2_colnames <- colnames(df2)
df_colnames <- colnames(df)
#find missing column names in df
missing_colnames <- setdiff(df2_colnames, df_colnames)
#add missing column names to df
if (length(missing_colnames) > 0) {
df[, missing_colnames] <- NA
}# Fill missing samples with NAs as MOFA can handle NA values
#return the modified df
return(df)
}
cna <- add_missing_colnames(cna,rna)
mutation <- add_missing_colnames(mutation,rna)
methylation <- add_missing_colnames(methylation,rna)
methylation <- add_missing_colnames(methylation,cna)
mutation <- add_missing_colnames(mutation,cna)
rna <- add_missing_colnames(rna,cna)
```
# Get all dataset samples in the same order
```{r}
#make mutation data numeric
row_names <- rownames(mutation)
# Convert all values to numeric while keeping it as a data frame
mutation <- as.data.frame(lapply(mutation, as.numeric))
# Reassign preserved row names
rownames(mutation) <- row_names
#order samples
cna <- cna[, order(colnames(cna))]
rna <- rna[, order(colnames(rna))]
mutation <- mutation[, order(colnames(mutation))]
methylation <- methylation[, order(colnames(methylation))]
```
#make mofa object
```{r}
#extract numeric data from dataframes as matrices
rna_mat <- as.matrix(rna)
cna_mat <- as.matrix(cna)
mut_mat <- as.matrix(mutation)
meth_mat <- as.matrix(methylation)
#create a list of matrices
dt <- list(RNA = rna_mat, CNA = cna_mat, Mutation = mut_mat, Methylation = meth_mat)
#verify the dimensions of the resulting list
lapply(dt, dim)
#create MOFA object
MOFAobject <- create_mofa(dt)
ModelOptions <- get_default_model_options(MOFAobject)
# Plot overview of the data
plot_data_overview(MOFAobject)
dim(mut_mat)
```
#make multiple MOFA models with varying numbers of factors - warning takes a long time to run chunk
```{r}
#function to train MOFA model with varying number of factors
train_mofa_with_factors <- function(MOFAobject, n_factors_list) {
#loop through the number of factors in the list
for (n_factors in n_factors_list) {
#data options
data_opts <- get_default_data_options(MOFAobject)
data_opts$scale_views <- TRUE
#get default model options
model_opts <- get_default_model_options(MOFAobject)
#change the number of factors
model_opts$num_factors <- n_factors
# Model everything as gaussian except mutation, which is modelled as bernoulli
model_opts$likelihoods <- c("gaussian", "gaussian", "bernoulli", "gaussian")
#train options
train_opts <- get_default_training_options(MOFAobject)
train_opts$convergence_mode <- "slow"
train_opts$seed <- 42
train_opts$maxiter <- 3000
#prepare MOFA object with modified options
MOFAobject_mod <- prepare_mofa(
object = MOFAobject,
data_options = data_opts,
model_options = model_opts,
training_options = train_opts
)
#define the file path to the trained model
outfile <- file.path(getwd(), paste0("MOFA_model_", n_factors, ".hdf5"))
print(outfile)
# Train the MOFA model - when trained it will save the model object in the outputfile.
MOFAobject_trained <- run_mofa(MOFAobject_mod, outfile, use_basilisk = TRUE)
}
}
n_factors_list <- c(5,10,15,20,25,30,35,40,45,50)# list of factors to loop through
# This will make 10 MOFA models with varying numbers of factors
train_mofa_with_factors(MOFAobject, n_factors_list)
```
#compare models - must run previous chunk to work, else skip this step and follow the chunks that only require model_15
```{r}
# Load the models
outfile <- file.path(getwd(), paste0("MOFA_model_5.hdf5"))
model_5 <- load_model(outfile)
outfile <- file.path(getwd(), paste0("MOFA_model_10.hdf5"))
model_10 <- load_model(outfile)
outfile <- file.path(getwd(), paste0("MOFA_model_15.hdf5"))
model_15 <- load_model(outfile)
outfile <- file.path(getwd(), paste0("MOFA_model_20.hdf5"))
model_20 <- load_model(outfile)
outfile <- file.path(getwd(), paste0("MOFA_model_25.hdf5"))
model_25 <- load_model(outfile)
outfile <- file.path(getwd(), paste0("MOFA_model_30.hdf5"))
model_30 <- load_model(outfile)
outfile <- file.path(getwd(), paste0("MOFA_model_35.hdf5"))
model_35 <- load_model(outfile)
outfile <- file.path(getwd(), paste0("MOFA_model_40.hdf5"))
model_40 <- load_model(outfile)
outfile <- file.path(getwd(), paste0("MOFA_model_45.hdf5"))
model_45 <- load_model(outfile)
outfile <- file.path(getwd(), paste0("MOFA_model_50.hdf5"))
model_50 <- load_model(outfile)
#get varaince explained for each model
variance_5 <- setNames(as.data.frame(model_5@cache$variance_explained[1]), "5 Factors")
variance_10 <- setNames(as.data.frame(model_10@cache$variance_explained[1]), "10 Factors")
variance_15 <- setNames(as.data.frame(model_15@cache$variance_explained[1]), "15 Factors")
variance_20 <- setNames(as.data.frame(model_20@cache$variance_explained[1]), "20 Factors")
variance_25 <- setNames(as.data.frame(model_25@cache$variance_explained[1]), "25 Factors")
variance_30 <- setNames(as.data.frame(model_30@cache$variance_explained[1]), "30 Factors")
variance_35 <- setNames(as.data.frame(model_35@cache$variance_explained[1]), "35 Factors")
variance_40 <- setNames(as.data.frame(model_40@cache$variance_explained[1]), "40 Factors")
variance_45 <- setNames(as.data.frame(model_45@cache$variance_explained[1]), "45 Factors")
variance_50 <- setNames(as.data.frame(model_50@cache$variance_explained[1]), "50 Factors")
#combine into one df
variance_df <- cbind(variance_5,variance_10,variance_15,variance_20,variance_25,variance_30,variance_35,variance_40,variance_45,variance_50)
variance_df <- rbind(variance_df, Number_of_Factors=c(5,10,15,20,25,30,35,40,45,50))
variance_df <- as.data.frame(t(variance_df))
#plot line graph
ggplot(data = variance_df, aes(x = Number_of_Factors)) +
geom_line(aes(y = RNA, color = "RNA"), linetype = "dotted") +
geom_point(aes(y = RNA, color = "RNA")) +
geom_line(aes(y = CNA, color = "CNA"), linetype = "dotted") +
geom_point(aes(y = CNA, color = "CNA")) +
geom_line(aes(y = Mutation, color = "Mutation"), linetype = "dotted") +
geom_point(aes(y = Mutation, color = "Mutation")) +
geom_line(aes(y = Methylation, color = "Methylation"), linetype = "dotted") +
geom_point(aes(y = Methylation, color = "Methylation")) +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "Number of Factors", y = "Variance Explained", title = "Variance Explained per View", color = "Type") +
scale_color_manual(values = brewer.pal(n = 4, name = "Set1"))
variance_df <- variance_df %>%
mutate(Total_Variance = RNA + CNA + Mutation + Methylation)
#plotting the total variance
ggplot(data = variance_df, aes(x = Number_of_Factors)) +
geom_line(aes(y = Total_Variance, color = "Total Variance"), size = 1) +
theme_minimal() +
labs(x = "Number of Factors", y = "Total Variance Explained",
title = "Total Variance Explained by Number of Factors", color = "Type") +
scale_color_manual(values = c("Total Variance" = "black")) +
theme(legend.position = "bottom")
#plot elbo scores
compare_elbo(list(model_5,model_10,model_15,model_20,model_25,model_30,model_35,model_40,model_45,model_50), log = FALSE)
```
#construct custom elbo plot
```{r}
#function to plot elbo scores of each model
get_all_elbo <- function(model_names,n_factors){
elbo_df <- data.frame(model = n_factors)
elbo_list <- c()
for (model in model_names){
elbo <- get_elbo(model)
elbo_list <- c(elbo_list,elbo)
}
elbo_df$elbo_score <- elbo_list
return(elbo_df)
}
elbo_df <- get_all_elbo(c(model_5,model_10,model_15,model_20,model_25,model_30,model_35,model_40,model_45,model_50),c(50))
#plot elbo scores
ggplot(data = elbo_df, aes(x = model, y = elbo_score)) +
geom_line(size = 1, colour = "red") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(x = "Number of Factors", y = "ELBO Score", title = "Evidence Lower Bound per Facor")
```
# select optimal model as the one with 15 factors
# This model was the one with the highest elbo score that met teh criteria of
# explaining >1% variance for at least one omics in each factor
# Check the variance of each omics at within each factor
```{r}
# load model 15 - optimal model
outfile <- file.path(getwd(), paste0("MOFA_model_15.hdf5"))
model_15 <- load_model(outfile)
get_variance_explained(model_15)
```
#check correlation of factors
```{r}
# Calculate the correlation between factors
cor_matrix <- plot_factor_cor(object = model_15, method = "pearson")
# plot correlation between factors
plot_factor_cor(model_15)
#save correlation matrix between factors
write.csv(cor_matrix$corr, "correlation_matrix_between_factors.csv", row.names = TRUE)
```
# add metadata to model 15
```{r}
#subset metdata to only include samples that match the matricies input into MOFA
patient_data <- patient_data[patient_data$X.Patient.ID %in% colnames(cna),]
#is the order of data matrices the same as the patient data?
patient_data$X.Patient.ID == colnames(cna)
#extract metadata from patient data
treatment_arm <- patient_data$Treatment.Arm
response_to_treatment <- patient_data$Response.to.Treatment
gender <- patient_data$Gender
os_months <- as.numeric(patient_data$OS.in.Months)
os_status <- patient_data$OS.Status
dfs_status <- patient_data$DFS.Status
dfs_months <- patient_data$DFS.in.Months
age <- patient_data$Age
Pretreatment_Tumour_state <-patient_data$Pretreatment.T.Stage
Pretreatment_Node_state <- patient_data$Pretreatment.N.Stage
Pretreatment_Metastasis_state <- patient_data$Pretreatment.M.Stage
Post_Treatment_Tumour_Stage <- patient_data$Post.Treatment.T.Stage
Post_Treatment_Node_Stage <- patient_data$Post.Treatment.N.Stage
Post_Treatment_TNM_Staging <- patient_data$Post.Treatment.TNM.Staging
Post_Treatment_Dukes <- patient_data$Post.Treatment.Dukes
Bowel_Screen_Detected <- patient_data$Bowel.Screen.Detected
Tumour_Differentiation <- patient_data$Tumour.Differentiation
Number_of_involved_Lymph_Nodes <- patient_data$Number.of.involved.Lymph.Nodes
Nsamples <- sum(model_15@dimensions$N)
# add metadata to df
sample_metadata <- data.frame(
sample = samples_names(model_15)[[1]],
Pretreatment_Tumour_state = Pretreatment_Tumour_state,
Pretreatment_Node_state = Pretreatment_Node_state,
Pretreatment_Metastasis_state = Pretreatment_Metastasis_state,
EMVI = patient_data$EMVI,
CRM = patient_data$CRM,
TME_Quality = patient_data$TME.Quality,
Post_Treatment_Tumour_Stage = Post_Treatment_Tumour_Stage,
Post_Treatment_Node_Stage = Post_Treatment_Node_Stage,
Post_Treatment_TNM_Staging = Post_Treatment_TNM_Staging,
Post_Treatment_Dukes = Post_Treatment_Dukes,
Bowel_Screen_Detected = Bowel_Screen_Detected,
Gender = gender,
Overall_Survival_Status = os_status,
Overall_Survival_Months = os_months,
Disease_Free_Status = dfs_status,
Disease_Free_Months = dfs_months,
Tumour_Differentiation = Tumour_Differentiation,
Age = age,
Treatment_type = treatment_arm,
Response = response_to_treatment
)
#add to model
samples_metadata(model_15) <- sample_metadata
head(model_15@samples_metadata, n=3)
```
#visualise variance + correlation of factors
```{r}
#check factor correlation for redundancy
plot_factor_cor(model_15)
#get variance explained
plot_variance_explained(model_15 , x = "view", y = "factor", plot_total = TRUE)
#get total variance explained per omic as an integer
model_15@cache$variance_explained$r2_total[[1]]
variance_df <- model_15@cache$variance_explained$r2_per_factor
# write variance explained to csv for supplementary information
write.csv(variance_df,file='variance_explained.csv', row.names=TRUE)
```
#characterise factors - what factors correlate to covarites in the metadata
```{r}
# plot a heatmap of the correlation between factors and covariates from the metadata
# Plot the log pvalue
correlate_factors_with_covariates(model_15,
covariates = c(
"Pretreatment_Tumour_state",
"Pretreatment_Node_state",
"Pretreatment_Metastasis_state",
"EMVI",
"CRM",
"TME_Quality",
"Post_Treatment_Tumour_Stage",
"Post_Treatment_Node_Stage" ,
"Post_Treatment_TNM_Staging",
"Post_Treatment_Dukes" ,
"Bowel_Screen_Detected" ,
"Gender" ,
"Overall_Survival_Status",
"Overall_Survival_Months" ,
"Disease_Free_Status" ,
"Disease_Free_Months" ,
"Tumour_Differentiation",
"Age",
"Treatment_type",
"Response" ),
abs = TRUE,
plot="log_pval"
)
# Extract in a df the exact correlation values
correlated_covariates <- correlate_factors_with_covariates(model_15,
covariates = c(
"Pretreatment_Tumour_state",
"Pretreatment_Node_state",
"Pretreatment_Metastasis_state",
"EMVI",
"CRM",
"TME_Quality",
"Post_Treatment_Tumour_Stage",
"Post_Treatment_Node_Stage" ,
"Post_Treatment_TNM_Staging",
"Post_Treatment_Dukes" ,
"Bowel_Screen_Detected" ,
"Gender" ,
"Overall_Survival_Status",
"Overall_Survival_Months" ,
"Disease_Free_Status" ,
"Disease_Free_Months" ,
"Tumour_Differentiation",
"Age",
"Treatment_type",
"Response" ),
abs = TRUE,
plot="r", return_data = TRUE
)
#save as csv for supplementary information
write.csv(correlated_covariates, file = "correlation_coef_covariates.csv", row.names = TRUE)
# Plot fewer Covariates for better view
correlate_factors_with_covariates(model_15,
covariates = c("Gender","Response" ,"Overall_Survival_Status","Treatment_type", "Disease_Free_Status", "Age"),
abs = TRUE,
plot="log_pval"
)
```
#extract the log adjusted p-values from the correlation analysis and save them.
```{r}
log_p_values <- correlate_factors_with_covariates(model_15,
covariates = c(
"Pretreatment_Tumour_state",
"Pretreatment_Node_state",
"Pretreatment_Metastasis_state",
"EMVI",
"CRM",
"TME_Quality",
"Post_Treatment_Tumour_Stage",
"Post_Treatment_Node_Stage" ,
"Post_Treatment_TNM_Staging",
"Post_Treatment_Dukes" ,
"Bowel_Screen_Detected" ,
"Gender" ,
"Overall_Survival_Status",
"Overall_Survival_Months" ,
"Disease_Free_Status" ,
"Disease_Free_Months" ,
"Tumour_Differentiation",
"Age",
"Treatment_type",
"Response" ),
abs = TRUE,
plot="log_pval",
return_data = TRUE
)
write.csv(log_p_values, file = "correlation_log_pvals_covariates.csv", row.names = TRUE)
```
# boxplots of samples in each factor correlated to response
```{r}
plot_factor(model_15,
factor = c(2,4,5,12),
color_by = "Response",
dot_size = 3,
dodge = TRUE,
stroke = 0.5,
add_violin = T,
add_boxplot = T
)
```
####################################################################
# statistical test
```{r}
# get factor values from each factor strongly correlated to response to treatment
factor_2 <- get_factors(model_15, factors = 2, as.data.frame = T)
factor_4 <- get_factors(model_15, factors = 4, as.data.frame = T)
factor_5 <- get_factors(model_15, factors = 5, as.data.frame = T)
factor_12 <- get_factors(model_15, factors = 12, as.data.frame = T)
metadata <- model_15@samples_metadata
# Perform anova between response categories
# ANOVA
factor_2$Response <- metadata$Response
factor_4$Response <- metadata$Response
factor_5$Response <- metadata$Response
factor_12$Response <- metadata$Response
factor_2$value <- as.numeric(factor_2$value)
factor_4$value <- as.numeric(factor_4$value)
factor_5$value <- as.numeric(factor_5$value)
factor_12$value <- as.numeric(factor_12$value)
# Fit ANOVA model
anova_factor_2 <- aov(value ~ Response, data = factor_2)
anova_factor_4 <- aov(value ~ Response, data = factor_4)
anova_factor_5 <- aov(value ~ Response, data = factor_5)
anova_factor_12 <- aov(value ~ Response, data = factor_12)
# Extract ANOVA summaries
summary_factor_2 <- summary(anova_factor_2)[[1]]
summary_factor_4 <- summary(anova_factor_4)[[1]]
summary_factor_5 <- summary(anova_factor_5)[[1]]
summary_factor_12 <- summary(anova_factor_12)[[1]]
# Combine results into a data frame
anova_results <- data.frame(
Factor = c(2, 4, 5, 12),
"Degrees of Freedom" = c(summary_factor_2[1, "Df"], summary_factor_4[1, "Df"], summary_factor_5[1, "Df"], summary_factor_12[1, "Df"]),
"Sum of Squares" = c(summary_factor_2[1, "Sum Sq"], summary_factor_4[1, "Sum Sq"], summary_factor_5[1, "Sum Sq"], summary_factor_12[1, "Sum Sq"]),
"Mean Squares" = c(summary_factor_2[1, "Mean Sq"], summary_factor_4[1, "Mean Sq"], summary_factor_5[1, "Mean Sq"], summary_factor_12[1, "Mean Sq"]),
"F-value" = c(summary_factor_2[1, "F value"], summary_factor_4[1, "F value"], summary_factor_5[1, "F value"], summary_factor_12[1, "F value"]),
"P-value" = c(summary_factor_2[1, "Pr(>F)"], summary_factor_4[1, "Pr(>F)"], summary_factor_5[1, "Pr(>F)"], summary_factor_12[1, "Pr(>F)"])
)
print(anova_results)
#save as csv for supplementary information
write.csv(anova_results,file = "ANOVA_results.csv", row.names = FALSE)
```
# various visualisations of the important factors
#heatmaps of the top loaded features in the factors that correlated to response for each omic
```{r}
heatmap_function <- function(model, n_features, view, covariate, factor){
for (omics in view){
plot_data_heatmap(model,
factor = factor,
view = omics,
features = n_features,
denoise = FALSE,
cluster_rows = T, cluster_cols = T,
show_colnames = F, show_rownames = T,
annotation_samples = covariate,
annotation_legend = T,
scale = "row"
)
}
}
# make a heatmap for each factor correlated strongly with the response covariate
heatmap_function(model_15,30,c("RNA"),"Response", 2)
heatmap_function(model_15,30,c("RNA"),"Response", 4)
heatmap_function(model_15,30,c("RNA"),"Response", 5)
heatmap_function(model_15,30,c("RNA"),"Response", 12)
```
#plot samples in latent factor space labelled with response type
#for all the factors correlated to response
```{r}
p <- plot_factors(model_15,
factors = c(2,4,5,12),
color_by = "Response",
dot_size = 1.4,
show_missing = T
)
print(p)
```
# plot in 2D factor space
```{r}
p <- plot_factors(model_15,
factors = c(2,4),
color_by = "Response",
dot_size = 3
)
p +
stat_ellipse(aes(color=color_by), geom = "polygon", alpha=0.2)
p <- plot_factors(model_15,
factors = c(2,5),
color_by = "Response",
dot_size = 3
)
p +
stat_ellipse(aes(color=color_by), geom = "polygon", alpha=0.2)
p <- plot_factors(model_15,
factors = c(2,12),
color_by = "Response",
dot_size = 3
)
p +
stat_ellipse(aes(color=color_by), geom = "polygon", alpha=0.2)
```
# Plot the samples in 3D factor space
```{r}
library(plotly)
factors <- get_factors(model_15, as.data.frame = TRUE)
# Extract the first three factors
factor2 <- factors$factor=="Factor2"
factor2 <- factors[factor2,]
factor2 <- factor2$value
factor4 <- factors$factor=="Factor4"
factor4 <- factors[factor4,]
factor4 <- factor4$value
factor5 <- factors$factor=="Factor5"
factor5 <- factors[factor5,]
factor5 <- factor5$value
# Create a data frame with the first three factors
#add condition to df
condition <- sample_metadata$Response
factor_data <- data.frame(Factor2 = factor2, Factor4 = factor4, Factor5 = factor5, condition = condition)
# Create a 3D plot using plotly
fig <- plot_ly(factor_data, x = ~Factor2, y = ~Factor4, z = ~Factor5, color = ~condition, type = 'scatter3d', mode = 'markers', marker = list(size = 6)) %>% # Adjust the size value as needed %>%
layout(
title = "3D Plot of Factors",
scene = list(
xaxis = list(title = 'Factor 2'),
yaxis = list(title = 'Factor 4'),
zaxis = list(title = 'Factor 5')
)
)
# Display the plot
fig
```
#show important features from the most important factor related to reponse
```{r}
top_weighted_features <- function(model,view,factor_list,nfeatures){
# loop through all features
for (factor in factor_list){
#plot the top weighted features
plot <- plot_top_weights(model,
view = view,
factor = factor,
nfeatures = nfeatures, scale = FALSE
)
print(plot)
}
}
# plot feature weights for important factors for all omics
top_weighted_features(model_15, "RNA", c(2,4,5,12), 50)
top_weighted_features(model_15, "CNA", c(2,4,5,12), 50)
top_weighted_features(model_15, "Methylation", c(2,4,5,12), 50)
top_weighted_features(model_15, "Mutation", c(2,4,5,12), 50)
```
#####################################################################################