-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathGenerate_Random_Signatures.R
More file actions
75 lines (52 loc) · 2.06 KB
/
Generate_Random_Signatures.R
File metadata and controls
75 lines (52 loc) · 2.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
### Generate random signatures
# Step 1: read every dataset in the primary database directory and record the
# row names of its expression matrix. Datasets with fewer than 10000 rows go
# into `gene.list.small`; all others go into `gene.list`, whose common row
# names become the sampling pool `genes`.
# NOTE(review): exprs() is not defined in this file; presumably it is
# Biobase::exprs and the RDS files hold ExpressionSet objects -- confirm that
# the package is attached before this script runs.
data.dir <- 'data/Database/Primary/'
datasets <- list.files(data.dir)

# Start from real lists: `[[name]] <- vector` on an empty c() (NULL) only
# becomes a list by accident of the first value's length, and a length-1 first
# value would turn the accumulator into an atomic vector that breaks later
# assignments.
gene.list <- list()
gene.list.small <- list()

for (dataset in datasets) {
  eSet <- readRDS(file.path(data.dir, dataset))
  expr <- exprs(eSet)
  if (nrow(expr)<10000) {
    gene.list.small[[dataset]] <- rownames(expr)
  } else {
    gene.list[[dataset]] <- rownames(expr)
  }
}

# Reduce() over an empty list returns NULL, which would only surface later as
# an obscure sample() error; fail here with an explicit message instead.
if (length(gene.list) == 0) {
  stop("No dataset with at least 10000 rows found in '", data.dir, "'",
       call. = FALSE)
}

# Row names shared by every large dataset.
genes <- Reduce(intersect, gene.list)
genes
# Step 2: draw random signatures of several sizes, three replicates per size.
# `random.signatures` samples from the shared pool `genes`;
# `random.signatures.wu` samples from the first entry of `gene.list.small`
# (presumably the Wu dataset, given the 'Wu_Rand' prefix -- confirm).
# NOTE(review): no seed is set in this script, so every run draws different
# signatures and overwrites the saved files with them.
random.signatures <- list()
random.signatures.wu <- list()

# expand.grid varies its first column fastest, so rows are ordered by size and
# then by replicate -- the same order as a size-outer / replicate-inner loop.
design <- expand.grid(replicate = 1:3, size = c(10,30,50,100))

for (k in seq_len(nrow(design))) {
  size <- design$size[k]
  tag <- paste(size, design$replicate[k], sep='_')

  # Keep the two draws interleaved so the RNG stream is consumed in the same
  # order for both collections.
  random.signatures[[paste('Rand', tag, sep='_')]] <- sample(genes, size)
  random.signatures.wu[[paste('Wu_Rand', tag, sep='_')]] <-
    sample(gene.list.small[[1]], size)
}

saveRDS(random.signatures, file='data/Random_Signatures.RDS')
saveRDS(random.signatures.wu, file='data/Random_Signatures_Wu.RDS')
### Step 3: pool the genes of the published signatures
# Each name below is a sheet of data/Classifiers.xlsx. For every sheet the
# values of its `Ensembl` column are collected, and the unique values over all
# sheets (in order of first appearance) form `signature.genes`.
# NOTE(review): read_xlsx() is not defined in this file; presumably it comes
# from the readxl package -- confirm that it is attached.
signatures <- c('Agell','Bibikova','Bismar','Decipher','Ding','Glinsky','Irshad',
                'Jennifer','Jia','Kamoun','Long','Luca','Mo','Nakagawa','Olmos',
                'Oncotype','Penney','Planche','Prolaris','Ramaswamy','Ramos_Montoya',
                'Ross_Adams','Ross_Robert','Sharma','Talantov','Varambally','Wu','Yang',
                'Yu')

ensembl.by.sheet <- lapply(signatures, function(sheet.name) {
  # Progress message: one line per sheet, emitted before the sheet is read.
  message (sheet.name)
  sheet <- read_xlsx(path = 'data/Classifiers.xlsx', sheet=sheet.name)
  sheet$Ensembl
})

signature.genes <- unique(unlist(ensembl.by.sheet, use.names = FALSE))
signature.genes
# Step 4: for every random signature, print how many of its genes also occur
# in `signature.genes`.
ensembl <- readRDS('data/Annotation/ENSEMBL_Annotation_Human_V98_20191230.RDS')
ensembl

# Count the genes of one signature whose annotated name is in `reference.genes`.
#   signature.ids   : identifiers looked up in annotation$ensembl_gene_id
#   annotation      : table with columns ensembl_gene_id and external_gene_name
#   reference.genes : vector the annotated names are tested against
# Returns a single integer count.
# NOTE(review): `signature.genes` is built from the `Ensembl` column of the
# classifier sheets, while the values tested against it here are
# `external_gene_name` entries. If those are different identifier types the
# count will always be 0 -- confirm which identifier is intended.
count.signature.overlap <- function(signature.ids, annotation, reference.genes) {
  idx <- match(signature.ids, annotation$ensembl_gene_id)
  gene.names <- annotation$external_gene_name[idx]
  # match() yields NA for identifiers missing from the annotation, and
  # `NA %in% x` is TRUE whenever x itself contains an NA; exclude NA names so
  # unmatched genes are never counted as overlaps.
  sum(!is.na(gene.names) & gene.names %in% reference.genes)
}

for (signature in names(random.signatures)) {
  print (count.signature.overlap(random.signatures[[signature]], ensembl,
                                 signature.genes))
}

for (signature in names(random.signatures.wu)) {
  print (count.signature.overlap(random.signatures.wu[[signature]], ensembl,
                                 signature.genes))
}