Skip to content

Commit 4fcffcc

Browse files
valval
authored andcommitted
update
1 parent e46f275 commit 4fcffcc

10 files changed

Lines changed: 73609 additions & 17 deletions

ScriptAlleleNetwork.r

Lines changed: 100 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,63 @@ library("igraph")
77

88
##### AlleleNetwork Alternative ###############################################
99

10+
Net.abr = read_delim("Net.abr.net", col_names = FALSE, delim = "\t")
11+
Net.bac = read_delim("Net.bac.net", col_names = FALSE, delim = "\t")
12+
Net.rel = read_delim("Net.rel.net", col_names = FALSE, delim = "\t")
1013

14+
colnames(Net.abr) = c("Source", "Target", "value")
15+
colnames(Net.bac) = c("Source", "Target", "value")
16+
colnames(Net.rel) = c("Source", "Target", "value")
1117

12-
Net.abr.graph = graph_from_edgelist(as.matrix(Net.abr), directed = FALSE)
13-
Net.bac.graph = graph_from_edgelist(as.matrix(Net.bac), directed = FALSE)
14-
Net.rel.graph = graph_from_edgelist(as.matrix(Net.rel), directed = FALSE)
18+
Net.abr.matrix = Net.abr %>% filter(!is.na(Target), !is.na(Source)) %>% spread(Source, value, fill = 0)
19+
Net.bac.matrix = Net.bac %>% filter(!is.na(Target), !is.na(Source)) %>% spread(Source, value, fill = 0)
20+
Net.rel.matrix = Net.rel %>% filter(!is.na(Target), !is.na(Source)) %>% spread(Source, value, fill = 0)
1521

16-
Net.abr.matrix = as_adjacency_matrix(Net.abr.graph)
17-
Net.abr.mcl = mcl(Net.abr.matrix, addLoops = TRUE, allow1 = TRUE)
22+
tmp = Net.abr.matrix$Target
23+
Net.abr.matrix = Net.abr.matrix[, -1]
24+
rownames(Net.abr.matrix) = tmp
25+
26+
tmp = Net.bac.matrix$Target
27+
Net.bac.matrix = Net.bac.matrix[, -1]
28+
rownames(Net.bac.matrix) = tmp
29+
30+
31+
tmp = Net.rel.matrix$Target
32+
Net.rel.matrix = Net.rel.matrix[, -1]
33+
rownames(Net.rel.matrix) = tmp
34+
35+
36+
Net.abr.graph = graph_from_adjacency_matrix(
37+
as.matrix(Net.abr.matrix),
38+
mode = "upper",
39+
weighted = TRUE,
40+
diag = FALSE
41+
)
42+
Net.bac.graph = graph_from_adjacency_matrix(
43+
as.matrix(Net.abr.matrix),
44+
mode = "upper",
45+
weighted = TRUE,
46+
diag = FALSE
47+
)
48+
Net.rel.graph = graph_from_adjacency_matrix(
49+
as.matrix(Net.abr.matrix),
50+
mode = "upper",
51+
weighted = TRUE,
52+
diag = FALSE
53+
)
54+
55+
################ Using MCL algorithm for clustering classification #########################
56+
Net.abr.mcl = mcl(as_adjacency_matrix(Net.abr.graph),
57+
addLoops = TRUE,
58+
allow1 = TRUE)
59+
Net.bac.mcl = mcl(as_adjacency_matrix(Net.bac.graph),
60+
addLoops = TRUE,
61+
allow1 = TRUE)
62+
Net.rel.mcl = mcl(as_adjacency_matrix(Net.rel.graph),
63+
addLoops = TRUE,
64+
allow1 = TRUE)
1865

19-
Net.bac.matrix = as_adjacency_matrix(Net.bac.graph)
20-
Net.bac.mcl = mcl(Net.bac.matrix, addLoops = TRUE, allow1 = TRUE)
2166

22-
Net.rel.matrix = as_adjacency_matrix(Net.rel.graph)
23-
Net.rel.mcl = mcl(Net.rel.matrix, addLoops = TRUE, allow1 = TRUE)
2467

2568

2669
Net.abr.cl = data_frame(
@@ -42,10 +85,44 @@ Net.rel.cl = data_frame(
4285
Net.all.cl = bind_rows(Net.abr.cl,
4386
Net.bac.cl,
4487
Net.rel.cl)
88+
############ END MCL algorithm for clustering classification ################################
89+
90+
############ Clustering by Components (Calculate the maximal (weakly or strongly) connected components of a graph) #############
91+
############ This is an alternative if MCL fails because the size of the AlleleNetwork ####################
92+
93+
Net.abr.comp = components(Net.abr.graph)
94+
Net.bac.comp = components(Net.bac.graph)
95+
Net.rel.comp = components(Net.rel.graph)
96+
97+
Net.abr.cl = data_frame(
98+
Gene = rownames(as.data.frame(Net.abr.comp$membership)),
99+
Cluster = Net.abr.comp$membership,
100+
DataSet = "abr"
101+
)
102+
Net.bac.cl = data_frame(
103+
Gene = rownames(as.data.frame(Net.bac.comp$membership)),
104+
Cluster = Net.bac.comp$membership,
105+
DataSet = "bac"
106+
)
107+
Net.rel.cl = data_frame(
108+
Gene = rownames(as.data.frame(Net.rel.comp$membership)),
109+
Cluster = Net.rel.comp$membership,
110+
DataSet = "rel"
111+
)
112+
Net.all.cl = bind_rows(Net.abr.cl,
113+
Net.bac.cl,
114+
Net.rel.cl)
115+
116+
########### END of Clustering by components ################################################
117+
45118

46-
Connections.abr = bind_rows(Net.abr %>% select(X = Gen, N), Net.abr %>% select(X = siguienteG, N)) %>% group_by(X) %>% summarise(N = sum(N))
47-
Connections.bac = bind_rows(Net.bac %>% select(X = Gen, N), Net.bac %>% select(X = siguienteG, N)) %>% group_by(X) %>% summarise(N = sum(N))
48-
Connections.rel = bind_rows(Net.rel %>% select(X = Gen, N), Net.rel %>% select(X = siguienteG, N)) %>% group_by(X) %>% summarise(N = sum(N))
119+
120+
Connections.abr = bind_rows(Net.abr %>% select(X = Target, value),
121+
Net.abr %>% select(X = Source, value)) %>% group_by(X) %>% summarise(N = sum(value))
122+
Connections.bac = bind_rows(Net.bac %>% select(X = Target, value),
123+
Net.bac %>% select(X = Source, value)) %>% group_by(X) %>% summarise(N = sum(value))
124+
Connections.rel = bind_rows(Net.rel %>% select(X = Target, value),
125+
Net.rel %>% select(X = Source, value)) %>% group_by(X) %>% summarise(N = sum(value))
49126
Connections.all = bind_rows(Connections.abr, Connections.bac, Connections.rel)
50127
colnames(Connections.all) = c("Gene", "Connections")
51128

@@ -83,13 +160,19 @@ Full.table = Full.table %>% separate(Sample, c("Sample", "DataSet", "kk"), sep =
83160
"\\.") %>% select(-kk) %>% full_join(., Net.all.cl)
84161
Full.table = Full.table %>% left_join(., Connections.all)
85162
Full.table$Connections[is.na(Full.table$Connections)] = 0
86-
reads = read_table("../count.reads",col_names = FALSE)
87-
colnames(reads) = c("TotalReads","Sample")
88-
reads$Sample = gsub(".R1.fastq","",reads$Sample)
89-
reads$TotalReads = reads$TotalReads/4
90-
Full.table = left_join(Full.table, Nreads)
163+
reads = read_delim("../Nreads.txt", col_names = TRUE, delim = ",") #### File with the reads count per Sample
164+
165+
Full.table = left_join(Full.table, reads)
91166
Full.table = Full.table %>% mutate(FinalClust = ifelse(is.na(Cluster), Gene, paste(DataSet, Cluster, sep =
92167
"")))
93168
RepresentativesOfCluster = Full.table %>% group_by(FinalClust, Gene) %>% summarise(conn = max(Connections)) %>% group_by(FinalClust) %>% mutate(top = max(conn)) %>% filter(conn == top) %>% group_by(FinalClust) %>% summarise(Representative = first(Gene))
94169
Full.table = full_join(Full.table, RepresentativesOfCluster)
170+
95171
##### END Join AlleleNetwork and Data abundance ############
172+
173+
174+
175+
Full.table = Full.table %>% mutate(RPKM = RPK * 1e6 / TotalReads,
176+
UpM = Uniq * 1e6 / TotalReads)
177+
178+

db/ResCapReference.1.bt2

5.98 MB
Binary file not shown.

db/ResCapReference.2.bt2

870 KB
Binary file not shown.

db/ResCapReference.3.bt2

390 KB
Binary file not shown.

db/ResCapReference.4.bt2

870 KB
Binary file not shown.

0 commit comments

Comments
 (0)