@@ -7,20 +7,63 @@ library("igraph")
77
88# #### AlleleNetwork Alternative ###############################################
99
10+ Net.abr = read_delim(" Net.abr.net" , col_names = FALSE , delim = " \t " )
11+ Net.bac = read_delim(" Net.bac.net" , col_names = FALSE , delim = " \t " )
12+ Net.rel = read_delim(" Net.rel.net" , col_names = FALSE , delim = " \t " )
1013
14+ colnames(Net.abr ) = c(" Source" , " Target" , " value" )
15+ colnames(Net.bac ) = c(" Source" , " Target" , " value" )
16+ colnames(Net.rel ) = c(" Source" , " Target" , " value" )
1117
12- Net.abr.graph = graph_from_edgelist(as.matrix( Net.abr ), directed = FALSE )
13- Net.bac.graph = graph_from_edgelist(as.matrix( Net.bac ), directed = FALSE )
14- Net.rel.graph = graph_from_edgelist(as.matrix( Net.rel ), directed = FALSE )
18+ Net.abr.matrix = Net.abr % > % filter( ! is.na( Target ), ! is.na( Source )) % > % spread( Source , value , fill = 0 )
19+ Net.bac.matrix = Net.bac % > % filter( ! is.na( Target ), ! is.na( Source )) % > % spread( Source , value , fill = 0 )
20+ Net.rel.matrix = Net.rel % > % filter( ! is.na( Target ), ! is.na( Source )) % > % spread( Source , value , fill = 0 )
1521
16- Net.abr.matrix = as_adjacency_matrix(Net.abr.graph )
17- Net.abr.mcl = mcl(Net.abr.matrix , addLoops = TRUE , allow1 = TRUE )
22+ tmp = Net.abr.matrix $ Target
23+ Net.abr.matrix = Net.abr.matrix [, - 1 ]
24+ rownames(Net.abr.matrix ) = tmp
25+
26+ tmp = Net.bac.matrix $ Target
27+ Net.bac.matrix = Net.bac.matrix [, - 1 ]
28+ rownames(Net.bac.matrix ) = tmp
29+
30+
31+ tmp = Net.rel.matrix $ Target
32+ Net.rel.matrix = Net.rel.matrix [, - 1 ]
33+ rownames(Net.rel.matrix ) = tmp
34+
35+
36+ Net.abr.graph = graph_from_adjacency_matrix(
37+ as.matrix(Net.abr.matrix ),
38+ mode = " upper" ,
39+ weighted = TRUE ,
40+ diag = FALSE
41+ )
42+ Net.bac.graph = graph_from_adjacency_matrix(
43+ as.matrix(Net.abr.matrix ),
44+ mode = " upper" ,
45+ weighted = TRUE ,
46+ diag = FALSE
47+ )
48+ Net.rel.graph = graph_from_adjacency_matrix(
49+ as.matrix(Net.abr.matrix ),
50+ mode = " upper" ,
51+ weighted = TRUE ,
52+ diag = FALSE
53+ )
54+
55+ # ############### Using MCL algorithm for clustering classification #########################
56+ Net.abr.mcl = mcl(as_adjacency_matrix(Net.abr.graph ),
57+ addLoops = TRUE ,
58+ allow1 = TRUE )
59+ Net.bac.mcl = mcl(as_adjacency_matrix(Net.bac.graph ),
60+ addLoops = TRUE ,
61+ allow1 = TRUE )
62+ Net.rel.mcl = mcl(as_adjacency_matrix(Net.rel.graph ),
63+ addLoops = TRUE ,
64+ allow1 = TRUE )
1865
19- Net.bac.matrix = as_adjacency_matrix(Net.bac.graph )
20- Net.bac.mcl = mcl(Net.bac.matrix , addLoops = TRUE , allow1 = TRUE )
2166
22- Net.rel.matrix = as_adjacency_matrix(Net.rel.graph )
23- Net.rel.mcl = mcl(Net.rel.matrix , addLoops = TRUE , allow1 = TRUE )
2467
2568
2669Net.abr.cl = data_frame(
@@ -42,10 +85,44 @@ Net.rel.cl = data_frame(
4285Net.all.cl = bind_rows(Net.abr.cl ,
4386 Net.bac.cl ,
4487 Net.rel.cl )
88+ # ########### END MCL algorithm for clustering classification ################################
89+
90+ # ########### Clustering by Components (Calculate the maximal (weakly or strongly) connected components of a graph) #############
91+ # ########### This is an alternative if MCL fails because the size of the AlleleNetwork ####################
92+
93+ Net.abr.comp = components(Net.abr.graph )
94+ Net.bac.comp = components(Net.bac.graph )
95+ Net.rel.comp = components(Net.rel.graph )
96+
97+ Net.abr.cl = data_frame(
98+ Gene = rownames(as.data.frame(Net.abr.comp $ membership )),
99+ Cluster = Net.abr.comp $ membership ,
100+ DataSet = " abr"
101+ )
102+ Net.bac.cl = data_frame(
103+ Gene = rownames(as.data.frame(Net.bac.comp $ membership )),
104+ Cluster = Net.bac.comp $ membership ,
105+ DataSet = " bac"
106+ )
107+ Net.rel.cl = data_frame(
108+ Gene = rownames(as.data.frame(Net.rel.comp $ membership )),
109+ Cluster = Net.rel.comp $ membership ,
110+ DataSet = " rel"
111+ )
112+ Net.all.cl = bind_rows(Net.abr.cl ,
113+ Net.bac.cl ,
114+ Net.rel.cl )
115+
116+ # ########## END of Clustering by components ################################################
117+
45118
46- Connections.abr = bind_rows(Net.abr %> % select(X = Gen , N ), Net.abr %> % select(X = siguienteG , N )) %> % group_by(X ) %> % summarise(N = sum(N ))
47- Connections.bac = bind_rows(Net.bac %> % select(X = Gen , N ), Net.bac %> % select(X = siguienteG , N )) %> % group_by(X ) %> % summarise(N = sum(N ))
48- Connections.rel = bind_rows(Net.rel %> % select(X = Gen , N ), Net.rel %> % select(X = siguienteG , N )) %> % group_by(X ) %> % summarise(N = sum(N ))
119+
120+ Connections.abr = bind_rows(Net.abr %> % select(X = Target , value ),
121+ Net.abr %> % select(X = Source , value )) %> % group_by(X ) %> % summarise(N = sum(value ))
122+ Connections.bac = bind_rows(Net.bac %> % select(X = Target , value ),
123+ Net.bac %> % select(X = Source , value )) %> % group_by(X ) %> % summarise(N = sum(value ))
124+ Connections.rel = bind_rows(Net.rel %> % select(X = Target , value ),
125+ Net.rel %> % select(X = Source , value )) %> % group_by(X ) %> % summarise(N = sum(value ))
49126Connections.all = bind_rows(Connections.abr , Connections.bac , Connections.rel )
50127colnames(Connections.all ) = c(" Gene" , " Connections" )
51128
@@ -83,13 +160,19 @@ Full.table = Full.table %>% separate(Sample, c("Sample", "DataSet", "kk"), sep =
83160 " \\ ." ) %> % select(- kk ) %> % full_join(. , Net.all.cl )
84161Full.table = Full.table %> % left_join(. , Connections.all )
85162Full.table $ Connections [is.na(Full.table $ Connections )] = 0
86- reads = read_table(" ../count.reads" ,col_names = FALSE )
87- colnames(reads ) = c(" TotalReads" ," Sample" )
88- reads $ Sample = gsub(" .R1.fastq" ," " ,reads $ Sample )
89- reads $ TotalReads = reads $ TotalReads / 4
90- Full.table = left_join(Full.table , Nreads )
163+ reads = read_delim(" ../Nreads.txt" , col_names = TRUE , delim = " ," ) # ### File with the reads count per Sample
164+
165+ Full.table = left_join(Full.table , reads )
91166Full.table = Full.table %> % mutate(FinalClust = ifelse(is.na(Cluster ), Gene , paste(DataSet , Cluster , sep =
92167 " " )))
93168RepresentativesOfCluster = Full.table %> % group_by(FinalClust , Gene ) %> % summarise(conn = max(Connections )) %> % group_by(FinalClust ) %> % mutate(top = max(conn )) %> % filter(conn == top ) %> % group_by(FinalClust ) %> % summarise(Representative = first(Gene ))
94169Full.table = full_join(Full.table , RepresentativesOfCluster )
170+
95171# #### END Join AlleleNetwork and Data abundance ############
172+
173+
174+
175+ Full.table = Full.table %> % mutate(RPKM = RPK * 1e6 / TotalReads ,
176+ UpM = Uniq * 1e6 / TotalReads )
177+
178+
0 commit comments