-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLinkInfo.R
More file actions
112 lines (95 loc) · 4.76 KB
/
LinkInfo.R
File metadata and controls
112 lines (95 loc) · 4.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
link_info <- function(in_file, out_file, cut_off = 2, return_d = F, NLN_elety = "ND2"){
require(bio3d)
# Read in pdb and extract data frame -----------------------------------------------------------
pdb <- read.pdb(in_file)
atom <- pdb[['atom']]
# Find cystine residues
CYX <- atom[atom$resid == 'CYX' & atom$elety == 'SG', c('resno','x','y','z')]
# Create links for cystine bonds ---------------------------------------------------------------
ssbond <- data.frame(fresno = numeric(),
felety = character(),
sresno = numeric(),
selety = character(), stringsAsFactors = F)
if (nrow(CYX) != 0)
{
for (i in 1:(dim(CYX)[1]/2))
{
ref <- CYX[1,2:4]
a <- apply(CYX[-1,2:4], 1, function(x) sqrt(sum((x-ref)^2)))
dist_to_ref <- data.frame(site = CYX[-1,1],dist = a)
min_dist <- which.min(dist_to_ref$dist)
if (dist_to_ref$dist[min_dist] < 3)
{
ssbond[i,] <- c(CYX[1,1], 'SG', dist_to_ref$site[min_dist], 'SG')
CYX <- CYX[-c(1, min_dist + 1),]
}
}
}
# Find the first residue of each glycan -------------------------------------------------------
all_4YB <- unique(atom$resno[atom$resid == '4YB'])
first_het_res <- all_4YB[all_4YB %in% (all_4YB - 1)]
hetatoms <- subset(atom, resno %in% first_het_res & elety == 'C1', select = c('resno','x','y','z'))
n_glyc <- dim(hetatoms)[1]
# Create links for glycan bonds ---------------------------------------------------------------
het_to_het_links <- data.frame(fresno = numeric(),
felety = character(),
sresno = numeric(),
selety = character(), stringsAsFactors = F)
for (i in 1:n_glyc)
{
het_to_het_links[i,] <- c(hetatoms$resno[i], 'O4', hetatoms$resno[i] + 1, 'C1')
het_to_het_links[i + n_glyc,] <- c(hetatoms$resno[i] + 1, 'O4', hetatoms$resno[i] + 2, 'C1')
het_to_het_links[i + 2*n_glyc,] <- c(hetatoms$resno[i] + 2, 'O6', hetatoms$resno[i] + 3, 'C1')
het_to_het_links[i + 3*n_glyc,] <- c(hetatoms$resno[i] + 2, 'O3', hetatoms$resno[i] + 8, 'C1')
het_to_het_links[i + 4*n_glyc,] <- c(hetatoms$resno[i] + 8, 'O2', hetatoms$resno[i] + 9, 'C1')
het_to_het_links[i + 5*n_glyc,] <- c(hetatoms$resno[i] + 9, 'O2', hetatoms$resno[i] + 10, 'C1')
het_to_het_links[i + 6*n_glyc,] <- c(hetatoms$resno[i] + 3, 'O6', hetatoms$resno[i] + 4, 'C1')
het_to_het_links[i + 7*n_glyc,] <- c(hetatoms$resno[i] + 3, 'O3', hetatoms$resno[i] + 6, 'C1')
het_to_het_links[i + 8*n_glyc,] <- c(hetatoms$resno[i] + 6, 'O2', hetatoms$resno[i] + 7, 'C1')
het_to_het_links[i + 9*n_glyc,] <- c(hetatoms$resno[i] + 4, 'O2', hetatoms$resno[i] + 5, 'C1')
}
# Find NLN residues
Linkto <- atom[atom$elety == NLN_elety &
atom$resid == "NLN",
c('resno','x','y','z')]
links <- data.frame(resno = numeric(), elety = character(),
hetresno = numeric(), hetelety = character(), stringsAsFactors = F)
if (dim(Linkto)[1] != dim(hetatoms)[1])
{
stop('The number of glycans and the number of NLNs do not match.')
}
for (i in 1:n_glyc)
{
ref <- Linkto[i, 2:4]
a <- apply(hetatoms[, 2:4], 1, function(x) sqrt(sum((x-ref)^2)))
dist_to_ref <- data.frame(site = hetatoms[, 1], dist = a)
min_dist <- which.min(dist_to_ref$dist)
if (dist_to_ref$dist[min_dist] < cut_off)
{
links[i,] <- c(Linkto[i,1], 'ND2', dist_to_ref$site[min_dist], 'C1')
}
else
{
warning(paste('Check if residue', Linkto[i,1], 'is really glycosylated.',
'If it is set the cutoff value higher'))
}
}
if (return_d == F)
{
# Write cystine bonds to output file ----------------------------------------------------------
ssbond$out_col <- paste('bond mol.', ssbond[,1], '.', ssbond[,2], ' mol.', ssbond[,3], '.', ssbond[,4], sep = '')
write.table(ssbond$out_col, out_file, quote = F, row.names = F, col.names = F)
# Write glycan bonds to output file -------------------------------------------------------------
het_to_het_links$out_col <- paste('bond mol.', het_to_het_links[,1], '.', het_to_het_links[,2],
' mol.', het_to_het_links[,3], '.', het_to_het_links[,4], sep = '')
write.table(het_to_het_links$out_col, out_file, quote = F, row.names = F, col.names = F, append = T)
# Write protein to glycan bonds to output file
links$out_col <- paste('bond mol.', links[,1], '.', links[,2],
' mol.', links[,3], '.', links[,4], sep = '')
write.table(links$out_col, out_file, quote = F, row.names = F, col.names = F, append = T)
}
if (return_d == T)
{
return(links[, c('resno', 'hetresno')])
}
}