forked from bcgsc/pori_graphkb_loader
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.js
More file actions
150 lines (136 loc) · 5.36 KB
/
index.js
File metadata and controls
150 lines (136 loc) · 5.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/**
* Import the RefSeq transcripts, ignoring version numbers for now
* @module importer/refseq
*/
const { loadDelimToJson } = require('../util');
const { rid } = require('../graphkb');
const { logger } = require('../logging');
const _entrez = require('../entrez/gene');
const { refseq: SOURCE_DEFN } = require('../sources');
/**
 * Parse the tab delimited file to upload features and their relationships
 * For each versioned feature, a generalization (non-versioned) feature is created
 * to facilitate linking from other sources where the version may not be given
 *
 * @param {object} opt options
 * @param {string} opt.filename path to the tab delimited file
 * @param {ApiConnection} opt.conn the api connection object
 * @param {number} [opt.maxRecords] if given, stop after loading at most this many records
 */
const uploadFile = async ({ filename, conn, maxRecords }) => {
    const json = await loadDelimToJson(filename);
    const source = await conn.addRecord({
        content: SOURCE_DEFN,
        existsOk: true,
        fetchConditions: { name: SOURCE_DEFN.name },
        target: 'Source',
    });

    if (maxRecords) {
        logger.log('info', `Loading ${maxRecords} gene records`);
    } else {
        logger.log('info', `Loading ${json.length} gene records`);
    }
    const counts = { error: 0, skipped: 0, success: 0 };

    // batch load entrez genes up front so the per-record fetches below hit the cache
    await _entrez.preLoadCache(conn);
    await _entrez.fetchAndLoadByIds(conn, json.map(rec => rec.GeneID));

    for (let i = 0; i < json.length; i++) {
        // FIX: was `i > maxRecords`, which processed maxRecords + 1 records
        // before breaking (indices 0..maxRecords inclusive)
        if (maxRecords && i >= maxRecords) {
            logger.warn(`not loading all content due to max records limit (${maxRecords})`);
            break;
        }

        try {
            const { RNA, GeneID, Protein } = json[i];
            logger.info(`processing (${i} / ${json.length}) ${RNA}`);

            // Load the RNA: split a versioned accession (e.g. NM_000546.6)
            // into its base name and version
            const [rnaName, rnaVersion] = RNA.split('.');
            const general = await conn.addRecord({
                content: {
                    biotype: 'transcript', source: rid(source), sourceId: rnaName, sourceIdVersion: null,
                },
                existsOk: true,
                target: 'Feature',
            });
            const versioned = await conn.addRecord({
                content: {
                    biotype: 'transcript', source: rid(source), sourceId: rnaName, sourceIdVersion: rnaVersion,
                },
                existsOk: true,
                target: 'Feature',
            });
            // make the general an alias of the versioned
            await conn.addRecord({
                content: { in: rid(versioned), out: rid(general), source: rid(source) },
                existsOk: true,
                fetchExisting: false,
                target: 'generalizationof',
            });

            // cross-link the transcript to its entrez gene; failure here is
            // logged but does not fail the record as a whole
            try {
                const [hgnc] = await _entrez.fetchAndLoadByIds(conn, [GeneID]);
                await conn.addRecord({
                    content: { in: rid(hgnc), out: rid(general), source: rid(source) },
                    existsOk: true,
                    fetchExisting: false,
                    target: 'elementof',
                });
            } catch (err) {
                logger.log('error', `failed cross-linking from ${general.sourceId} to ${GeneID}`);
                logger.error(err);
            }

            // load the protein (column may be empty for non-coding transcripts
            // — presumably; TODO confirm against input file format)
            if (Protein) {
                const [proteinName, proteinVersion] = Protein.split('.');
                const generalProtein = await conn.addRecord({
                    content: {
                        biotype: 'protein', source: rid(source), sourceId: proteinName, sourceIdVersion: null,
                    },
                    existsOk: true,
                    target: 'Feature',
                });
                const versionedProtein = await conn.addRecord({
                    content: {
                        biotype: 'protein', source: rid(source), sourceId: proteinName, sourceIdVersion: proteinVersion,
                    },
                    existsOk: true,
                    target: 'Feature',
                });
                // make the general an alias of the versioned
                await conn.addRecord({
                    content: {
                        in: rid(versionedProtein),
                        out: rid(generalProtein),
                        source: rid(source),
                    },
                    existsOk: true,
                    fetchExisting: false,
                    target: 'generalizationof',
                });
                // protein is an element of its transcript (general-to-general
                // and versioned-to-versioned)
                await conn.addRecord({
                    content: {
                        in: rid(general),
                        out: rid(generalProtein),
                        source: rid(source),
                    },
                    existsOk: true,
                    fetchExisting: false,
                    target: 'elementof',
                });
                await conn.addRecord({
                    content: {
                        in: rid(versioned),
                        out: rid(versionedProtein),
                        source: rid(source),
                    },
                    existsOk: true,
                    fetchExisting: false,
                    target: 'elementof',
                });
            }
            counts.success++;
        } catch (err) {
            // per-record failure: log and continue with the next record
            logger.error(err);
            counts.error++;
        }
    }
    logger.info(JSON.stringify(counts));
};
module.exports = { SOURCE_DEFN, uploadFile };