-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataProcessor.js
More file actions
70 lines (60 loc) · 2.09 KB
/
dataProcessor.js
File metadata and controls
70 lines (60 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/**
 * Joins phenotype records with per-strain SNP allele calls and splits the
 * joined rows into "major" and "minor" allele groups.
 *
 * The file-parsing methods rely on a global `Papa` object (Papa Parse) being
 * in scope when they are called.
 */
class DataProcessor {
  /**
   * Parses one phenotype file and any number of SNP files, then classifies the
   * phenotype rows once per SNP file.
   *
   * @param {*} phenoFile - Tab-separated phenotype input handed to `Papa.parse`.
   * @param {Iterable<*>|ArrayLike<*>} snpFiles - SNP inputs; each must expose a
   *   `name` property and be parseable by `Papa.parse`.
   * @returns {Promise<Array<{fileName: *, majorData: object[], minorData: object[]}>>}
   *   One entry per SNP file, in the same order as `snpFiles`.
   * @throws Rejects if parsing any of the files reports an error.
   */
  static async processFiles(phenoFile, snpFiles) {
    const phenoData = await this.parseTSV(phenoFile);

    // The SNP files are independent of each other, so parse them concurrently.
    // Promise.all keeps the output in input order; Array.from accepts any
    // iterable or array-like collection, not just true arrays.
    return Promise.all(
      Array.from(snpFiles, async (snpFile) => {
        const alleleData = await this.parseCSV(snpFile);
        const processedData = this.processPhenoAndSnpData(phenoData, alleleData);
        return {
          fileName: snpFile.name,
          majorData: processedData.filter((d) => d.allele === 'major'),
          minorData: processedData.filter((d) => d.allele === 'minor'),
        };
      }),
    );
  }

  /**
   * Promise wrapper around the callback-based `Papa.parse`, always using the
   * first row as the header.
   *
   * @param {*} file - Input accepted by `Papa.parse`.
   * @param {object} [config={}] - Extra Papa Parse options (e.g. `delimiter`).
   * @returns {Promise<object[]>} The parsed rows (`results.data`).
   * @throws Rejects with the error Papa Parse passes to its `error` callback.
   */
  static parseDelimited(file, config = {}) {
    return new Promise((resolve, reject) => {
      Papa.parse(file, {
        header: true,
        ...config,
        complete: (results) => resolve(results.data),
        // Without an error handler a failed read would leave the promise
        // pending forever and stall processFiles.
        error: (err) => reject(err),
      });
    });
  }

  /**
   * Parses a tab-separated file into an array of row objects keyed by header.
   *
   * @param {*} file - Input accepted by `Papa.parse`.
   * @returns {Promise<object[]>} The parsed rows.
   */
  static parseTSV(file) {
    return this.parseDelimited(file, { delimiter: '\t' });
  }

  /**
   * Parses a delimited file (delimiter left to Papa Parse's defaults) into an
   * array of row objects keyed by header.
   *
   * @param {*} file - Input accepted by `Papa.parse`.
   * @returns {Promise<object[]>} The parsed rows.
   */
  static parseCSV(file) {
    return this.parseDelimited(file);
  }

  /**
   * Left-joins phenotype rows to SNP rows (`pheno.Accession_ID` against
   * `snp.strain`) and tags every phenotype row with an `alt` value and an
   * `allele` label.
   *
   * @param {object[]} phenoData - Phenotype rows carrying `Accession_ID`.
   * @param {object[]} alleleData - SNP rows carrying `strain` and `alt`.
   * @returns {object[]} A copy of each phenotype row plus `alt` (`null` when no
   *   SNP row matches) and `allele` (`'major'` or `'minor'`).
   */
  static processPhenoAndSnpData(phenoData, alleleData) {
    // Index SNP rows by strain once instead of scanning the list for every
    // phenotype row. Only the first row per strain is kept, matching the
    // first-match behaviour of a linear search.
    const altByStrain = new Map();
    for (const row of alleleData) {
      if (!altByStrain.has(row.strain)) {
        altByStrain.set(row.strain, row.alt);
      }
    }

    const mergedData = phenoData.map((pheno) => ({
      ...pheno,
      alt: altByStrain.has(pheno.Accession_ID)
        ? altByStrain.get(pheno.Accession_ID)
        : null,
    }));

    // Empty/missing alt values do not take part in the frequency count.
    const altValues = mergedData.map((row) => row.alt).filter(Boolean);
    const mostCommonAllele = this.mode(altValues);
    // With no usable alt value at all there is nothing to compare against, so
    // no row may be labelled 'minor' (guards against undefined === undefined).
    const hasMode = mostCommonAllele !== undefined;

    // NOTE(review): rows carrying the most common alt value are labelled
    // 'minor' and everything else (including unmatched rows) 'major'. This
    // labelling is kept exactly as it was; confirm it is the intended meaning.
    return mergedData.map((row) => ({
      ...row,
      allele: hasMode && row.alt === mostCommonAllele ? 'minor' : 'major',
    }));
  }

  /**
   * Returns the most frequent value of `arr` without modifying it.
   *
   * @param {Array<*>} arr - Values to count.
   * @returns {*} The most frequent value, or `undefined` for an empty array.
   *   When several values tie, the one occurring last in `arr` wins.
   */
  static mode(arr) {
    const counts = new Map();
    for (const value of arr) {
      counts.set(value, (counts.get(value) || 0) + 1);
    }

    let best;
    let bestCount = 0;
    // `>=` lets a later value replace an earlier one with the same count,
    // which is the tie-break callers have always observed.
    for (const value of arr) {
      const count = counts.get(value);
      if (count >= bestCount) {
        best = value;
        bestCount = count;
      }
    }
    return best;
  }
}