-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathload-csv.js
More file actions
74 lines (62 loc) · 1.77 KB
/
load-csv.js
File metadata and controls
74 lines (62 loc) · 1.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
const fs = require('fs');
const _ = require('lodash');
/**
 * Project a table onto the named columns.
 *
 * The first row of `data` is treated as the header row; each name in
 * `columnNames` is looked up there and the matching cell is taken from every
 * row (the header row included), in the order the names were given.
 *
 * The input rows are NOT modified, so the same table can safely be projected
 * several times (e.g. once for features and once for labels), even when the
 * requested column sets overlap.
 *
 * @param {Array<Array<*>>} data - Rows of cells; `data[0]` holds the column names.
 * @param {string[]} columnNames - Header names of the columns to keep.
 * @returns {Array<Array<*>>} One new row per input row containing only the
 *   requested cells. A name that is not present in the header row yields
 *   `undefined` in every row. An empty table yields an empty array.
 */
function extractColumns(data, columnNames) {
  // Nothing to project (and no header row to look names up in).
  if (data.length === 0) {
    return [];
  }

  const headers = data[0];
  // Resolve each name to its position once, rather than once per row.
  const indexes = columnNames.map(name => headers.indexOf(name));

  // Build fresh rows by reading, not removing, the selected cells.
  return data.map(row => indexes.map(position => row[position]));
}
module.exports = function loadCSV(
filename,
{
dataColumns = [],
labelColumns = [],
converters = {},
shuffle = false,
splitTest = false
}
) {
let data = fs.readFileSync(filename, { encoding: 'utf-8' });
data = _.map(data.split('\n'), d => d.split(','));
data = _.dropRightWhile(data, val => _.isEqual(val, ['']));
const headers = _.first(data);
data = _.map(data, (row, index) => {
if (index === 0) {
return row;
}
return _.map(row, (element, index) => {
if (converters[headers[index]]) {
const converted = converters[headers[index]](element);
return _.isNaN(converted) ? element : converted;
}
if (element.indexOf(";")>=0){
var array = element.split(";")
var result = []
array.forEach(elem=>{
result.push(parseFloat(elem.replace('"', '')))
})
}
else{
var result = parseFloat(element.replace('"', ''));
}
return _.isNaN(result) ? element : result;
});
});
let labels = extractColumns(data, labelColumns);
data = extractColumns(data, dataColumns);
data.shift();
labels.shift();
if (splitTest) {
const trainSize = _.isNumber(splitTest)
? splitTest
: Math.floor(data.length / 2);
return {
data: data.slice(trainSize),
labels: labels.slice(trainSize),
testFeatures: data.slice(0, trainSize),
testLabels: labels.slice(0, trainSize)
};
} else {
return { data: data };
}
};