From 79e5b531f918e685fb945989aecd298e49c963e9 Mon Sep 17 00:00:00 2001 From: Marco Winter Date: Sun, 18 Jun 2023 13:05:26 +0200 Subject: [PATCH 1/8] added custom separator and delimiter to parse function --- index.js | 24 ++++++++++++++------ test/__test__/delimiter1.json | 22 +++++++++++++++++++ test/__test__/delimiter2.json | 22 +++++++++++++++++++ test/__test__/separator1.json | 22 +++++++++++++++++++ test/__test__/separator2.json | 22 +++++++++++++++++++ test/options.spec.js | 41 +++++++++++++++++++++++++++++++++++ 6 files changed, 146 insertions(+), 7 deletions(-) create mode 100644 test/__test__/delimiter1.json create mode 100644 test/__test__/delimiter2.json create mode 100644 test/__test__/separator1.json create mode 100644 test/__test__/separator2.json diff --git a/index.js b/index.js index 967cc1c..6ea8985 100644 --- a/index.js +++ b/index.js @@ -3,6 +3,8 @@ * * options * - typed - infer types [false] + * - separator - use custom separator [,] + * - delimiter - use custom delimiter ["] * * @static * @param {string} csv the CSV string to parse @@ -20,7 +22,10 @@ export function parse (csv, options, reviver = v => v) { ctx.col = 1 ctx.row = 1 - const lexer = /"|,|\r\n|\n|\r|[^",\r\n]+/y + if(ctx.options.delimiter === undefined) ctx.options.delimiter = '"'; + if(ctx.options.separator === undefined) ctx.options.separator = ','; + + const lexer = new RegExp(`${escapeRegExp(ctx.options.delimiter)}|${escapeRegExp(ctx.options.separator)}|\r\n|\n|\r|[^${escapeRegExp(ctx.options.delimiter)}${escapeRegExp(ctx.options.separator)}\r\n]+`, 'y') const isNewline = /^(\r\n|\n|\r)$/ let matches = [] @@ -33,10 +38,10 @@ export function parse (csv, options, reviver = v => v) { switch (state) { case 0: // start of entry switch (true) { - case match === '"': + case match === ctx.options.delimiter: state = 3 break - case match === ',': + case match === ctx.options.separator: state = 0 valueEnd(ctx) break @@ -53,7 +58,7 @@ export function parse (csv, options, reviver = v => v) { 
break case 2: // un-delimited input switch (true) { - case match === ',': + case match === ctx.options.separator: state = 0 valueEnd(ctx) break @@ -69,7 +74,7 @@ export function parse (csv, options, reviver = v => v) { break case 3: // delimited input switch (true) { - case match === '"': + case match === ctx.options.delimiter: state = 4 break default: @@ -80,11 +85,11 @@ export function parse (csv, options, reviver = v => v) { break case 4: // escaped or closing delimiter switch (true) { - case match === '"': + case match === ctx.options.delimiter: state = 3 ctx.value += match break - case match === ',': + case match === ctx.options.separator: state = 0 valueEnd(ctx) break @@ -192,3 +197,8 @@ function inferType (value) { return value } } + +/** @private */ +function escapeRegExp(str) { + return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); +} \ No newline at end of file diff --git a/test/__test__/delimiter1.json b/test/__test__/delimiter1.json new file mode 100644 index 0000000..7184214 --- /dev/null +++ b/test/__test__/delimiter1.json @@ -0,0 +1,22 @@ +{ + "description": [ + "When options.delimiter is set the parser uses this character as delimiter" + ], + "csv": [ + "'123','456','789'", + "false,true,Test", + "" + ], + "json": [ + [ + "123", + "456", + "789" + ], + [ + "false", + "true", + "Test" + ] + ] + } \ No newline at end of file diff --git a/test/__test__/delimiter2.json b/test/__test__/delimiter2.json new file mode 100644 index 0000000..faa1eff --- /dev/null +++ b/test/__test__/delimiter2.json @@ -0,0 +1,22 @@ +{ + "description": [ + "When options.delimiter is set the parser uses this character as delimiter (RegExp meta char)" + ], + "csv": [ + "|123|,|456|,|789|", + "false,true,Test", + "" + ], + "json": [ + [ + "123", + "456", + "789" + ], + [ + "false", + "true", + "Test" + ] + ] + } \ No newline at end of file diff --git a/test/__test__/separator1.json b/test/__test__/separator1.json new file mode 100644 index 0000000..c688bfd --- 
/dev/null +++ b/test/__test__/separator1.json @@ -0,0 +1,22 @@ +{ + "description": [ + "When options.separator is set the parser uses this character as separator" + ], + "csv": [ + "123;456;789", + "false;true;Test", + "" + ], + "json": [ + [ + "123", + "456", + "789" + ], + [ + "false", + "true", + "Test" + ] + ] + } \ No newline at end of file diff --git a/test/__test__/separator2.json b/test/__test__/separator2.json new file mode 100644 index 0000000..87a9381 --- /dev/null +++ b/test/__test__/separator2.json @@ -0,0 +1,22 @@ +{ + "description": [ + "When options.separator is set the parser uses this character as separator (RegExp meta char)" + ], + "csv": [ + "123|456|789", + "false|true|Test", + "" + ], + "json": [ + [ + "123", + "456", + "789" + ], + [ + "false", + "true", + "Test" + ] + ] + } \ No newline at end of file diff --git a/test/options.spec.js b/test/options.spec.js index 177094d..edb8ce5 100644 --- a/test/options.spec.js +++ b/test/options.spec.js @@ -1,6 +1,7 @@ import test from 'tape' import * as CSV from '@vanillaes/csv' import { createRequire } from 'module' +import { sep } from 'path' const require = createRequire(import.meta.url) const reviver1 = require('./__test__/reviver1.json') const reviver2 = require('./__test__/reviver2.json') @@ -10,6 +11,10 @@ const replacer1 = require('./__test__/replacer1.json') const replacer2 = require('./__test__/replacer2.json') const eof1 = require('./__test__/eof1.json') const eof2 = require('./__test__/eof2.json') +const separator1 = require('./__test__/separator1.json') +const separator2 = require('./__test__/separator2.json') +const delimiter1 = require('./__test__/delimiter1.json') +const delimiter2 = require('./__test__/delimiter2.json') test('Reviver #1 - The reviver should append 1 to each value', (t) => { const expect = reviver1.json @@ -82,3 +87,39 @@ test('EOF #2- When set to false the formatter should not include a newline at th t.end() }) + +test('Separator #1 - When set the parser should use this 
character as separator', (t) => { + const expect = separator1.json + const actual = CSV.parse(separator1.csv.join('\n'), { separator: ';' }) + + t.deepEqual(actual, expect) + + t.end() +}) + +test('Separator #2 - The parser accepts regular expression meta characters as separator', (t) => { + const expect = separator2.json + const actual = CSV.parse(separator2.csv.join('\n'), { separator: '|' }) + + t.deepEqual(actual, expect) + + t.end() +}) + +test('Delimiter #1 - When set the parser should use this character as delimiter', (t) => { + const expect = delimiter1.json + const actual = CSV.parse(delimiter1.csv.join('\n'), { delimiter: '\'' }) + + t.deepEqual(actual, expect) + + t.end() +}) + +test('Delimiter #2 - The parser accepts regular expression meta characters as delimiter', (t) => { + const expect = delimiter2.json + const actual = CSV.parse(delimiter2.csv.join('\n'), { delimiter: '|' }) + + t.deepEqual(actual, expect) + + t.end() +}) \ No newline at end of file From 04081609c98edb57c4329712ba291506db1b3629 Mon Sep 17 00:00:00 2001 From: Marco Winter Date: Sun, 18 Jun 2023 13:31:26 +0200 Subject: [PATCH 2/8] asserted that delimiter and separator are always one character --- index.js | 9 +++++++-- test/options.spec.js | 28 +++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index 6ea8985..9882654 100644 --- a/index.js +++ b/index.js @@ -22,8 +22,13 @@ export function parse (csv, options, reviver = v => v) { ctx.col = 1 ctx.row = 1 - if(ctx.options.delimiter === undefined) ctx.options.delimiter = '"'; - if(ctx.options.separator === undefined) ctx.options.separator = ','; + ctx.options.delimiter = ctx.options.delimiter === undefined ? '"' : options.delimiter; + if(ctx.options.delimiter.length > 1 | ctx.options.delimiter.length === 0) + throw Error(`CSVError: delimiter must be one character [${ctx.options.separator}]`) + + ctx.options.separator = ctx.options.separator === undefined ? 
',' : options.separator; + if(ctx.options.separator.length > 1 | ctx.options.separator.length === 0) + throw Error(`CSVError: separator must be one character [${ctx.options.separator}]`) const lexer = new RegExp(`${escapeRegExp(ctx.options.delimiter)}|${escapeRegExp(ctx.options.separator)}|\r\n|\n|\r|[^${escapeRegExp(ctx.options.delimiter)}${escapeRegExp(ctx.options.separator)}\r\n]+`, 'y') const isNewline = /^(\r\n|\n|\r)$/ diff --git a/test/options.spec.js b/test/options.spec.js index edb8ce5..f0a07b0 100644 --- a/test/options.spec.js +++ b/test/options.spec.js @@ -46,7 +46,7 @@ test('Typed #1 - When set to true the parser should infer the value types', (t) test('Typed #2- When set to false the parser should not infer the value types', (t) => { const expect = typed2.json const actual = CSV.parse(typed2.csv.join('\n'), { typed: false }) - + t.deepEqual(actual, expect) t.end() @@ -106,6 +106,19 @@ test('Separator #2 - The parser accepts regular expression meta characters as se t.end() }) +test('Separator #3 - The separator must be one character', (t) => { + t.throws( + () => CSV.parse(separator1.csv.join('\n'), { separator: '' }), + /separator must be one character/ + ) + t.throws( + () => CSV.parse(separator1.csv.join('\n'), { separator: '==' }), + /separator must be one character/ + ) + + t.end() +}) + test('Delimiter #1 - When set the parser should use this character as delimiter', (t) => { const expect = delimiter1.json const actual = CSV.parse(delimiter1.csv.join('\n'), { delimiter: '\'' }) @@ -121,5 +134,18 @@ test('Delimiter #2 - The parser accepts regular expression meta characters as de t.deepEqual(actual, expect) + t.end() +}) + +test('Delimiter #3 - The delimiter must be one character', (t) => { + t.throws( + () => CSV.parse(delimiter1.csv.join('\n'), { delimiter: '' }), + /delimiter must be one character/ + ) + t.throws( + () => CSV.parse(delimiter1.csv.join('\n'), { delimiter: '==' }), + /delimiter must be one character/ + ) + t.end() }) \ No newline 
at end of file From a1032fccbedcedd4eab9b8a13e2b94cdbda0a1f3 Mon Sep 17 00:00:00 2001 From: Marco Winter Date: Sun, 18 Jun 2023 14:29:19 +0200 Subject: [PATCH 3/8] added custom separator and delimiter support to stringify function --- index.js | 16 ++++++++++++---- test/__test__/delimiter3.json | 22 ++++++++++++++++++++++ test/__test__/separator1.json | 4 ++-- test/options.spec.js | 20 ++++++++++++++++++++ 4 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 test/__test__/delimiter3.json diff --git a/index.js b/index.js index 9882654..ee31675 100644 --- a/index.js +++ b/index.js @@ -139,19 +139,27 @@ export function stringify (array, options = {}, replacer = v => v) { ctx.col = 1 ctx.output = '' - const needsDelimiters = /"|,|\r\n|\n|\r/ + ctx.options.delimiter = ctx.options.delimiter === undefined ? '"' : options.delimiter; + if(ctx.options.delimiter.length > 1 | ctx.options.delimiter.length === 0) + throw Error(`CSVError: delimiter must be one character [${ctx.options.separator}]`) + + ctx.options.separator = ctx.options.separator === undefined ? ',' : options.separator; + if(ctx.options.separator.length > 1 | ctx.options.separator.length === 0) + throw Error(`CSVError: separator must be one character [${ctx.options.separator}]`) + + const needsDelimiters = new RegExp(`${escapeRegExp(ctx.options.delimiter)}|${escapeRegExp(ctx.options.separator)}|\r\n|\n|\r`) array.forEach((row, rIdx) => { let entry = '' ctx.col = 1 row.forEach((col, cIdx) => { if (typeof col === 'string') { - col = col.replace(/"/g, '""') - col = needsDelimiters.test(col) ? `"${col}"` : col + col = col.replace(ctx.options.delimiter, `${ctx.options.delimiter}${ctx.options.delimiter}`) + col = needsDelimiters.test(col) ? 
`${ctx.options.delimiter}${col}${ctx.options.delimiter}` : col } entry += replacer(col, ctx.row, ctx.col) if (cIdx !== row.length - 1) { - entry += ',' + entry += ctx.options.separator } ctx.col++ }) diff --git a/test/__test__/delimiter3.json b/test/__test__/delimiter3.json new file mode 100644 index 0000000..be8db51 --- /dev/null +++ b/test/__test__/delimiter3.json @@ -0,0 +1,22 @@ +{ + "description": [ + "When options.delimiter is set stringify uses this character as delimiter" + ], + "csv": [ + "123,456,789", + "false,true,'Test,with separator'", + "" + ], + "json": [ + [ + "123", + "456", + "789" + ], + [ + "false", + "true", + "Test,with separator" + ] + ] + } \ No newline at end of file diff --git a/test/__test__/separator1.json b/test/__test__/separator1.json index c688bfd..cac45d9 100644 --- a/test/__test__/separator1.json +++ b/test/__test__/separator1.json @@ -4,7 +4,7 @@ ], "csv": [ "123;456;789", - "false;true;Test", + "false;true;\"Test;with separator\"", "" ], "json": [ @@ -16,7 +16,7 @@ [ "false", "true", - "Test" + "Test;with separator" ] ] } \ No newline at end of file diff --git a/test/options.spec.js b/test/options.spec.js index f0a07b0..366baf2 100644 --- a/test/options.spec.js +++ b/test/options.spec.js @@ -15,6 +15,7 @@ const separator1 = require('./__test__/separator1.json') const separator2 = require('./__test__/separator2.json') const delimiter1 = require('./__test__/delimiter1.json') const delimiter2 = require('./__test__/delimiter2.json') +const delimiter3 = require('./__test__/delimiter3.json') test('Reviver #1 - The reviver should append 1 to each value', (t) => { const expect = reviver1.json @@ -119,6 +120,15 @@ test('Separator #3 - The separator must be one character', (t) => { t.end() }) +test('Separator #4 - When set stringify should use this character as separator', (t) => { + const expect = separator1.csv.join('\n') + const actual = CSV.stringify(separator1.json, { separator: ';' }) + + t.deepEqual(actual, expect) + + t.end() +}) 
+ test('Delimiter #1 - When set the parser should use this character as delimiter', (t) => { const expect = delimiter1.json const actual = CSV.parse(delimiter1.csv.join('\n'), { delimiter: '\'' }) @@ -147,5 +157,15 @@ test('Delimiter #3 - The delimiter must be one character', (t) => { /delimiter must be one character/ ) + t.end() +}) + + +test('Delimiter #4 - When set stringify should use this character as delimiter', (t) => { + const expect = delimiter3.csv.join('\n') + const actual = CSV.stringify(delimiter3.json, { delimiter: '\'' }) + + t.deepEqual(actual, expect) + t.end() }) \ No newline at end of file From fd2f7985674c06b51b0329fcfa5ae4146dcefdb3 Mon Sep 17 00:00:00 2001 From: Marco Winter Date: Sun, 18 Jun 2023 14:32:51 +0200 Subject: [PATCH 4/8] updated README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 98acb8d..1863ec8 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,8 @@ Takes a string of CSV data and converts it to a 2 dimensional array of `[entries - csv - the CSV string to parse - options - typed - infer types (default `false`) + - separator - the field separator to use; must be a single character (default `,`) + - delimiter - the field delimiter (quote character) to use; must be a single character (default `"`) - reviver1 - a custom function to modify the output (default `(value) => value`) *1 Values for `row` and `col` are 1-based.* @@ -98,6 +100,8 @@ Takes a 2 dimensional array of `[entries][values]` and converts them to CSV - array - the input array to stringify - options - eof - add a trailing newline at the end of file (default `true`) + - separator - the field separator to use; must be a single character (default `,`) + - delimiter - the field delimiter (quote character) to use; must be a single character (default `"`) - replacer1 - a custom function to modify the values (default `(value) => value`) *1 Values for `row` and `col` are 1-based.* From 669a642947917302821793e3b9652e2471a7a89a Mon Sep 17 00:00:00 2001 From: Marco Winter Date: Sun, 18 Jun 2023 14:35:54 +0200 Subject: [PATCH 5/8] 
updated options in stringify comment --- index.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/index.js b/index.js index ee31675..c3d1370 100644 --- a/index.js +++ b/index.js @@ -124,6 +124,8 @@ export function parse (csv, options, reviver = v => v) { * * options * - eof - add a trailing newline at the end of file [true] + * - separator - use custom separator [,] + * - delimiter - use custom delimiter ["] * * @static * @param {Array} array the input array to stringify From 59bd72c32b2004aaa896891fa2443184f1da9c1f Mon Sep 17 00:00:00 2001 From: Marco Winter Date: Sun, 18 Jun 2023 14:39:41 +0200 Subject: [PATCH 6/8] fixed build errors --- index.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/index.js b/index.js index c3d1370..f8bfc0e 100644 --- a/index.js +++ b/index.js @@ -23,11 +23,11 @@ export function parse (csv, options, reviver = v => v) { ctx.row = 1 ctx.options.delimiter = ctx.options.delimiter === undefined ? '"' : options.delimiter; - if(ctx.options.delimiter.length > 1 | ctx.options.delimiter.length === 0) + if(ctx.options.delimiter.length > 1 || ctx.options.delimiter.length === 0) throw Error(`CSVError: delimiter must be one character [${ctx.options.separator}]`) ctx.options.separator = ctx.options.separator === undefined ? ',' : options.separator; - if(ctx.options.separator.length > 1 | ctx.options.separator.length === 0) + if(ctx.options.separator.length > 1 || ctx.options.separator.length === 0) throw Error(`CSVError: separator must be one character [${ctx.options.separator}]`) const lexer = new RegExp(`${escapeRegExp(ctx.options.delimiter)}|${escapeRegExp(ctx.options.separator)}|\r\n|\n|\r|[^${escapeRegExp(ctx.options.delimiter)}${escapeRegExp(ctx.options.separator)}\r\n]+`, 'y') @@ -142,11 +142,11 @@ export function stringify (array, options = {}, replacer = v => v) { ctx.output = '' ctx.options.delimiter = ctx.options.delimiter === undefined ? 
'"' : options.delimiter; - if(ctx.options.delimiter.length > 1 | ctx.options.delimiter.length === 0) + if(ctx.options.delimiter.length > 1 || ctx.options.delimiter.length === 0) throw Error(`CSVError: delimiter must be one character [${ctx.options.separator}]`) ctx.options.separator = ctx.options.separator === undefined ? ',' : options.separator; - if(ctx.options.separator.length > 1 | ctx.options.separator.length === 0) + if(ctx.options.separator.length > 1 || ctx.options.separator.length === 0) throw Error(`CSVError: separator must be one character [${ctx.options.separator}]`) const needsDelimiters = new RegExp(`${escapeRegExp(ctx.options.delimiter)}|${escapeRegExp(ctx.options.separator)}|\r\n|\n|\r`) From 5e3c31e0560975dba8e02117ae2b7249de928c12 Mon Sep 17 00:00:00 2001 From: Marco Winter Date: Sun, 18 Jun 2023 14:40:18 +0200 Subject: [PATCH 7/8] build .min.js and .d.ts --- index.d.ts | 4 ++++ index.min.js | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) delete mode 100644 index.min.js diff --git a/index.d.ts b/index.d.ts index 647e301..2e1dcb3 100644 --- a/index.d.ts +++ b/index.d.ts @@ -3,6 +3,8 @@ * * options * - typed - infer types [false] + * - separator - use custom separator [,] + * - delimiter - use custom delimiter ["] * * @static * @param {string} csv the CSV string to parse @@ -16,6 +18,8 @@ export function parse(csv: string, options?: any, reviver?: Function): any[]; * * options * - eof - add a trailing newline at the end of file [true] + * - separator - use custom separator [,] + * - delimiter - use custom delimiter ["] * * @static * @param {Array} array the input array to stringify diff --git a/index.min.js b/index.min.js deleted file mode 100644 index 5208156..0000000 --- a/index.min.js +++ /dev/null @@ -1,2 +0,0 @@ -function b(t,n,i=e=>e){let e=Object.create(null);e.options=n||{},e.reviver=i,e.value="",e.entry=[],e.output=[],e.col=1,e.row=1;let 
l=/"|,|\r\n|\n|\r|[^",\r\n]+/y,a=/^(\r\n|\n|\r)$/,u=[],o="",r=0;for(;(u=l.exec(t))!==null;)switch(o=u[0],r){case 0:switch(!0){case o==='"':r=3;break;case o===",":r=0,s(e);break;case a.test(o):r=0,s(e),c(e);break;default:e.value+=o,r=2;break}break;case 2:switch(!0){case o===",":r=0,s(e);break;case a.test(o):r=0,s(e),c(e);break;default:throw r=4,Error(`CSVError: Illegal state [row:${e.row}, col:${e.col}]`)}break;case 3:switch(!0){case o==='"':r=4;break;default:r=3,e.value+=o;break}break;case 4:switch(!0){case o==='"':r=3,e.value+=o;break;case o===",":r=0,s(e);break;case a.test(o):r=0,s(e),c(e);break;default:throw Error(`CSVError: Illegal state [row:${e.row}, col:${e.col}]`)}break}return e.entry.length!==0&&(s(e),c(e)),e.output}function w(t,n={},i=e=>e){let e=Object.create(null);e.options=n,e.options.eof=e.options.eof!==void 0?e.options.eof:!0,e.row=1,e.col=1,e.output="";let l=/"|,|\r\n|\n|\r/;return t.forEach((a,u)=>{let o="";switch(e.col=1,a.forEach((r,f)=>{typeof r=="string"&&(r=r.replace(/"/g,'""'),r=l.test(r)?`"${r}"`:r),o+=i(r,e.row,e.col),f!==a.length-1&&(o+=","),e.col++}),!0){case e.options.eof:case(!e.options.eof&&u!==t.length-1):e.output+=`${o} -`;break;default:e.output+=`${o}`;break}e.row++}),e.output}function s(t){let n=t.options.typed?p(t.value):t.value;t.entry.push(t.reviver(n,t.row,t.col)),t.value="",t.col++}function c(t){t.output.push(t.entry),t.entry=[],t.row++,t.col=1}function p(t){let n=/.\./;switch(!0){case t==="true":case t==="false":return t==="true";case n.test(t):return parseFloat(t);case isFinite(t):return parseInt(t);default:return t}}export{b as parse,w as stringify}; From 97d4c81be083f0fc93c460f1f331eeb83edc0e96 Mon Sep 17 00:00:00 2001 From: Marco Winter Date: Sun, 18 Jun 2023 14:43:04 +0200 Subject: [PATCH 8/8] removed unnecessary import --- test/options.spec.js | 1 - 1 file changed, 1 deletion(-) diff --git a/test/options.spec.js b/test/options.spec.js index 366baf2..4ce54ef 100644 --- a/test/options.spec.js +++ b/test/options.spec.js @@ 
-1,7 +1,6 @@ import test from 'tape' import * as CSV from '@vanillaes/csv' import { createRequire } from 'module' -import { sep } from 'path' const require = createRequire(import.meta.url) const reviver1 = require('./__test__/reviver1.json') const reviver2 = require('./__test__/reviver2.json')