diff --git a/index.js b/index.js index 367e6f0..12433ff 100644 --- a/index.js +++ b/index.js @@ -153,7 +153,7 @@ class CsvParser extends Transform { } for (let i = start; i < end; i++) { - const isStartingQuote = !isQuoted && buf[i] === this.quote + const isStartingQuote = !isQuoted && buf[i] === this.quote && (i === start || buf[i - 1] === comma) const isEndingQuote = isQuoted && buf[i] === this.quote && i + 1 <= end && buf[i + 1] === comma const isEscape = isQuoted && buf[i] === this.escape && i + 1 < end && buf[i + 1] === this.quote @@ -222,8 +222,10 @@ class CsvParser extends Transform { const bufLen = buf.length for (let i = start; i < bufLen; i++) { + const prevChr = i > 0 ? buf[i - 1] : null const chr = buf[i] const nextChr = i + 1 < bufLen ? buf[i + 1] : null + const nextNextChr = i + 2 < bufLen ? buf[i + 2] : null this._currentRowBytes++ if (this._currentRowBytes > this.maxRowBytes) { @@ -237,10 +239,19 @@ class CsvParser extends Transform { if (this._escaped) { this._escaped = false // non-escaped quote (quoting the cell) + continue } else { - this._quoted = !this._quoted + // not in escape- or quote-mode, currently at start or previous char was separator or linebreak -> enter quote mode + if (!this._quoted && (prevChr === null || prevChr === this.separator || prevChr === nl || prevChr === this.newline)) { + this._quoted = true + continue + } + // in quote-mode but not escape-mode, next char is separator or linebreak -> leave quote mode + if (this._quoted && (nextChr === this.separator || (this.customNewline ? nextChr === this.newline : nextChr === nl || (nextChr === cr && nextNextChr === nl)))) { + this._quoted = false + continue + } } - continue } if (!this._quoted) { diff --git a/test/data/unescaped_quotes.csv b/test/data/unescaped_quotes.csv new file mode 100644 index 0000000..5c29715 --- /dev/null +++ b/test/data/unescaped_quotes.csv @@ -0,0 +1,11 @@ +a,b,c +jo"e,sam,jan +"jo"e",sam,jan +joe,sa"m,jan +joe,"sa"m",jan +joe,sam,ja"n +joe,sam,"ja"n" +joe,"sa +"m",jan +joe,crlf,"jan" +joe,sam,"ja"n" diff --git a/test/maxRowBytes.test.js b/test/maxRowBytes.test.js index b5f3e72..b5a6435 100644 --- a/test/maxRowBytes.test.js +++ b/test/maxRowBytes.test.js @@ -5,9 +5,9 @@ const { collect } = require('./helpers/helper') test.cb('optional row size limit', (t) => { const verify = (err, lines) => { t.is(err.message, 'Row exceeds the maximum size', 'strict row size') - t.is(lines.length, 4576, '4576 rows before error') + t.is(lines.length, 13, '13 rows before error') t.end() } - collect('max_row_size.csv', { maxRowBytes: 200 }, verify) + collect('max_row_size.csv', { maxRowBytes: 170 }, verify) }) diff --git a/test/snapshots/test.js.md b/test/snapshots/test.js.md index 6494cf8..e029add 100644 --- a/test/snapshots/test.js.md +++ b/test/snapshots/test.js.md @@ -504,4 +504,79 @@ Generated by [AVA](https://ava.li). > Snapshot 1 - [] \ No newline at end of file + [ + +## cell with unescaped quotes + +> first row + + Row { + a: 'jo"e', + b: 'sam', + c: 'jan', + } + +> second row + + Row { + a: 'jo"e', + b: 'sam', + c: 'jan', + } + +> third row + + Row { + a: 'joe', + b: 'sa"m', + c: 'jan', + } + +> fourth row + + Row { + a: 'joe', + b: 'sa"m', + c: 'jan', + } + +> fifth row + + Row { + a: 'joe', + b: 'sam', + c: 'ja"n', + } + +> sixth row + + Row { + a: 'joe', + b: 'sam', + c: 'ja"n', + } + +> seventh row + + Row { + a: 'joe', + b: `sa␊ + "m`, + c: 'jan', + } + +> eighth row + + Row { + a: 'joe', + b: 'crlf', + c: 'jan', + } + +> ninth row + + Row { + a: 'joe', + b: 'sam', + c: 'ja"n', + } \ No newline at end of file diff --git a/test/snapshots/test.js.snap b/test/snapshots/test.js.snap index 82bfbf6..bd6addc 100644 Binary files a/test/snapshots/test.js.snap and b/test/snapshots/test.js.snap differ diff --git a/test/test.js b/test/test.js index 563ce0c..f66eba3 100644 --- a/test/test.js +++ b/test/test.js @@ -145,6 +145,26 @@ test.cb('cell with newline', (t) => { }) }) +test.cb('cell with unescaped quotes', (t) => { + const verify = (err, lines) => { + // console.log(lines); + t.false(err, 'no err') + t.snapshot(lines[0], 'first row') + t.snapshot(lines[1], 'second row') + t.snapshot(lines[2], 'third row') + t.snapshot(lines[3], 'fourth row') + t.snapshot(lines[4], 'fifth row') + t.snapshot(lines[5], 'sixth row') + t.snapshot(lines[6], 'seventh row') + t.snapshot(lines[7], 'eighth row') + t.snapshot(lines[8], 'ninth row') + t.is(lines.length, 9, '9 rows') + t.end() + } + + collect('unescaped_quotes.csv', verify) +}) + test.cb('cell with escaped quote in quotes', (t) => { const headers = bops.from('a\n') const cell = bops.from('"ha ""ha"" ha"\n')