Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/compose/resolve-block-scalar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ function parseBlockScalarHeader(

/** @returns Array of lines split up as `[indent, content]` */
function splitLines(source: string) {
const split = source.split(/\n( *)/)
const split = source.split(/(?:\r?\n|\r(?!\n))( *)/)
const first = split[0]
const m = first.match(/^( *)/)
const line0: [string, string] = m?.[1]
Expand Down
40 changes: 24 additions & 16 deletions src/compose/resolve-flow-scalar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,15 @@ function foldLines(source: string) {
*/
let first: RegExp, line: RegExp
try {
first = new RegExp('(.*?)(?<![ \t])[ \t]*\r?\n', 'sy')
line = new RegExp('[ \t]*(.*?)(?:(?<![ \t])[ \t]*)?\r?\n', 'sy')
// match all line breaks: \r\n, \n, or standalone \r
first = new RegExp('(.*?)(?<![ \t])[ \t]*(?:\r?\n|\r(?!\n))', 'sy')
line = new RegExp(
'[ \t]*(.*?)(?:(?<![ \t])[ \t]*)?(?:\r?\n|\r(?!\n))',
'sy'
)
} catch {
first = /(.*?)[ \t]*\r?\n/sy
line = /[ \t]*(.*?)[ \t]*\r?\n/sy
first = /(.*?)[ \t]*(?:\r?\n|\r(?!\n))/sy
line = /[ \t]*(.*?)[ \t]*(?:\r?\n|\r(?!\n))/sy
}
let match = first.exec(source)
if (!match) return source
Expand Down Expand Up @@ -147,22 +151,22 @@ function doubleQuotedValue(source: string, onError: FlowScalarErrorHandler) {
for (let i = 1; i < source.length - 1; ++i) {
const ch = source[i]
if (ch === '\r' && source[i + 1] === '\n') continue
if (ch === '\n') {
if (ch === '\n' || (ch === '\r' && source[i + 1] !== '\n')) {
const { fold, offset } = foldNewline(source, i)
res += fold
i = offset
} else if (ch === '\\') {
let next = source[++i]
const cc = escapeCodes[next]
if (cc) res += cc
else if (next === '\n') {
// skip escaped newlines, but still trim the following line
next = source[i + 1]
while (next === ' ' || next === '\t') next = source[++i + 1]
} else if (next === '\r' && source[i + 1] === '\n') {
// skip escaped CRLF newlines, but still trim the following line
else if (next === '\r' && source[i + 1] === '\n') {
// skip escaped CRLF, but still trim the following line
next = source[++i + 1]
while (next === ' ' || next === '\t') next = source[++i + 1]
} else if (next === '\n' || next === '\r') {
// skip escaped newline (LF or CR), but still trim the following line
next = source[i + 1]
while (next === ' ' || next === '\t') next = source[++i + 1]
} else if (next === 'x' || next === 'u' || next === 'U') {
const length = { x: 2, u: 4, U: 8 }[next]
res += parseCharCode(source, i + 1, length, onError)
Expand All @@ -177,7 +181,7 @@ function doubleQuotedValue(source: string, onError: FlowScalarErrorHandler) {
const wsStart = i
let next = source[i + 1]
while (next === ' ' || next === '\t') next = source[++i + 1]
if (next !== '\n' && !(next === '\r' && source[i + 2] === '\n'))
if (next !== '\n' && next !== '\r')
res += i > wsStart ? source.slice(wsStart, i + 1) : ch
} else {
res += ch
Expand All @@ -190,15 +194,19 @@ function doubleQuotedValue(source: string, onError: FlowScalarErrorHandler) {

/**
* Fold a single newline into a space, multiple newlines to N - 1 newlines.
* Presumes `source[offset] === '\n'`
* Presumes `source[offset] === '\n'` or `source[offset] === '\r'`
*/
function foldNewline(source: string, offset: number) {
let fold = ''
let ch = source[offset + 1]
while (ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r') {
if (ch === '\r' && source[offset + 2] !== '\n') break
if (ch === '\n') fold += '\n'
offset += 1
if (ch === '\n' || ch === '\r') {
fold += '\n'
if (ch === '\r' && source[offset + 2] === '\n') offset += 2
else offset += 1
} else {
offset += 1
}
ch = source[offset + 1]
}
if (!fold) fold = ' '
Expand Down
1 change: 1 addition & 0 deletions src/parse/cst.ts
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ export function tokenType(source: string): TokenType | null {
case '':
case '\n':
case '\r\n':
case '\r':
return 'newline'
case '-':
return 'seq-item-ind'
Expand Down
50 changes: 34 additions & 16 deletions src/parse/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const flowIndicatorChars = new Set(',[]{}')
/** Characters rejected by `isNotAnchorChar`: space, comma, flow brackets/braces, LF, CR and tab. */
const invalidAnchorChars = new Set(' ,[]{}\n\r\t')
/** True when `ch` is empty/undefined or is one of `invalidAnchorChars`. */
const isNotAnchorChar = (ch: string) => !ch || invalidAnchorChars.has(ch)

/**
 * Block scalar header, matched at `lastIndex` (sticky) up to the end of the line.
 * Captures: [1] the `|` or `>` indicator plus any following non-space, non-`#` characters,
 * [2] the spaces/tabs after it, [3] the rest of the line.
 * The rest-of-line group excludes both `\r` and `\n`, so a standalone CR ends the header
 * just like LF or CRLF instead of being swallowed into it.
 */
const blockScalarHeader = /([|>][^\s#]*)([ \t]*)([^\r\n]*)$/my
/**
 * One of `-`, `?` or `:` that is followed by a space, tab, LF, CR or end of input.
 * Captures: [1] the indicator, [2] the spaces/tabs after it.
 */
const blockStart = /([-?:])(?=[ \n\r\t]|$)([ \t]*)/y
/**
 * A `%`-prefixed line, matched up to the end of the line.
 * Captures: [1] the `%…` text, [2] optional spaces/tabs, [3] optional `#` comment.
 */
const directiveLine = /(%.*?)(?:([ \t]+)(#.*)?)?$/my
/**
 * Three `-`/`.` characters followed by a space, tab, LF, CR or end of input.
 * Captures: [1] optional spaces/tabs after the marker, [2] optional `#` comment.
 */
const docMarker = /[-.]{3}(?=[ \n\r\t]|$)(?:([ \t]+)(#.*)?)?/y
Expand Down Expand Up @@ -91,7 +91,10 @@ class Lexer {
while (ch === ' ') ch = this.source[++indent + offset]
if (ch === '\r') {
const next = this.source[indent + offset + 1]
// \r\n is a single line break
if (next === '\n') return offset + indent + 1
// standalone \r is also a line break per YAML 1.2 spec
return offset + indent
}
return ch === '\n' || indent >= this.indentNext ? offset + indent : -1
}
Expand Down Expand Up @@ -204,11 +207,13 @@ class Lexer {
this.blockScalar()
return this.lineStart()
case '\r':
// \r\n and standalone \r are both line breaks
if (this.charAt(1) === '\n') {
this.count(2)
return this.lineStart()
} else {
this.count(1)
}
// fallthrough
return this.lineStart()
default:
this.plainScalar()
return 'document'
Expand Down Expand Up @@ -307,8 +312,8 @@ class Lexer {
break
}
case '\r':
if (this.charAt(1) === '\n') break
// fallthrough
// standalone \r is a line break, handled by newline() in loop
break
default:
this.flowKey = false
this.plainScalar()
Expand Down Expand Up @@ -355,12 +360,12 @@ class Lexer {
}
// Only looking for newlines within the quotes
const qb = this.source.substring(0, end)
let nl = qb.indexOf('\n', this.pos)
let nl = this.findLineBreak(qb, this.pos)
if (nl !== -1) {
while (nl !== -1) {
const cs = this.continueScalar(nl + 1)
if (cs === -1) break
nl = qb.indexOf('\n', cs)
nl = this.findLineBreak(qb, cs)
}
if (nl !== -1) {
// this is an error caused by an unexpected unindent
Expand Down Expand Up @@ -399,8 +404,10 @@ class Lexer {
indent = 0
break
case '\r':
if (this.source[i + 1] === '\n') break
// fallthrough
nl = i
indent = 0
if (this.source[i + 1] === '\n') i++ // skip \n in \r\n
break
default:
break loop
}
Expand All @@ -414,7 +421,7 @@ class Lexer {
do {
const cs = this.continueScalar(nl + 1)
if (cs === -1) break
nl = this.source.indexOf('\n', cs)
nl = this.findLineBreak(this.source, cs)
} while (nl !== -1)
if (nl === -1) nl = this.source.length
}
Expand Down Expand Up @@ -460,10 +467,11 @@ class Lexer {
i += 1
ch = '\n'
next = this.source[i + 1]
} else end = i
}
// standalone \r is also a line break
}
if (next === '#' || (inFlow && flowIndicatorChars.has(next))) break
if (ch === '\n') {
if (ch === '\n' || ch === '\r') {
const cs = this.continueScalar(i + 1)
if (cs === -1) break
i = Math.max(i, cs - 2) // to advance, but still account for ' #'
Expand Down Expand Up @@ -501,16 +509,26 @@ class Lexer {
/**
 * Advance from `this.pos` to the end of the current line and hand that offset
 * to `this.toIndex(offset, false)`, whose result is returned unchanged.
 *
 * The line ends at the first LF or CR, so LF, CRLF and a standalone CR are all
 * treated as line breaks; the break characters themselves are not included.
 */
private toLineEnd(): number {
  let i = this.pos
  let ch = this.source[i]
  // A CR stops the scan whether it stands alone or starts a CRLF pair, so no
  // post-hoc "step back over \r" adjustment is needed.
  while (ch && ch !== '\n' && ch !== '\r') ch = this.source[++i]
  return this.toIndex(i, false)
}

/**
 * Locate the next line break in `str` at or after `pos`.
 *
 * @returns the index of a standalone `\r` if one comes first; otherwise the
 *   index of the next `\n` (for CRLF this is the `\n`, not the `\r`), or -1
 *   when `str` contains neither character from `pos` onwards.
 */
private findLineBreak(str: string, pos: number): number {
  const lfIndex = str.indexOf('\n', pos)
  const crIndex = str.indexOf('\r', pos)
  if (crIndex === -1) return lfIndex
  if (lfIndex === -1) return crIndex
  // A CR directly before the LF is part of CRLF and a CR after the LF is not
  // the next break; only a CR strictly earlier than that wins.
  return crIndex < lfIndex - 1 ? crIndex : lfIndex
}

/**
 * Consume one line break at `this.pos`, if there is one.
 *
 * LF and a standalone CR are consumed via `this.count(1)`, CRLF via
 * `this.count(2)`; the `count` result is returned as-is.
 *
 * @returns 0 when the current character is not a line break.
 */
private newline(): number {
  const ch = this.source[this.pos]
  if (ch === '\n') return this.count(1)
  // CRLF is a single two-character break; a lone CR is a break on its own.
  if (ch === '\r') return this.count(this.charAt(1) === '\n' ? 2 : 1)
  return 0
}

private spaces(allowTabs: boolean): number {
Expand Down
16 changes: 16 additions & 0 deletions tests/cst.ts
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,19 @@ test('Line comment before unindented block-seq in block-map (#525)', () => {
const [doc] = Array.from(new Parser().parse(src))
expect(CST.stringify(doc)).toBe(src)
})

describe('standalone CR line break handling (#595)', () => {
  // Each line-break spelling (CR, LF, CRLF) must be classified as a newline token.
  test('tokenType recognizes standalone CR as newline', () => {
    for (const lineBreak of ['\r', '\n', '\r\n']) {
      expect(CST.tokenType(lineBreak)).toBe('newline')
    }
  })

  // Two CR-separated pairs must end up in one document holding one block map.
  test('Parser produces newline tokens for CR', () => {
    const parsed = [...new Parser().parse('a: 1\rb: 2')]
    expect(parsed).toHaveLength(1)

    const [first] = parsed
    const document = first as CST.Document
    expect(document.type).toBe('document')
    expect(document.value?.type).toBe('block-map')
  })
})
113 changes: 112 additions & 1 deletion tests/doc/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ describe('scalars', () => {

test('carriage returns in double-quotes', () => {
  // A standalone CR is a line break here, so it folds exactly like LF and CRLF:
  // a single break becomes a space, N consecutive breaks become N - 1 newlines.
  const src = '"a\nb\n\rc\n\r\nd\n\r\n\re\n\r\n\r\nf"'
  expect(YAML.parse(src)).toBe('a b\nc\nd\n\ne\n\nf')
})
})

Expand Down Expand Up @@ -944,3 +944,114 @@ describe('stringKeys', () => {
expect(doc.errors).toMatchObject([{ code: 'NON_STRING_KEY' }])
})
})

describe('standalone CR line break handling (#595)', () => {
  // Shorthand so each case reads as input -> expected value.
  const parse = (src: string) => YAML.parse(src)

  describe('basic document parsing', () => {
    test('CR-separated key-value pairs', () => {
      const parsed = parse('a: 1\rb: 2\rc: 3')
      expect(parsed).toEqual({ a: 1, b: 2, c: 3 })
    })

    test('CR produces same result as LF', () => {
      expect(parse('a: 1\rb: 2')).toEqual(parse('a: 1\nb: 2'))
    })

    test('mixed CR, LF, and CRLF line breaks', () => {
      const expected = { a: 1, b: 2, c: 3, d: 4 }
      expect(parse('a: 1\rb: 2\nc: 3\r\nd: 4')).toEqual(expected)
    })

    test('CR in block sequence', () => {
      const parsed = parse('- a\r- b\r- c')
      expect(parsed).toEqual(['a', 'b', 'c'])
    })

    test('CR at end of document', () => {
      const parsed = parse('foo: bar\r')
      expect(parsed).toEqual({ foo: 'bar' })
    })
  })

  describe('double-quoted strings', () => {
    test('unescaped CR folds to space', () => {
      const parsed = parse('"a\rb"')
      expect(parsed).toBe('a b')
    })

    test('multiple unescaped CRs fold to newlines', () => {
      const twoBreaks = parse('"a\r\rb"')
      expect(twoBreaks).toBe('a\nb')
      const threeBreaks = parse('"a\r\r\rb"')
      expect(threeBreaks).toBe('a\n\nb')
    })

    test('CR matches LF folding behavior', () => {
      const singleLf = parse('"a\nb"')
      expect(parse('"a\rb"')).toBe(singleLf)
      const doubleLf = parse('"a\n\nb"')
      expect(parse('"a\r\rb"')).toBe(doubleLf)
    })

    test('escaped CR is line continuation', () => {
      const plain = parse('"a\\\rb"')
      expect(plain).toBe('ab')
      // whitespace after the escaped break is trimmed
      const withSpace = parse('"a\\\r b"')
      expect(withSpace).toBe('ab')
    })

    test('escaped CR matches escaped LF behavior', () => {
      const escapedLf = parse('"a\\\nb"')
      expect(parse('"a\\\rb"')).toBe(escapedLf)
    })
  })

  describe('single-quoted strings', () => {
    test('CR folds to space in single-quoted string', () => {
      const parsed = parse("'a\rb'")
      expect(parsed).toBe('a b')
    })

    test('multiple CRs fold correctly', () => {
      const parsed = parse("'a\r\rb'")
      expect(parsed).toBe('a\nb')
    })
  })

  describe('block scalars', () => {
    test('literal block scalar with CR', () => {
      const parsed = parse('|\ra\rb')
      expect(parsed).toBe('a\nb\n')
    })

    test('folded block scalar with CR', () => {
      const parsed = parse('>\ra\rb')
      expect(parsed).toBe('a b\n')
    })

    test('block scalar content with CR line breaks', () => {
      const parsed = parse('|\r line1\r line2')
      expect(parsed).toBe('line1\nline2\n')
    })
  })

  describe('flow collections', () => {
    test('CR in flow sequence', () => {
      const parsed = parse('[\r1\r,\r2\r]')
      expect(parsed).toEqual([1, 2])
    })

    test('CR in flow mapping', () => {
      const parsed = parse('{\ra: 1\r,\rb: 2\r}')
      expect(parsed).toEqual({ a: 1, b: 2 })
    })
  })

  describe('comments', () => {
    test('CR before comment', () => {
      const parsed = parse('foo\r# comment')
      expect(parsed).toBe('foo')
    })

    test('CR after comment', () => {
      const parsed = parse('a: 1 # comment\rb: 2')
      expect(parsed).toEqual({ a: 1, b: 2 })
    })
  })

  describe('LF followed by CR (\\n\\r)', () => {
    test('\\n\\r is two separate line breaks', () => {
      // LF then CR are two distinct breaks, which fold into a single newline
      const parsed = parse('"a\n\rb"')
      expect(parsed).toBe('a\nb')
    })

    test('\\n\\r in document structure', () => {
      const parsed = parse('a: 1\n\rb: 2')
      expect(parsed).toEqual({ a: 1, b: 2 })
    })
  })
})
5 changes: 3 additions & 2 deletions tests/doc/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,10 @@ describe('tags', () => {
})

test('CR in tag shorthand (#501)', () => {
  // \r is now a line break, so !\r! is parsed as ! + newline + !
  // and the reported error is MULTILINE_IMPLICIT_KEY rather than a tag-resolution failure.
  const doc = parseDocument(': | !\r!')
  const err = doc.errors.find(err => err.code === 'MULTILINE_IMPLICIT_KEY')
  expect(err).toBeDefined()
})
})

Expand Down
Loading