From cb24c343d46ac83dc7022ccef9a5678d7679e870 Mon Sep 17 00:00:00 2001 From: Matthew Rayermann Date: Sun, 15 Feb 2026 15:50:28 -0800 Subject: [PATCH 1/2] Fix incomplete CSI final-byte and intermediate-byte matching --- index.js | 9 ++++--- test.js | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index 2cc5ca2..2732a03 100644 --- a/index.js +++ b/index.js @@ -5,10 +5,13 @@ export default function ansiRegex({onlyFirst = false} = {}) { // OSC sequences only: ESC ] ... ST (non-greedy until the first ST) const osc = `(?:\\u001B\\][\\s\\S]*?${ST})`; - // CSI and related: ESC/C1, optional intermediates, optional params (supports ; and :) then final byte - const csi = '[\\u001B\\u009B][[\\]()#;?]*(?:\\d{1,4}(?:[;:]\\d{0,4})*)?[\\dA-PR-TZcf-nq-uy=><~]'; + // CSI and related: ESC/C1, optional params (supports ; and :), optional intermediate bytes (0x20-0x2F), then final byte (0x40-0x7E) + const csi = '[\\u001B\\u009B][[\\]()#;?]*(?:[\\d;:]*(?:[\\u0020-\\u002F]*[\\u0040-\\u007E]))'; - const pattern = `${osc}|${csi}`; + // ESC followed by a private-use final byte (0x3C-0x3E: < = >), e.g. DECKPAM, DECKPNM, DECANM + const escFp = '\\u001B[<=>]'; + + const pattern = `${osc}|${csi}|${escFp}`; return new RegExp(pattern, onlyFirst ? undefined : 'g'); } diff --git a/test.js b/test.js index c7240b2..a20086a 100644 --- a/test.js +++ b/test.js @@ -159,6 +159,81 @@ test('does not match ESC followed by unsupported final', t => { t.is(inputString.match(ansiRegex()), null); }); +test('match ECH (Erase Character) ESC[nX', t => { + t.is('hello\u001B[31Xworld'.replace(ansiRegex(), ''), 'helloworld'); + t.is('hello\u001B[5Xworld'.replace(ansiRegex(), ''), 'helloworld'); + t.is('hello\u001B[5Xworld'.match(ansiRegex())[0], '\u001B[5X'); +}); + +test('match ICH (Insert Character) ESC[n@', t => { + t.is('hello\u001B[3@world'.replace(ansiRegex(), ''), 'helloworld'); + t.is('hello\u001B[3@world'.match(ansiRegex())[0], '\u001B[3@'); +}); + +test('match REP (Repeat) ESC[nb', t => { + t.is('hello\u001B[2bworld'.replace(ansiRegex(), ''), 'helloworld'); + t.is('hello\u001B[2bworld'.match(ansiRegex())[0], '\u001B[2b'); +}); + +test('match VPA (Vertical Position Absolute) ESC[nd', t => { + t.is('hello\u001B[5dworld'.replace(ansiRegex(), ''), 'helloworld'); + t.is('hello\u001B[5dworld'.match(ansiRegex())[0], '\u001B[5d'); +}); + +test('match VPR (Vertical Position Relative) ESC[ne', t => { + t.is('hello\u001B[3eworld'.replace(ansiRegex(), ''), 'helloworld'); + t.is('hello\u001B[3eworld'.match(ansiRegex())[0], '\u001B[3e'); +}); + +test('match SU (Scroll Up) ESC[nS and SD (Scroll Down) ESC[nT', t => { + t.is('hello\u001B[3Sworld'.match(ansiRegex())[0], '\u001B[3S'); + t.is('hello\u001B[3Tworld'.match(ansiRegex())[0], '\u001B[3T'); + t.is('hello\u001B[3Sworld'.replace(ansiRegex(), ''), 'helloworld'); +}); + +test('match CBT (Cursor Backward Tabulation) ESC[nZ', t => { + t.is('hello\u001B[2Zworld'.match(ansiRegex())[0], '\u001B[2Z'); + t.is('hello\u001B[2Zworld'.replace(ansiRegex(), ''), 'helloworld'); +}); + +test('match CHT (Cursor Horizontal Tabulation) ESC[nI', t => { + t.is('hello\u001B[4Iworld'.match(ansiRegex())[0], '\u001B[4I'); + t.is('hello\u001B[4Iworld'.replace(ansiRegex(), ''), 'helloworld'); +}); + +test('match all ECMA-48 CSI final bytes (0x40-0x7E)', t => { + for (let code = 0x40; code <= 0x7E; code++) { + const finalByte = String.fromCharCode(code); + const seq = `\u001B[1${finalByte}`; + const input = `hello${seq}world`; + t.is(input.match(ansiRegex())[0], seq); + t.is(input.replace(ansiRegex(), ''), 'helloworld'); + } +}); + +test('match CSI with intermediate bytes', t => { + // DECSCUSR: ESC[ n SP q + const decscusr = '\u001B[2 q'; + t.is(`hello${decscusr}world`.match(ansiRegex())[0], decscusr); + t.is(`hello${decscusr}world`.replace(ansiRegex(), ''), 'helloworld'); +}); + +test('match CSI with no parameters', t => { + t.is('hello\u001B[Xworld'.match(ansiRegex())[0], '\u001B[X'); + t.is('hello\u001B[Xworld'.replace(ansiRegex(), ''), 'helloworld'); +}); + +test('match CSI with DEC private mode prefix', t => { + t.is('hello\u001B[?25hworld'.match(ansiRegex())[0], '\u001B[?25h'); + t.is('hello\u001B[?25hworld'.replace(ansiRegex(), ''), 'helloworld'); + t.is('hello\u001B[?25lworld'.match(ansiRegex())[0], '\u001B[?25l'); +}); + +test('match multi-param CSI with all final bytes', t => { + t.is('hello\u001B[1;2Xworld'.match(ansiRegex())[0], '\u001B[1;2X'); + t.is('hello\u001B[1;2Xworld'.replace(ansiRegex(), ''), 'helloworld'); +}); + // Testing against extended codes (excluding codes ending in 0-9) for (const [codeSetKey, codeSetValue] of Object.entries(ansiCodes)) { for (const [code, codeInfo] of codeSetValue) { From d52f33263777c1451dd5068c2053a5cf13b1b563 Mon Sep 17 00:00:00 2001 From: Matthew Rayermann Date: Sun, 15 Feb 2026 15:55:03 -0800 Subject: [PATCH 2/2] Fix CI checks --- test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test.js b/test.js index a20086a..bfd23ec 100644 --- a/test.js +++ b/test.js @@ -203,7 +203,7 @@ test('match CHT (Cursor Horizontal Tabulation) ESC[nI', t => { test('match all ECMA-48 CSI final bytes (0x40-0x7E)', t => { for (let code = 0x40; code <= 0x7E; code++) { - const finalByte = String.fromCharCode(code); + const finalByte = String.fromCodePoint(code); const seq = `\u001B[1${finalByte}`; const input = `hello${seq}world`; t.is(input.match(ansiRegex())[0], seq);