From fd88b6a2a32267858762574950f2959258361c69 Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Mon, 1 Sep 2025 16:25:38 -0600 Subject: [PATCH 01/16] better workflow naming --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4b4d59e..1fc760e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,4 +1,4 @@ -name: CI Push +name: Push to main on: push: From 67303dde596338ef26e739fbaf31c2a5e04f5036 Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Mon, 1 Sep 2025 18:00:05 -0600 Subject: [PATCH 02/16] add prettier --- package.json | 3 +++ pnpm-lock.yaml | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/package.json b/package.json index 178ce25..5b2475c 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,10 @@ "@types/jest": "^30.0.0", "@types/node": "^24.3.0", "eslint": "^9.34.0", + "eslint-config-prettier": "^10.1.8", + "eslint-plugin-prettier": "^5.5.4", "jest": "^30.1.1", + "prettier": "^3.6.2", "ts-jest": "^29.4.1", "ts-node": "^10.9.2", "tsup": "^8.5.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c6cb6fb..647a0da 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -20,9 +20,18 @@ importers: eslint: specifier: ^9.34.0 version: 9.34.0 + eslint-config-prettier: + specifier: ^10.1.8 + version: 10.1.8(eslint@9.34.0) + eslint-plugin-prettier: + specifier: ^5.5.4 + version: 5.5.4(eslint-config-prettier@10.1.8(eslint@9.34.0))(eslint@9.34.0)(prettier@3.6.2) jest: specifier: ^30.1.1 version: 30.1.1(@types/node@24.3.0)(ts-node@10.9.2(@types/node@24.3.0)(typescript@5.9.2)) + prettier: + specifier: ^3.6.2 + version: 3.6.2 ts-jest: specifier: ^29.4.1 version: 
29.4.1(@babel/core@7.28.3)(@jest/transform@30.1.1)(@jest/types@30.0.5)(babel-jest@30.1.1(@babel/core@7.28.3))(esbuild@0.25.9)(jest-util@30.0.5)(jest@30.1.1(@types/node@24.3.0)(ts-node@10.9.2(@types/node@24.3.0)(typescript@5.9.2)))(typescript@5.9.2) @@ -1157,6 +1166,26 @@ packages: resolution: {integrity: sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==} engines: {node: '>=10'} + eslint-config-prettier@10.1.8: + resolution: {integrity: sha512-82GZUjRS0p/jganf6q1rEO25VSoHH0hKPCTrgillPjdI/3bgBhAE1QzHrHTizjpRvy6pGAvKjDJtk2pF9NDq8w==} + hasBin: true + peerDependencies: + eslint: '>=7.0.0' + + eslint-plugin-prettier@5.5.4: + resolution: {integrity: sha512-swNtI95SToIz05YINMA6Ox5R057IMAmWZ26GqPxusAp1TZzj+IdY9tXNWWD3vkF/wEqydCONcwjTFpxybBqZsg==} + engines: {node: ^14.18.0 || >=16.0.0} + peerDependencies: + '@types/eslint': '>=8.0.0' + eslint: '>=8.0.0' + eslint-config-prettier: '>= 7.0.0 <10.0.0 || >=10.1.0' + prettier: '>=3.0.0' + peerDependenciesMeta: + '@types/eslint': + optional: true + eslint-config-prettier: + optional: true + eslint-scope@8.4.0: resolution: {integrity: sha512-sNXOfKCn74rt8RICKMvJS7XKV/Xk9kA7DyJr8mJik3S7Cwgy3qlkkmyS2uQB3jiJg6VNdZd/pDBJu0nvG2NlTg==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} @@ -1219,6 +1248,9 @@ packages: fast-deep-equal@3.1.3: resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} + fast-diff@1.3.0: + resolution: {integrity: sha512-VxPP4NqbUjj6MaAOafWeUn2cXWLcCtljklUtZf0Ind4XQ+QPtmA0b18zZy0jIQx+ExRVCR/ZQpBmik5lXshNsw==} + fast-glob@3.3.3: resolution: {integrity: sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==} engines: {node: '>=8.6.0'} @@ -1816,6 +1848,15 @@ packages: resolution: {integrity: sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==} engines: {node: '>= 0.8.0'} + prettier-linter-helpers@1.0.0: + resolution: 
{integrity: sha512-GbK2cP9nraSSUF9N2XwUwqfzlAFlMNYYl+ShE/V+H8a9uNl/oUqB1w2EL54Jh0OlyRSd8RfWYJ3coVS4TROP2w==} + engines: {node: '>=6.0.0'} + + prettier@3.6.2: + resolution: {integrity: sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==} + engines: {node: '>=14'} + hasBin: true + pretty-format@30.0.5: resolution: {integrity: sha512-D1tKtYvByrBkFLe2wHJl2bwMJIiT8rW+XA+TiataH79/FszLQMrpGEvzUVkzPau7OCO0Qnrhpe87PqtOAIB8Yw==} engines: {node: ^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0} @@ -3324,6 +3365,19 @@ snapshots: escape-string-regexp@4.0.0: {} + eslint-config-prettier@10.1.8(eslint@9.34.0): + dependencies: + eslint: 9.34.0 + + eslint-plugin-prettier@5.5.4(eslint-config-prettier@10.1.8(eslint@9.34.0))(eslint@9.34.0)(prettier@3.6.2): + dependencies: + eslint: 9.34.0 + prettier: 3.6.2 + prettier-linter-helpers: 1.0.0 + synckit: 0.11.11 + optionalDependencies: + eslint-config-prettier: 10.1.8(eslint@9.34.0) + eslint-scope@8.4.0: dependencies: esrecurse: 4.3.0 @@ -3418,6 +3472,8 @@ snapshots: fast-deep-equal@3.1.3: {} + fast-diff@1.3.0: {} + fast-glob@3.3.3: dependencies: '@nodelib/fs.stat': 2.0.5 @@ -4145,6 +4201,12 @@ snapshots: prelude-ls@1.2.1: {} + prettier-linter-helpers@1.0.0: + dependencies: + fast-diff: 1.3.0 + + prettier@3.6.2: {} + pretty-format@30.0.5: dependencies: '@jest/schemas': 30.0.5 From 466238aebeeaf37bd909eb5b30cf7e37c33874ea Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Mon, 1 Sep 2025 18:00:42 -0600 Subject: [PATCH 03/16] add prettier ignore file --- .prettierignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .prettierignore diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..f06235c --- /dev/null +++ b/.prettierignore @@ -0,0 +1,2 @@ +node_modules +dist From da260eca66287ab9503fcd1b75b332577e1de30e Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Mon, 1 Sep 2025 18:11:02 -0600 Subject: [PATCH 04/16] add prettier config --- .prettierrc.json | 7 +++++++ 1 
file changed, 7 insertions(+) create mode 100644 .prettierrc.json diff --git a/.prettierrc.json b/.prettierrc.json new file mode 100644 index 0000000..eba5a68 --- /dev/null +++ b/.prettierrc.json @@ -0,0 +1,7 @@ +{ + "semi": true, + "singleQuote": true, + "trailingComma": "es5", + "printWidth": 80, + "tabWidth": 2 +} From 51dcb8c09149edc24b5a0c351111c8da091c902b Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Mon, 1 Sep 2025 18:11:22 -0600 Subject: [PATCH 05/16] update eslint config --- eslint.config.mjs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eslint.config.mjs b/eslint.config.mjs index 6a5b906..f309854 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -1,4 +1,6 @@ import tseslint from "typescript-eslint"; +import prettier from "eslint-plugin-prettier"; +import prettierConfig from "eslint-config-prettier"; export default tseslint.config( { @@ -14,10 +16,12 @@ export default tseslint.config( }, plugins: { "@typescript-eslint": tseslint.plugin, + prettier, }, rules: { ...tseslint.configs.recommended.rules, - + ...prettierConfig.rules, // disables conflicting ESLint rules + "prettier/prettier": "error", // enforce Prettier formatting "@typescript-eslint/no-explicit-any": "warn", "@typescript-eslint/explicit-function-return-type": "off", }, From fffec7cf74c441a8007601debaeb9b55279998a7 Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Mon, 1 Sep 2025 18:11:52 -0600 Subject: [PATCH 06/16] add format scripts to package.json --- package.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/package.json b/package.json index 5b2475c..f29bbd1 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,8 @@ "prepublishOnly": "pnpm build", "lint": "eslint src --ext .ts", "lint:fix": "eslint src --ext .ts --fix", + "format": "prettier --check \"src/**/*.{ts,js,json,md}\"", + "format:fix": "prettier --write \"src/**/*.{ts,js,json,md}\"", "test": "jest", "test:watch": "jest --watchAll" }, From c0233d8e1309627f4031def5dd82c8dbaecd10d1 Mon 
Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Mon, 1 Sep 2025 18:12:11 -0600 Subject: [PATCH 07/16] fix formatting --- src/index.ts | 8 +-- src/textify.test.ts | 80 +++++++++++++------------- src/utils/preserveFormat.test.ts | 98 ++++++++++++++++---------------- src/utils/preserveFormat.ts | 48 ++++++++-------- 4 files changed, 117 insertions(+), 117 deletions(-) diff --git a/src/index.ts b/src/index.ts index 497a6da..a6360d5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,4 @@ -import preserveFormat from "./utils/preserveFormat"; +import preserveFormat from './utils/preserveFormat'; export interface TextifyOptions { html: string; @@ -12,7 +12,7 @@ export function textify({ ignoreTags = [], }: TextifyOptions): string { // Ignore rest of the function if it's already empty - if (!html) return ""; + if (!html) return ''; if (preserveFormatting) { // Keep readable formatting @@ -20,13 +20,13 @@ export function textify({ } else { if (ignoreTags.length === 0) { // Strip all tags - html = html.replace(/<[^>]+>/g, "").trim(); + html = html.replace(/<[^>]+>/g, '').trim(); } else { // Regex to match all tags except the ignored ones const IG = new Set(ignoreTags.map((t) => t.toLowerCase())); html = html .replace(/<\/?([a-z][a-z0-9-]*)\b[^>]*>/gi, (match, tag) => - IG.has(tag.toLowerCase()) ? match : "" + IG.has(tag.toLowerCase()) ? 
match : '' ) .trim(); } diff --git a/src/textify.test.ts b/src/textify.test.ts index 1e678cc..67f3eeb 100644 --- a/src/textify.test.ts +++ b/src/textify.test.ts @@ -1,98 +1,98 @@ -import { textify } from "./index"; +import { textify } from './index'; -describe("textify", () => { - test("returns empty string if html is empty", () => { - expect(textify({ html: "" })).toBe(""); - expect(textify({ html: null as unknown as string })).toBe(""); +describe('textify', () => { + test('returns empty string if html is empty', () => { + expect(textify({ html: '' })).toBe(''); + expect(textify({ html: null as unknown as string })).toBe(''); }); - test("strips all tags except ignored ones", () => { + test('strips all tags except ignored ones', () => { const html = - "

Paragraph bold italic

"; + '

Paragraph bold italic

'; const result = textify({ html, preserveFormatting: true, - ignoreTags: ["mark", "foo"], + ignoreTags: ['mark', 'foo'], }); - expect(result).toBe("Paragraph **bold***italic*"); + expect(result).toBe('Paragraph **bold***italic*'); }); - test("handles multiple ignored tags", () => { - const html = "

Paragraph bold italic underlined

"; + test('handles multiple ignored tags', () => { + const html = '

Paragraph bold italic underlined

'; const result = textify({ html, preserveFormatting: false, - ignoreTags: ["b", "u"], + ignoreTags: ['b', 'u'], }); - expect(result).toBe("Paragraph bold italic underlined"); + expect(result).toBe('Paragraph bold italic underlined'); }); - test("trims whitespace after stripping tags", () => { - const html = "

Test

"; + test('trims whitespace after stripping tags', () => { + const html = '

Test

'; const result = textify({ html, preserveFormatting: false }); - expect(result).toBe("Test"); + expect(result).toBe('Test'); }); - test("preserveFormat has no effect when they are in ignoreTags", () => { - const html = "

Paragraph bold italic

"; + test('preserveFormat has no effect when they are in ignoreTags', () => { + const html = '

Paragraph bold italic

'; const result = textify({ html, preserveFormatting: true, - ignoreTags: ["b", "i"], + ignoreTags: ['b', 'i'], }); - expect(result).toBe("Paragraph bolditalic"); + expect(result).toBe('Paragraph bolditalic'); }); - test("removes all tags when ignoreTags is empty", () => { - const html = "
Hello World
"; + test('removes all tags when ignoreTags is empty', () => { + const html = '
Hello World
'; const result = textify({ html, preserveFormatting: false, ignoreTags: [] }); - expect(result).toBe("Hello World"); + expect(result).toBe('Hello World'); }); - test("case-insensitive matching for ignoreTags", () => { - const html = "

Text with bold tag

"; + test('case-insensitive matching for ignoreTags', () => { + const html = '

Text with bold tag

'; const result = textify({ html, preserveFormatting: false, - ignoreTags: ["b"], + ignoreTags: ['b'], }); - expect(result).toBe("Text with bold tag"); + expect(result).toBe('Text with bold tag'); }); - test("self-closing ignored tags are preserved", () => { - const html = "Line break
Next line"; + test('self-closing ignored tags are preserved', () => { + const html = 'Line break
Next line'; const result = textify({ html, preserveFormatting: false, - ignoreTags: ["br"], + ignoreTags: ['br'], }); - expect(result).toBe("Line break
Next line"); + expect(result).toBe('Line break
Next line'); }); - test("self-closing non-ignored tags are stripped", () => { - const html = "Line break
Next line"; + test('self-closing non-ignored tags are stripped', () => { + const html = 'Line break
Next line'; const result = textify({ html, preserveFormatting: false, ignoreTags: [], }); - expect(result).toBe("Line breakNext line"); + expect(result).toBe('Line breakNext line'); }); - test("ignores invalid or unknown tags if not in ignoreTags", () => { - const html = "Hello ??? World"; + test('ignores invalid or unknown tags if not in ignoreTags', () => { + const html = 'Hello ??? World'; const result = textify({ html, preserveFormatting: false, ignoreTags: [], }); - expect(result).toBe("Hello ??? World"); + expect(result).toBe('Hello ??? World'); }); - test("preserveFormatting=true delegates to preserveFormat", () => { - const html = "

Hello world

"; + test('preserveFormatting=true delegates to preserveFormat', () => { + const html = '

Hello world

'; const result = textify({ html, preserveFormatting: true }); // since preserveFormat handles it, just check it returns something non-empty - expect(result).not.toBe(""); + expect(result).not.toBe(''); }); }); diff --git a/src/utils/preserveFormat.test.ts b/src/utils/preserveFormat.test.ts index b878fd5..551efcd 100644 --- a/src/utils/preserveFormat.test.ts +++ b/src/utils/preserveFormat.test.ts @@ -1,111 +1,111 @@ -import preserveFormat from "./preserveFormat"; +import preserveFormat from './preserveFormat'; -describe("preserveFormat", () => { - it("should return empty string for empty input", () => { - expect(preserveFormat({ html: "" })).toBe(""); +describe('preserveFormat', () => { + it('should return empty string for empty input', () => { + expect(preserveFormat({ html: '' })).toBe(''); }); - it("should handle line breaks and paragraphs", () => { - const html = "

First paragraph


Second line"; - const expected = "First paragraph\n\nSecond line"; + it('should handle line breaks and paragraphs', () => { + const html = '

First paragraph


Second line'; + const expected = 'First paragraph\n\nSecond line'; expect(preserveFormat({ html })).toBe(expected); }); - it("should handle headings", () => { - const html = "

Heading 1

Heading 2

padding
"; - const expected = "Heading 1\n\nHeading 2\n\npadding"; + it('should handle headings', () => { + const html = '

Heading 1

Heading 2

padding
'; + const expected = 'Heading 1\n\nHeading 2\n\npadding'; expect(preserveFormat({ html })).toBe(expected); }); - it("should handle unordered lists", () => { - const html = "
  • Item 1
  • Item 2
padding
"; - const expected = "- Item 1\n- Item 2\npadding"; + it('should handle unordered lists', () => { + const html = '
  • Item 1
  • Item 2
padding
'; + const expected = '- Item 1\n- Item 2\npadding'; expect(preserveFormat({ html })).toBe(expected); }); - it("should handle ordered lists", () => { - const html = "
  1. First
  2. Second
padding
"; - const expected = "1. First\n2. Second\npadding"; + it('should handle ordered lists', () => { + const html = '
  1. First
  2. Second
padding
'; + const expected = '1. First\n2. Second\npadding'; expect(preserveFormat({ html })).toBe(expected); }); - it("should handle links", () => { + it('should handle links', () => { const html = 'Click here'; - const expected = "Click here (https://example.com)"; + const expected = 'Click here (https://example.com)'; expect(preserveFormat({ html })).toBe(expected); }); - it("should handle bold and italic", () => { - const html = "Bold and Italic"; - const expected = "**Bold** and *Italic*"; + it('should handle bold and italic', () => { + const html = 'Bold and Italic'; + const expected = '**Bold** and *Italic*'; expect(preserveFormat({ html })).toBe(expected); }); - it("should handle blockquotes", () => { - const html = "
Quote line 1
Quote line 2
"; - const expected = "> Quote line 1\n> Quote line 2"; + it('should handle blockquotes', () => { + const html = '
Quote line 1
Quote line 2
'; + const expected = '> Quote line 1\n> Quote line 2'; expect(preserveFormat({ html })).toBe(expected); }); - it("should handle tables", () => { + it('should handle tables', () => { const html = `
A1B1
A2B2
`; - const expected = "A1\tB1\nA2\tB2"; + const expected = 'A1\tB1\nA2\tB2'; expect(preserveFormat({ html })).toBe(expected); }); - it("should handle multiple newlines and HTML entities", () => { - const html = "

Line 1 & test

Line 2

"; - const expected = "Line 1 & test\n\nLine 2"; + it('should handle multiple newlines and HTML entities', () => { + const html = '

Line 1 & test

Line 2

'; + const expected = 'Line 1 & test\n\nLine 2'; expect(preserveFormat({ html })).toBe(expected); }); - it("should strip unknown tags", () => { - const html = "
Content inside
"; - const expected = "Content inside"; + it('should strip unknown tags', () => { + const html = '
Content inside
'; + const expected = 'Content inside'; expect(preserveFormat({ html })).toBe(expected); }); - it("should ignore headings and paragraphs when listed in ignoreTags", () => { - const html = "

Heading

Paragraph

"; - const expected = "

Heading

Paragraph

"; - expect(preserveFormat({ html, ignoreTags: ["h1", "p"] })).toBe(expected); + it('should ignore headings and paragraphs when listed in ignoreTags', () => { + const html = '

Heading

Paragraph

'; + const expected = '

Heading

Paragraph

'; + expect(preserveFormat({ html, ignoreTags: ['h1', 'p'] })).toBe(expected); }); - it("should ignore links when listed in ignoreTags", () => { + it('should ignore links when listed in ignoreTags', () => { const html = 'Click here'; const expected = 'Click here'; - expect(preserveFormat({ html, ignoreTags: ["a"] })).toBe(expected); + expect(preserveFormat({ html, ignoreTags: ['a'] })).toBe(expected); }); - it("should ignore blockquotes when listed in ignoreTags", () => { - const html = "
Quote line 1
Quote line 2
"; - const expected = "
Quote line 1\nQuote line 2
"; - expect(preserveFormat({ html, ignoreTags: ["blockquote"] })).toBe(expected); + it('should ignore blockquotes when listed in ignoreTags', () => { + const html = '
Quote line 1
Quote line 2
'; + const expected = '
Quote line 1\nQuote line 2
'; + expect(preserveFormat({ html, ignoreTags: ['blockquote'] })).toBe(expected); }); - it("should ignore bold and italic when listed in ignoreTags", () => { - const html = "Bold and Italic"; - const expected = "Bold and Italic"; - expect(preserveFormat({ html, ignoreTags: ["b", "i"] })).toBe(expected); + it('should ignore bold and italic when listed in ignoreTags', () => { + const html = 'Bold and Italic'; + const expected = 'Bold and Italic'; + expect(preserveFormat({ html, ignoreTags: ['b', 'i'] })).toBe(expected); }); - it("should ignore table rows and cells when listed in ignoreTags", () => { + it('should ignore table rows and cells when listed in ignoreTags', () => { const html = `
A1B1
A2B2
`; const expected = `
A1B1
A2B2
`; - expect(preserveFormat({ html, ignoreTags: ["table", "tr", "td"] })).toBe( + expect(preserveFormat({ html, ignoreTags: ['table', 'tr', 'td'] })).toBe( expected ); }); - it("should handle complex nested content", () => { + it('should handle complex nested content', () => { const html = `

Main Heading

Paragraph with bold and italic text

  • Item 1
  • Item 2
diff --git a/src/utils/preserveFormat.ts b/src/utils/preserveFormat.ts index 3770b12..5dd629b 100644 --- a/src/utils/preserveFormat.ts +++ b/src/utils/preserveFormat.ts @@ -7,17 +7,17 @@ export default function preserveFormat({ html, ignoreTags = [], }: PreserveFormatOptions): string { - if (!html) return ""; + if (!html) return ''; // Normalize spaces between tags - html = html.replace(/>\s+<"); + html = html.replace(/>\s+<'); // Convert
to newline - html = !ignoreTags.includes("br") ? html.replace(//gi, "\n") : html; + html = !ignoreTags.includes('br') ? html.replace(//gi, '\n') : html; // Headings and paragraphs -> double newline html = html.replace(/<\/(h[1-6]|p)>/gi, (match, tag) => - ignoreTags.includes(tag.toLowerCase()) ? match : "\n\n" + ignoreTags.includes(tag.toLowerCase()) ? match : '\n\n' ); // Bold @@ -33,7 +33,7 @@ export default function preserveFormat({ ); // Links - html = !ignoreTags.includes("a") + html = !ignoreTags.includes('a') ? html.replace( /(.*?)<\/a>/gi, (_m, href: string, text: string) => `${text} (${href})` @@ -42,32 +42,32 @@ export default function preserveFormat({ // Ordered lists html = html.replace(/
    (.*?)<\/ol>/gis, (match, content: string) => { - if (ignoreTags.includes("ol")) return match; // leave
      as-is + if (ignoreTags.includes('ol')) return match; // leave
        as-is let counter = 0; return content.replace(/
      1. (.*?)<\/li>/gi, (liMatch, liContent: string) => - ignoreTags.includes("li") ? liMatch : `${++counter}. ${liContent}\n` + ignoreTags.includes('li') ? liMatch : `${++counter}. ${liContent}\n` ); }); // Unordered lists html = html.replace(/
          (.*?)<\/ul>/gis, (match, content: string) => { - if (ignoreTags.includes("ul")) return match; // keep whole
            block + if (ignoreTags.includes('ul')) return match; // keep whole
              block return content.replace(/
            • (.*?)<\/li>/gi, (liMatch, liContent: string) => - ignoreTags.includes("li") ? liMatch : `- ${liContent}\n` + ignoreTags.includes('li') ? liMatch : `- ${liContent}\n` ); }); // Blockquotes - html = !ignoreTags.includes("blockquote") + html = !ignoreTags.includes('blockquote') ? html.replace( /
              (.*?)<\/blockquote>/gis, (_m, content: string) => content - .replace(//gi, "\n") + .replace(//gi, '\n') .trim() - .split("\n") + .split('\n') .map((line) => `> ${line.trim()}`) - .join("\n") + .join('\n') ) : html; @@ -75,21 +75,21 @@ export default function preserveFormat({ html = html.replace( /(.*?)<\/table>/gis, (match, tableContent: string) => { - if (ignoreTags.includes("table")) return match; // keep whole table + if (ignoreTags.includes('table')) return match; // keep whole table return tableContent .replace(/(.*?)<\/tr>/gi, (trMatch, rowContent: string) => { - if (ignoreTags.includes("tr")) return trMatch; + if (ignoreTags.includes('tr')) return trMatch; return ( rowContent .replace( /(.*?)<\/t[dh]>/gi, (cellMatch, cellContent: string) => - ignoreTags.includes("td") || ignoreTags.includes("th") + ignoreTags.includes('td') || ignoreTags.includes('th') ? cellMatch : `${cellContent}\t` ) .trim() - .replace(/\t$/, "") + "\n" + .replace(/\t$/, '') + '\n' ); }) .trim(); @@ -98,22 +98,22 @@ export default function preserveFormat({ // Remove all remaining tags if (ignoreTags.length === 0) { - html = html.replace(/<[^>]+>/g, ""); + html = html.replace(/<[^>]+>/g, ''); } else { html = html.replace(/<\/?([a-z0-9]+)[^>]*>/gi, (match, tag: string) => - ignoreTags.includes(tag.toLowerCase()) ? match : "" + ignoreTags.includes(tag.toLowerCase()) ? 
match : '' ); } // Decode common HTML entities html = html - .replace(/ /gi, " ") - .replace(/&/gi, "&") - .replace(/</gi, "<") - .replace(/>/gi, ">"); + .replace(/ /gi, ' ') + .replace(/&/gi, '&') + .replace(/</gi, '<') + .replace(/>/gi, '>'); // Collapse multiple newlines to max two - html = html.replace(/\n{3,}/g, "\n\n").trim(); + html = html.replace(/\n{3,}/g, '\n\n').trim(); return html; } From dfb594ce3b9133daab13e3f4231b20e0551b6ee9 Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Mon, 1 Sep 2025 18:14:09 -0600 Subject: [PATCH 08/16] add format to ci workflows --- .github/workflows/main.yml | 1 + .github/workflows/pull-request.yml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1fc760e..9659964 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,5 +19,6 @@ jobs: version: 8 - run: pnpm install - run: pnpm lint + - run: pnpm format - run: pnpm test - run: pnpm build diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 167a155..94740e1 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -30,6 +30,9 @@ jobs: - name: Run lint run: pnpm lint + - name: Run format + run: pnpm format + - name: Run tests run: pnpm test From e9a4a39e2cd6c772013e16b3b9c4e9d259dd29b3 Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Mon, 1 Sep 2025 18:20:55 -0600 Subject: [PATCH 09/16] add editorconfig --- .editorconfig | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..7b3cfad --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +root = true + +[*] +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true +indent_style = space +indent_size = 2 + +[*.{diff,md}] +trim_trailing_whitespace = false From 638972c87696f55882dd30af7fecd89719a52404 Mon Sep 17 00:00:00 2001 
From: Sam Arjmandi Date: Tue, 2 Sep 2025 20:31:44 -0600 Subject: [PATCH 10/16] add util for text wrap by words count --- src/utils/wrapByWords.test.ts | 31 +++++++++++++++++++++++++++++++ src/utils/wrapByWords.ts | 21 +++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 src/utils/wrapByWords.test.ts create mode 100644 src/utils/wrapByWords.ts diff --git a/src/utils/wrapByWords.test.ts b/src/utils/wrapByWords.test.ts new file mode 100644 index 0000000..7ad9e19 --- /dev/null +++ b/src/utils/wrapByWords.test.ts @@ -0,0 +1,31 @@ +import { wrapByWords } from './wrapByWords'; + +describe('wrapByWords', () => { + test('splits text into lines of given word count', () => { + const text = 'one two three four five six seven'; + expect(wrapByWords(text, 3)).toBe('one two three\nfour five six\nseven'); + }); + + test('handles text shorter than word count', () => { + const text = 'hello world'; + expect(wrapByWords(text, 5)).toBe('hello world'); + }); + + test('handles exact multiples', () => { + const text = 'a b c d'; + expect(wrapByWords(text, 2)).toBe('a b\nc d'); + }); + + test('ignores extra whitespace', () => { + const text = ' alpha beta gamma '; + expect(wrapByWords(text, 2)).toBe('alpha beta\ngamma'); + }); + + test('handles single word', () => { + expect(wrapByWords('word', 2)).toBe('word'); + }); + + test('returns empty string for empty input', () => { + expect(wrapByWords('', 3)).toBe(''); + }); +}); diff --git a/src/utils/wrapByWords.ts b/src/utils/wrapByWords.ts new file mode 100644 index 0000000..1117671 --- /dev/null +++ b/src/utils/wrapByWords.ts @@ -0,0 +1,21 @@ +/** + * Wraps text into lines containing a fixed number of words. + * + * @param {string} text - The input text to wrap. + * @param {number} count - Maximum number of words per line. Must be greater than 0. + * @returns {string} The wrapped text, with lines separated by newline characters. 
+ * + * @example + * wrapByWords("one two three four five", 2); + * // => "one two\nthree four\nfive" + */ +export function wrapByWords(text: string, count: number): string { + const words = text.trim().split(/\s+/); + const lines: string[] = []; + + for (let i = 0; i < words.length; i += count) { + lines.push(words.slice(i, i + count).join(' ')); + } + + return lines.join('\n'); +} From 10e0f5712a08832e60c84192fc63e3093e55773c Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Tue, 2 Sep 2025 20:37:05 -0600 Subject: [PATCH 11/16] Add util for wrapByLength --- src/utils/wrapByLength.test.ts | 45 ++++++++++++++++++++++++++++++++++ src/utils/wrapByLength.ts | 34 +++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 src/utils/wrapByLength.test.ts create mode 100644 src/utils/wrapByLength.ts diff --git a/src/utils/wrapByLength.test.ts b/src/utils/wrapByLength.test.ts new file mode 100644 index 0000000..22005a4 --- /dev/null +++ b/src/utils/wrapByLength.test.ts @@ -0,0 +1,45 @@ +import { wrapByLength } from './wrapByLength'; + +describe('wrapByLength', () => { + test('wraps text at given character length without breaking words', () => { + const text = 'This is a very long sentence'; + expect(wrapByLength(text, 10)).toBe('This is a\nvery long\nsentence'); + }); + + test('returns text unchanged if shorter than length', () => { + expect(wrapByLength('short text', 20)).toBe('short text'); + }); + + test('handles exact line length', () => { + expect(wrapByLength('12345 67890', 11)).toBe('12345 67890'); + }); + + test('splits multiple lines correctly', () => { + const text = 'one two three four five six seven eight nine'; + expect(wrapByLength(text, 13)).toBe( + 'one two three\nfour five six\nseven eight\nnine' + ); + }); + + test('ignores extra whitespace', () => { + const text = ' alpha beta gamma '; + expect(wrapByLength(text, 8)).toBe('alpha\nbeta\ngamma'); + }); + + test('handles single long word exceeding limit', () => { + const text = 
'supercalifragilisticexpialidocious'; + // since function does not force-break words, it stays as is + expect(wrapByLength(text, 10)).toBe(text); + }); + + test('throws error if length is zero or negative', () => { + expect(() => wrapByLength('abc', 0)).toThrow( + 'wrap length must be greater than 0' + ); + expect(() => wrapByLength('abc', -5)).toThrow(); + }); + + test('returns empty string when input is empty', () => { + expect(wrapByLength('', 10)).toBe(''); + }); +}); diff --git a/src/utils/wrapByLength.ts b/src/utils/wrapByLength.ts new file mode 100644 index 0000000..7e871c3 --- /dev/null +++ b/src/utils/wrapByLength.ts @@ -0,0 +1,34 @@ +/** + * Wraps text into lines with a maximum number of characters. + * Breaks at word boundaries when possible. + * + * @param {string} text - The input text to wrap. + * @param {number} length - Maximum allowed characters per line. + * @returns {string} The wrapped text, with lines separated by newline characters. + * + * @example + * wrapByLength("This is a very long sentence", 10); + * // => "This is a\nvery long\nsentence" + */ +export function wrapByLength(text: string, length: number): string { + if (length <= 0) { + throw new Error('wrap length must be greater than 0'); + } + + const words = text.trim().split(/\s+/); + const lines: string[] = []; + let line = ''; + + for (const word of words) { + if ((line + ' ' + word).trim().length > length) { + if (line) lines.push(line.trim()); + line = word; + } else { + line += ' ' + word; + } + } + + if (line) lines.push(line.trim()); + + return lines.join('\n'); +} From ff213b06d45ba2c16513e5fd7cffb610c5a962ef Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Tue, 2 Sep 2025 20:39:11 -0600 Subject: [PATCH 12/16] use named function for utils --- src/utils/preserveFormat.test.ts | 2 +- src/utils/preserveFormat.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/preserveFormat.test.ts b/src/utils/preserveFormat.test.ts index 551efcd..90d04ab 100644 
--- a/src/utils/preserveFormat.test.ts +++ b/src/utils/preserveFormat.test.ts @@ -1,4 +1,4 @@ -import preserveFormat from './preserveFormat'; +import { preserveFormat } from './preserveFormat'; describe('preserveFormat', () => { it('should return empty string for empty input', () => { diff --git a/src/utils/preserveFormat.ts b/src/utils/preserveFormat.ts index 5dd629b..c6a3925 100644 --- a/src/utils/preserveFormat.ts +++ b/src/utils/preserveFormat.ts @@ -3,7 +3,7 @@ interface PreserveFormatOptions { ignoreTags?: string[]; } -export default function preserveFormat({ +export function preserveFormat({ html, ignoreTags = [], }: PreserveFormatOptions): string { From b53a39d7988292b948c3e2ff463ec35cef02dac1 Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Tue, 2 Sep 2025 20:43:37 -0600 Subject: [PATCH 13/16] add jsdoc to preserveFormat --- src/utils/preserveFormat.ts | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/utils/preserveFormat.ts b/src/utils/preserveFormat.ts index c6a3925..402377e 100644 --- a/src/utils/preserveFormat.ts +++ b/src/utils/preserveFormat.ts @@ -3,6 +3,36 @@ interface PreserveFormatOptions { ignoreTags?: string[]; } +/** + * Converts HTML to a more readable plain-text format while optionally preserving certain tags. + * - Converts headings and paragraphs to double newlines. + * - Converts `
<br>` to newline. + * - Wraps bold (`<b>`, `<strong>`) in `**`. + * - Wraps italic (`<i>`, `<em>`) in `*`. + * - Converts links `<a href="url">text</a>` to `text (url)`. + * - Formats lists (`<ul>`, `<ol>`) and list items. + * - Formats blockquotes (`<blockquote>`) with `> ` prefix. + * - Converts tables to tab-delimited rows. + * - Decodes common HTML entities. + * - Collapses multiple newlines to a maximum of two. + * + * @param {Object} options - Options for preserving format. + * @param {string} options.html - The input HTML string to format. + * @param {string[]} [options.ignoreTags] - List of tags to leave intact (default: `[]`). + * @returns {string} The formatted plain-text representation of the HTML. + * + * @example + * preserveFormat({ html: '

<p>Hello <b>world</b></p>

                  ' }); + * // => 'Hello **world**' + * + * @example + * preserveFormat({ html: '
<ul><li>One</li><li>Two</li></ul>
                  ' }); + * // => '- One\n- Two' + * + * @example + * preserveFormat({ html: 'Link', ignoreTags: ['a'] }); + * // => 'Link' + */ export function preserveFormat({ html, ignoreTags = [], From 6960e5647ab97e103a6e24fc55715f40759a7012 Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Tue, 2 Sep 2025 20:44:14 -0600 Subject: [PATCH 14/16] update textify --- src/index.ts | 44 +++++++++++++++++++++++++++++++++++++++----- src/textify.test.ts | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/src/index.ts b/src/index.ts index a6360d5..969ce69 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,28 +1,54 @@ -import preserveFormat from './utils/preserveFormat'; +import { preserveFormat } from './utils/preserveFormat'; +import { wrapByLength } from './utils/wrapByLength'; +import { wrapByWords } from './utils/wrapByWords'; export interface TextifyOptions { html: string; preserveFormatting?: boolean; // optional, default true ignoreTags?: string[]; // optional tags to keep intact + wrapLength?: number; // max chars per line + wrapWords?: number; // max words per line } +/** + * Converts HTML to plain text with optional formatting and wrapping. + * + * @param {Object} options - Configuration options. + * @param {string} options.html - The input HTML string to convert. + * @param {boolean} [options.preserveFormatting=true] - Whether to preserve readable formatting. + * @param {string[]} [options.ignoreTags=[]] - List of HTML tags to keep intact. + * @param {number} [options.wrapLength] - Maximum characters per line (ignored if wrapWords is set). + * @param {number} [options.wrapWords] - Maximum words per line. Takes priority over wrapLength. + * @returns {string} The plain text result with optional wrapping. + * + * @example + * textify({ html: "

<p>Hello world</p>

                  ", preserveFormatting: false }); + * // => "Hello world" + * + * @example + * textify({ html: "

<p>one two three four five</p>

                  ", wrapWords: 2 }); + * // => "one two\nthree four\nfive" + * + * @example + * textify({ html: "

<p>one two three four five</p>

                  ", wrapLength: 10 }); + * // => "one two\nthree four\nfive" + */ export function textify({ html, preserveFormatting = true, ignoreTags = [], + wrapLength, + wrapWords, }: TextifyOptions): string { - // Ignore rest of the function if it's already empty if (!html) return ''; + // Strip or preserve HTML formatting if (preserveFormatting) { - // Keep readable formatting html = preserveFormat({ html, ignoreTags }); } else { if (ignoreTags.length === 0) { - // Strip all tags html = html.replace(/<[^>]+>/g, '').trim(); } else { - // Regex to match all tags except the ignored ones const IG = new Set(ignoreTags.map((t) => t.toLowerCase())); html = html .replace(/<\/?([a-z][a-z0-9-]*)\b[^>]*>/gi, (match, tag) => @@ -31,5 +57,13 @@ export function textify({ .trim(); } } + + // Wrap output text (word-based wrapping takes priority) + if (wrapWords && wrapWords > 0) { + html = wrapByWords(html, wrapWords); + } else if (wrapLength && wrapLength > 0) { + html = wrapByLength(html, wrapLength); + } + return html; } diff --git a/src/textify.test.ts b/src/textify.test.ts index 67f3eeb..80ed5a1 100644 --- a/src/textify.test.ts +++ b/src/textify.test.ts @@ -95,4 +95,37 @@ describe('textify', () => { // since preserveFormat handles it, just check it returns something non-empty expect(result).not.toBe(''); }); + + test('wraps text by word count when wrapWords is set', () => { + const html = '

<p>one two three four five six seven</p>

                  '; + const result = textify({ html, preserveFormatting: false, wrapWords: 3 }); + expect(result).toBe('one two three\nfour five six\nseven'); + }); + + test('wraps text by character length when wrapLength is set', () => { + const html = '

<p>This is a test sentence for wrapping.</p>

                  '; + const result = textify({ html, preserveFormatting: false, wrapLength: 10 }); + expect(result).toBe('This is a\ntest\nsentence\nfor\nwrapping.'); + }); + + test('wrapWords takes priority over wrapLength', () => { + const html = '

<p>one two three four five</p>

                  '; + const result = textify({ + html, + preserveFormatting: false, + wrapWords: 2, + wrapLength: 5, + }); + expect(result).toBe('one two\nthree four\nfive'); + }); + + test('does not wrap when wrapWords or wrapLength is zero or negative', () => { + const html = '

<p>one two three</p>

                  '; + expect(() => + textify({ html, preserveFormatting: false, wrapWords: 0 }) + ).not.toThrow(); + expect(() => + textify({ html, preserveFormatting: false, wrapLength: 0 }) + ).not.toThrow(); + }); }); From 8eee4d6df873c38f83e75051102c5d05f4cd0b6a Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Tue, 2 Sep 2025 20:46:32 -0600 Subject: [PATCH 15/16] update readme file --- README.md | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 3834e90..03280ed 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ Convert HTML into plain text while optionally preserving formatting and keeping - Convert HTML to plain text - Preserve formatting such as paragraphs, headings, lists, bold, italic, links, blockquotes, and tables - Optionally ignore specific tags to keep them in the output +- Wrap output by word count or character length - Handles self-closing tags and nested content - Strips unknown tags and decodes common HTML entities (` `, `&`, `<`, `>`) @@ -25,30 +26,60 @@ yarn add html-textify ## Usage ```ts -import { textify } from "html-textify"; +import { textify } from 'html-textify'; // Simple usage -const html = "

                  Hello World

                  "; +const html = '

<p>Hello <b>World</b></p>

                  '; const plain = textify({ html }); -console.log(plain); // "**Hello** World" +console.log(plain); // "Hello **World**" // Preserve formatting but ignore certain tags -const html2 = "

                  Paragraph bold italic

                  "; +const html2 = '

                  Paragraph bold italic

                  '; const result = textify({ html: html2, preserveFormatting: true, - ignoreTags: ["b", "i"], + ignoreTags: ['b', 'i'], }); console.log(result); // "Paragraph bolditalic" // Strip all tags except ignored ones -const html3 = "

                  Paragraph highlighted

                  "; +const html3 = '

                  Paragraph highlighted

                  '; const stripped = textify({ html: html3, preserveFormatting: false, - ignoreTags: ["mark"], + ignoreTags: ['mark'], }); console.log(stripped); // "Paragraph highlighted" + +// Wrap by words (max 2 words per line) +const html4 = '

<p>one two three four five</p>

                  '; +const wrappedWords = textify({ + html: html4, + preserveFormatting: false, + wrapWords: 2, +}); +console.log(wrappedWords); +/* Output: +one two +three four +five +*/ + +// Wrap by characters (max 10 characters per line) +const html5 = '

<p>This is a test sentence for wrapping.</p>

                  '; +const wrappedChars = textify({ + html: html5, + preserveFormatting: false, + wrapLength: 10, +}); +console.log(wrappedChars); +/* Output: +This is a +test +sentence +for +wrapping. +*/ ``` ## API @@ -58,11 +89,13 @@ console.log(stripped); // "Paragraph highlighted" - `options.html (string)` – HTML string to convert - `options.preserveFormatting (boolean, default: true)` – Whether to keep formatting like lists, headings, blockquotes, bold/italic - `options.ignoreTags (string[], optional)` – Tags to keep intact in output (e.g., ["b", "mark"]) +- `options.wrapWords (number, optional)` – Maximum words per line (takes priority over wrapLength) +- `options.wrapLength (number, optional)` – Maximum characters per line ## Examples ```ts -import { textify } from "html-textify"; +import { textify } from 'html-textify'; const html = `

                  Title

                  From 98a0c1e04f0cc4bb4d0fd6cc3d4ec74f6b9e68d1 Mon Sep 17 00:00:00 2001 From: Sam Arjmandi Date: Tue, 2 Sep 2025 20:47:00 -0600 Subject: [PATCH 16/16] update package version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index f29bbd1..183baaf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "html-textify", - "version": "0.1.2", + "version": "1.0.0", "description": "Convert html to plain text", "main": "dist/index.js", "module": "dist/index.mjs",