Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
root = true

[*]
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
indent_style = space
indent_size = 2

[*.{diff,md}]
trim_trailing_whitespace = false
3 changes: 2 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: CI Push
name: Push to main

on:
push:
Expand All @@ -19,5 +19,6 @@ jobs:
version: 8
- run: pnpm install
- run: pnpm lint
- run: pnpm format
- run: pnpm test
- run: pnpm build
3 changes: 3 additions & 0 deletions .github/workflows/pull-request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ jobs:
- name: Run lint
run: pnpm lint

- name: Run format
run: pnpm format

- name: Run tests
run: pnpm test

Expand Down
2 changes: 2 additions & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
node_modules
dist
7 changes: 7 additions & 0 deletions .prettierrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"semi": true,
"singleQuote": true,
"trailingComma": "es5",
"printWidth": 80,
"tabWidth": 2
}
49 changes: 41 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Convert HTML into plain text while optionally preserving formatting and keeping
- Convert HTML to plain text
- Preserve formatting such as paragraphs, headings, lists, bold, italic, links, blockquotes, and tables
- Optionally ignore specific tags to keep them in the output
- Wrap output by word count or character length
- Handles self-closing tags and nested content
- Strips unknown tags and decodes common HTML entities (`&nbsp;`, `&amp;`, `&lt;`, `&gt;`)

Expand All @@ -25,30 +26,60 @@ yarn add html-textify
## Usage

```ts
import { textify } from "html-textify";
import { textify } from 'html-textify';

// Simple usage
const html = "<p>Hello <b>World</b></p>";
const html = '<p>Hello <b>World</b></p>';
const plain = textify({ html });
console.log(plain); // "**Hello** World"
console.log(plain); // "Hello **World**"

// Preserve formatting but ignore certain tags
const html2 = "<p>Paragraph <b>bold</b> <i>italic</i></p>";
const html2 = '<p>Paragraph <b>bold</b> <i>italic</i></p>';
const result = textify({
html: html2,
preserveFormatting: true,
ignoreTags: ["b", "i"],
ignoreTags: ['b', 'i'],
});
console.log(result); // "Paragraph <b>bold</b><i>italic</i>"

// Strip all tags except ignored ones
const html3 = "<p>Paragraph <mark>highlighted</mark></p>";
const html3 = '<p>Paragraph <mark>highlighted</mark></p>';
const stripped = textify({
html: html3,
preserveFormatting: false,
ignoreTags: ["mark"],
ignoreTags: ['mark'],
});
console.log(stripped); // "Paragraph <mark>highlighted</mark>"

// Wrap by words (max 2 words per line)
const html4 = '<p>one two three four five</p>';
const wrappedWords = textify({
html: html4,
preserveFormatting: false,
wrapWords: 2,
});
console.log(wrappedWords);
/* Output:
one two
three four
five
*/

// Wrap by characters (max 10 characters per line)
const html5 = '<p>This is a test sentence for wrapping.</p>';
const wrappedChars = textify({
html: html5,
preserveFormatting: false,
wrapLength: 10,
});
console.log(wrappedChars);
/* Output:
This is a
test
sentence
for
wrapping.
*/
```

## API
Expand All @@ -58,11 +89,13 @@ console.log(stripped); // "Paragraph <mark>highlighted</mark>"
- `options.html (string)` – HTML string to convert
- `options.preserveFormatting (boolean, default: true)` – Whether to keep formatting like lists, headings, blockquotes, bold/italic
- `options.ignoreTags (string[], optional)` – Tags to keep intact in output (e.g., ["b", "mark"])
- `options.wrapWords (number, optional)` – Maximum words per line (takes priority over wrapLength)
- `options.wrapLength (number, optional)` – Maximum characters per line

## Examples

```ts
import { textify } from "html-textify";
import { textify } from 'html-textify';

const html = `
<h1>Title</h1>
Expand Down
6 changes: 5 additions & 1 deletion eslint.config.mjs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import tseslint from "typescript-eslint";
import prettier from "eslint-plugin-prettier";
import prettierConfig from "eslint-config-prettier";

export default tseslint.config(
{
Expand All @@ -14,10 +16,12 @@ export default tseslint.config(
},
plugins: {
"@typescript-eslint": tseslint.plugin,
prettier,
},
rules: {
...tseslint.configs.recommended.rules,

...prettierConfig.rules, // disables conflicting ESLint rules
"prettier/prettier": "error", // enforce Prettier formatting
"@typescript-eslint/no-explicit-any": "warn",
"@typescript-eslint/explicit-function-return-type": "off",
},
Expand Down
7 changes: 6 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "html-textify",
"version": "0.1.2",
"version": "1.0.0",
"description": "Convert html to plain text",
"main": "dist/index.js",
"module": "dist/index.mjs",
Expand All @@ -17,6 +17,8 @@
"prepublishOnly": "pnpm build",
"lint": "eslint src --ext .ts",
"lint:fix": "eslint src --ext .ts --fix",
"format": "prettier --check \"src/**/*.{ts,js,json,md}\"",
"format:fix": "prettier --write \"src/**/*.{ts,js,json,md}\"",
"test": "jest",
"test:watch": "jest --watchAll"
},
Expand All @@ -36,7 +38,10 @@
"@types/jest": "^30.0.0",
"@types/node": "^24.3.0",
"eslint": "^9.34.0",
"eslint-config-prettier": "^10.1.8",
"eslint-plugin-prettier": "^5.5.4",
"jest": "^30.1.1",
"prettier": "^3.6.2",
"ts-jest": "^29.4.1",
"ts-node": "^10.9.2",
"tsup": "^8.5.0",
Expand Down
62 changes: 62 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 42 additions & 8 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,69 @@
import preserveFormat from "./utils/preserveFormat";
import { preserveFormat } from './utils/preserveFormat';
import { wrapByLength } from './utils/wrapByLength';
import { wrapByWords } from './utils/wrapByWords';

/**
* Options accepted by `textify`.
*
* Only `html` is required; every other field is optional and is either
* defaulted or skipped by `textify` when it is omitted.
*/
export interface TextifyOptions {
html: string; // HTML input to convert; an empty string makes textify return ''
preserveFormatting?: boolean; // optional, textify defaults it to true
ignoreTags?: string[]; // optional tags to keep intact; textify defaults it to []
wrapLength?: number; // max chars per line; only used when wrapWords is not a positive number
wrapWords?: number; // max words per line; checked before wrapLength
}

/**
* Converts HTML to plain text with optional formatting and wrapping.
*
* Processing happens in three steps:
* 1. An empty `html` value short-circuits to an empty string.
* 2. When `preserveFormatting` is true the markup is handed to
*    `preserveFormat` together with `ignoreTags`; otherwise tags are removed
*    with a regular expression (tags named in `ignoreTags` are left in place,
*    compared case-insensitively) and the result is trimmed.
* 3. The resulting text is wrapped by `wrapByWords` when `wrapWords` is a
*    positive number, else by `wrapByLength` when `wrapLength` is a positive
*    number, else returned unwrapped.
*
* @param {Object} options - Configuration options.
* @param {string} options.html - The input HTML string to convert.
* @param {boolean} [options.preserveFormatting=true] - Whether to preserve readable formatting.
* @param {string[]} [options.ignoreTags=[]] - List of HTML tags to keep intact.
* @param {number} [options.wrapLength] - Maximum characters per line. Only applied when wrapWords is missing or not greater than 0; values not greater than 0 disable it.
* @param {number} [options.wrapWords] - Maximum words per line. Takes priority over wrapLength; values not greater than 0 disable it.
* @returns {string} The plain text result with optional wrapping, or an empty string when `html` is empty.
*
* @example
* textify({ html: "<p>Hello <b>world</b></p>", preserveFormatting: false });
* // => "Hello world"
*
* @example
* textify({ html: "<p>one two three four five</p>", wrapWords: 2 });
* // => "one two\nthree four\nfive"
*
* @example
* textify({ html: "<p>one two three four five</p>", wrapLength: 10 });
* // => "one two\nthree four\nfive"
*/
export function textify({
html,
preserveFormatting = true,
ignoreTags = [],
wrapLength,
wrapWords,
}: TextifyOptions): string {
// Nothing to convert: skip the rest of the function for empty input
if (!html) return "";
if (!html) return '';

// Strip or preserve HTML formatting
if (preserveFormatting) {
// Delegate to preserveFormat, which also receives the tags to keep
html = preserveFormat({ html, ignoreTags });
} else {
if (ignoreTags.length === 0) {
// No tags to keep: remove every "<...>" sequence and trim the result
html = html.replace(/<[^>]+>/g, "").trim();
html = html.replace(/<[^>]+>/g, '').trim();
} else {
// Lower-case the ignore list once so tag-name lookups are case-insensitive
const IG = new Set(ignoreTags.map((t) => t.toLowerCase()));
// Match each opening/closing tag, capturing its name; the callback keeps
// the tag verbatim when its name is in the ignore set and drops it otherwise
html = html
.replace(/<\/?([a-z][a-z0-9-]*)\b[^>]*>/gi, (match, tag) =>
IG.has(tag.toLowerCase()) ? match : ""
IG.has(tag.toLowerCase()) ? match : ''
)
.trim();
}
}

// Wrap output text (word-based wrapping takes priority; limits of 0,
// negative limits, and undefined leave the text unwrapped)
if (wrapWords && wrapWords > 0) {
html = wrapByWords(html, wrapWords);
} else if (wrapLength && wrapLength > 0) {
html = wrapByLength(html, wrapLength);
}

return html;
}
Loading