From f1db140da769717b90afc8ae8f12adbabdad628f Mon Sep 17 00:00:00 2001 From: Holden Date: Mon, 16 Mar 2026 23:54:07 +0800 Subject: [PATCH] fix: preserve line breaks when converting HTML to markdown --- lib/confluence-client.js | 8 +-- tests/confluence-client.test.js | 107 +++++++++++++++++++++++++++++++- 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/lib/confluence-client.js b/lib/confluence-client.js index 84802bd..f3a2e65 100644 --- a/lib/confluence-client.js +++ b/lib/confluence-client.js @@ -1293,12 +1293,12 @@ class ConfluenceClient { // Convert Confluence code macros to markdown markdown = markdown.replace(/]*>[\s\S]*?([^<]*)<\/ac:parameter>[\s\S]*?<\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, lang, code) => { - return `\`\`\`${lang}\n${code}\n\`\`\``; + return `\n\`\`\`${lang}\n${code}\n\`\`\`\n`; }); // Convert code macros without language parameter markdown = markdown.replace(/]*>[\s\S]*?<\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, code) => { - return `\`\`\`\n${code}\n\`\`\``; + return `\n\`\`\`\n${code}\n\`\`\`\n`; }); // Convert info macro to admonition @@ -1515,8 +1515,8 @@ class ConfluenceClient { }); // Convert paragraphs (after lists and tables) - markdown = markdown.replace(/

<p>(.*?)<\/p>/g, (_, content) => { - return content.trim() + '\n'; + markdown = markdown.replace(/<p>

(.*?)<\/p>/gs, (_, content) => { + return '\n' + content.trim() + '\n'; }); // Convert line breaks diff --git a/tests/confluence-client.test.js b/tests/confluence-client.test.js index 5f40248..9e8b921 100644 --- a/tests/confluence-client.test.js +++ b/tests/confluence-client.test.js @@ -379,12 +379,59 @@ describe('ConfluenceClient', () => { test('should convert Confluence code macro to markdown', () => { const storage = 'javascript'; const result = client.storageToMarkdown(storage); - + expect(result).toContain('```javascript'); expect(result).toContain('console.log("Hello");'); expect(result).toContain('```'); }); + test('should separate code block (with language) from surrounding content with blank lines', () => { + const storage = '

Intro

python

Outro

'; + const result = client.storageToMarkdown(storage); + expect(result).toMatch(/Intro\n\n/); + expect(result).toMatch(/\n\n```python\n/); + expect(result).toMatch(/\n```\n\n/); + expect(result).toMatch(/\n\nOutro/); + }); + + test('should separate code block (no language) from surrounding content with blank lines', () => { + const storage = '

Before

After

'; + const result = client.storageToMarkdown(storage); + expect(result).toMatch(/Before\n\n/); + expect(result).toMatch(/\n\n```\n/); + expect(result).toMatch(/\n```\n\n/); + expect(result).toMatch(/\n\nAfter/); + }); + + test('should separate mermaid macro from surrounding content with blank lines', () => { + const storage = '

Diagram:

B]]>

End

'; + const result = client.storageToMarkdown(storage); + expect(result).toMatch(/Diagram:\n\n/); + expect(result).toMatch(/\n\n```mermaid\n/); + expect(result).toMatch(/\n```\n\n/); + expect(result).toMatch(/\n\nEnd/); + }); + + test('complex page: heading, multi-line paragraph, code block, ordered list', () => { + const storage = [ + '

<h1>Deployment Guide</h1>

', + '

<p>Deploy using the following steps.\nEnsure prerequisites are met.</p>

', + 'bash', + '

<p>Then verify:</p>

', + '
<ol><li>Check logs</li><li>Run smoke tests</li></ol>
', + '

<p>Deployment complete.</p>

' + ].join(''); + const result = client.storageToMarkdown(storage); + expect(result).toBe( + '# Deployment Guide\n\n' + + 'Deploy using the following steps.\nEnsure prerequisites are met.\n\n' + + '```bash\ngit pull origin main\nnpm run build\n```\n\n' + + 'Then verify:\n\n' + + '1. Check logs\n2. Run smoke tests\n\n' + + 'Deployment complete.' + ); + }); + test('should convert Confluence macros to admonitions', () => { const storage = '

This is info

'; const result = client.storageToMarkdown(storage); @@ -428,6 +475,64 @@ describe('ConfluenceClient', () => { expect(result).toContain('| Cell |'); }); + test('should preserve content of multi-line paragraphs', () => { + // Without the dotAll flag on the

<p> regex, content with embedded newlines is silently dropped + const html = '

<p>First line\nSecond line</p>

'; + const result = client.htmlToMarkdown(html); + expect(result).toContain('First line'); + expect(result).toContain('Second line'); + }); + + test('should separate consecutive paragraphs with a blank line', () => { + const html = '

<p>Alpha</p><p>Beta</p>

'; + const result = client.htmlToMarkdown(html); + expect(result).toMatch(/Alpha\n\nBeta/); + }); + + test('should separate lists from surrounding content with blank lines', () => { + const html = '

<p>Intro</p><ul><li>Item A</li><li>Item B</li></ul><p>Outro</p>

'; + const result = client.htmlToMarkdown(html); + expect(result).toMatch(/Intro\n\n/); + expect(result).toMatch(/\n\n- Item A\n- Item B\n\n/); + expect(result).toMatch(/\n\nOutro/); + }); + + test('should separate ordered lists from surrounding content with blank lines', () => { + const html = '

<p>Steps:</p><ol><li>First</li><li>Second</li></ol><p>Done</p>

'; + const result = client.htmlToMarkdown(html); + expect(result).toMatch(/Steps:\n\n/); + expect(result).toMatch(/\n\n1\. First\n2\. Second\n\n/); + expect(result).toMatch(/\n\nDone/); + }); + + test('should separate tables from surrounding content with blank lines', () => { + const html = '

<p>See table:</p><table><tr><th>Col</th></tr><tr><td>Val</td></tr></table><p>End</p>

'; + const result = client.htmlToMarkdown(html); + expect(result).toMatch(/See table:\n\n/); + expect(result).toMatch(/\| Col \|/); + expect(result).toMatch(/\n\nEnd/); + }); + + test('complex page: heading, multi-line paragraph, table, list', () => { + const html = [ + '

<h2>API Reference</h2>

', + '

<p>The following endpoints are available.\nAll requests require authentication.</p>

', + '
<table><tr><th>Method</th><th>Path</th></tr><tr><td>GET</td><td>/users</td></tr><tr><td>POST</td><td>/users</td></tr></table>
', + '

<p>Authentication options:</p>

', + '
<ul><li>Bearer token</li><li>API key</li></ul>
', + '

<p>See docs for details.</p>

' + ].join(''); + const result = client.htmlToMarkdown(html); + expect(result).toBe( + '## API Reference\n\n' + + 'The following endpoints are available.\nAll requests require authentication.\n\n' + + '| Method | Path |\n| --- | --- |\n| GET | /users |\n| POST | /users |\n\n' + + 'Authentication options:\n\n' + + '- Bearer token\n- API key\n\n' + + 'See docs for details.' + ); + }); + test('should convert named characters correctly', () => { const NAMED_ENTITIES = ConfluenceClient.NAMED_ENTITIES;