From 6c9f3dd28da349d6b3fc77be127f863a0397fdd7 Mon Sep 17 00:00:00 2001 From: George Lund Date: Tue, 16 May 2023 18:08:37 +0100 Subject: [PATCH 01/12] Upgrade Mistune to version 3 (release candidate, so tentative) - header renderer function renamed to heading and args changed - link arguments changed - block_code arguments changed - image arguments changed and local filepath needs URL-decoding NB work in progress - tests not fixed --- md2cf/confluence_renderer.py | 37 ++++++++++++++++++------------------ md2cf/document.py | 1 - setup.py | 2 +- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py index ed46aa4..a5d35cd 100644 --- a/md2cf/confluence_renderer.py +++ b/md2cf/confluence_renderer.py @@ -59,7 +59,7 @@ def append(self, child): self.children.append(child) -class ConfluenceRenderer(mistune.Renderer): +class ConfluenceRenderer(mistune.HTMLRenderer): def __init__( self, strip_header=False, @@ -80,14 +80,14 @@ def reinit(self): self.relative_links = list() self.title = None - def header(self, text, level, raw=None): + def heading(self, text, level, **attrs): if self.title is None and level == 1: self.title = text # Don't duplicate page title as a header if self.strip_header: return "" - return super(ConfluenceRenderer, self).header(text, level, raw=raw) + return super(ConfluenceRenderer, self).heading(text, level, **attrs) def structured_macro(self, name): return ConfluenceTag("structured-macro", attrib={"name": name}) @@ -102,8 +102,8 @@ def plain_text_body(self, text): body_tag.text = text return body_tag - def link(self, link, title, text): - parsed_link = urlparse.urlparse(link) + def link(self, text, url, title=None): + parsed_link = urlparse.urlparse(url) if self.enable_relative_links and ( not parsed_link.scheme and not parsed_link.netloc @@ -115,44 +115,43 @@ def link(self, link, title, text): RelativeLink( path=parsed_link.path, replacement=replacement_link, - original=link, - 
escaped_original=mistune.escape_link(link), + original=url, + escaped_original=mistune.escape_link(url), ) ) - link = replacement_link - return super(ConfluenceRenderer, self).link(link, title, text) + url = replacement_link + return super(ConfluenceRenderer, self).link(text, url, title) def text(self, text): if self.remove_text_newlines: text = text.replace("\n", " ") - return super().text(text) - def block_code(self, code, lang=None): + def block_code(self, code, info=None): root_element = self.structured_macro("code") - if lang is not None: - lang_parameter = self.parameter(name="language", value=lang) + if info is not None: + lang_parameter = self.parameter(name="language", value=info) root_element.append(lang_parameter) root_element.append(self.parameter(name="linenumbers", value="true")) root_element.append(self.plain_text_body(code)) return root_element.render() - def image(self, src, title, text): - attributes = {"alt": text} + def image(self, alt, url, title=None): + attributes = {"alt": alt} if title: attributes["title"] = title root_element = ConfluenceTag(name="image", attrib=attributes) - parsed_source = urlparse.urlparse(src) + parsed_source = urlparse.urlparse(url) if not parsed_source.netloc: # Local file, requires upload - basename = Path(src).name + basename = Path(parsed_source.path).name url_tag = ConfluenceTag( "attachment", attrib={"filename": basename}, namespace="ri" ) - self.attachments.append(src) + self.attachments.append(urlparse.unquote(parsed_source.path)) else: - url_tag = ConfluenceTag("url", attrib={"value": src}, namespace="ri") + url_tag = ConfluenceTag("url", attrib={"value": url}, namespace="ri") root_element.append(url_tag) return root_element.render() diff --git a/md2cf/document.py b/md2cf/document.py index b0cce37..5b13753 100644 --- a/md2cf/document.py +++ b/md2cf/document.py @@ -268,7 +268,6 @@ def parse_page( enable_relative_links: bool = False, ) -> Page: renderer = ConfluenceRenderer( - use_xhtml=True, 
strip_header=strip_header, remove_text_newlines=remove_text_newlines, enable_relative_links=enable_relative_links, diff --git a/setup.py b/setup.py index 7d15fbd..f9ce788 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ install_requires=[ "rich-argparse==1.0.0", "rich==13.0.1", - "mistune==0.8.4", + "mistune==3.0.0rc5", "chardet==5.1.0", "requests==2.28.2", "PyYAML==6.0", From 5b9a222173a621140c3af704d507ebe7a74b24d2 Mon Sep 17 00:00:00 2001 From: George Lund Date: Wed, 17 May 2023 12:42:14 +0100 Subject: [PATCH 02/12] New version of mistune always escapes quotes using the &quot; entity --- tests/functional/result.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/functional/result.xml b/tests/functional/result.xml index ca22230..b56956b 100644 --- a/tests/functional/result.xml +++ b/tests/functional/result.xml @@ -41,16 +41,16 @@ inspiration for Markdown's syntax is the format of plain text email.

by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered blank.) Normal paragraphs should not be indented with spaces or tabs.

-

The implication of the "one or more consecutive lines of text" rule is -that Markdown supports "hard-wrapped" text paragraphs. This differs +

The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs significantly from most other text-to-HTML formatters (including Movable -Type's "Convert Line Breaks" option) which translate every line break +Type's "Convert Line Breaks" option) which translate every line break character in a paragraph into a <br /> tag.

When you do want to insert a <br /> break tag using Markdown, you end a line with two or more spaces, then type return.

Headers

Markdown supports two styles of headers, [Setext] [1] and [atx] [2].

-

Optionally, you may "close" atx-style headers. This is purely +

Optionally, you may "close" atx-style headers. This is purely cosmetic -- you can use this if you think it looks better. The closing hashes don't even need to match the number of hashes used to open the header. (The number of opening hashes From c1536d3d3a630f31abbf63275b3c38f052029891 Mon Sep 17 00:00:00 2001 From: George Lund Date: Wed, 17 May 2023 12:44:07 +0100 Subject: [PATCH 03/12] Include demonstration of forcing a line break using two spaces --- .pre-commit-config.yaml | 1 + tests/functional/result.xml | 3 ++- tests/functional/test.md | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b5ed600..18ae735 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,7 @@ repos: hooks: - id: trailing-whitespace exclude: .bumpversion.cfg + args: [--markdown-linebreak-ext=md] - id: end-of-file-fixer exclude: '.bumpversion.cfg' - id: check-yaml diff --git a/tests/functional/result.xml b/tests/functional/result.xml index b56956b..a8ceb5b 100644 --- a/tests/functional/result.xml +++ b/tests/functional/result.xml @@ -47,7 +47,8 @@ significantly from most other text-to-HTML formatters (including Movable Type's "Convert Line Breaks" option) which translate every line break character in a paragraph into a <br /> tag.

When you do want to insert a <br /> break tag using Markdown, you -end a line with two or more spaces, then type return.

+end a line with two or more spaces, then type return, like...
+this.

Headers

Markdown supports two styles of headers, [Setext] [1] and [atx] [2].

Optionally, you may "close" atx-style headers. This is purely diff --git a/tests/functional/test.md b/tests/functional/test.md index 8aaab12..fe1b6ac 100644 --- a/tests/functional/test.md +++ b/tests/functional/test.md @@ -52,7 +52,8 @@ Type's "Convert Line Breaks" option) which translate every line break character in a paragraph into a `
` tag. When you *do* want to insert a `
` break tag using Markdown, you -end a line with two or more spaces, then type return. +end a line with two or more spaces, then type return, like... +this. ### Headers From 899c989c52ecc1f93f3dbac6990584c4053c1a63 Mon Sep 17 00:00:00 2001 From: George Lund Date: Wed, 17 May 2023 17:24:12 +0100 Subject: [PATCH 04/12] New version of Mistune treats blockquotes separated by a blank line as separate elements - new Mistune behaviour is correct - we're trying to test a single blockquote with multiple paragraphs (where only the first line of each para has an angle bracket) - so add the extra angle bracket on the blank line, to correspond with the intended output --- tests/functional/test.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/test.md b/tests/functional/test.md index fe1b6ac..c842aa7 100644 --- a/tests/functional/test.md +++ b/tests/functional/test.md @@ -86,7 +86,7 @@ line of a hard-wrapped paragraph: > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. - +> > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing. From 50034524b4f511254fb45d055bb92c779a9f5640 Mon Sep 17 00:00:00 2001 From: George Lund Date: Wed, 17 May 2023 17:34:20 +0100 Subject: [PATCH 05/12] New version of Mistune puts

on a line of its own - no change to semantics of the output markup --- tests/functional/result.xml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/functional/result.xml b/tests/functional/result.xml index a8ceb5b..91ebaa7 100644 --- a/tests/functional/result.xml +++ b/tests/functional/result.xml @@ -61,7 +61,8 @@ determines the header level.)

familiar with quoting passages of text in an email message, then you know how to create a blockquote in Markdown. It looks best if you hard wrap the text and put a > before every line:

-

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +

+

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse @@ -69,7 +70,8 @@ id sem consectetuer libero luctus adipiscing.

Markdown allows you to be lazy and only put the > before the first line of a hard-wrapped paragraph:

-

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +

+

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse @@ -77,14 +79,17 @@ id sem consectetuer libero luctus adipiscing.

Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by adding additional levels of >:

-

This is the first level of quoting.

-

This is nested blockquote.

+
+

This is the first level of quoting.

+
+

This is nested blockquote.

Back to the first level.

Blockquotes can contain other Markdown elements, including headers, lists, and code blocks:

-

This is a header.

+
+

This is a header.

  1. This is the first list item.
  2. This is the second list item.
  3. @@ -189,7 +194,8 @@ sit amet, consectetuer adipiscing elit.

    delimiters need to be indented:

    • A list item with a blockquote:

      -

      This is a blockquote +

      +

      This is a blockquote inside a list item.

    • From 579d09fce1ba3969c49b22995acf331067494e80 Mon Sep 17 00:00:00 2001 From: George Lund Date: Wed, 17 May 2023 17:55:06 +0100 Subject: [PATCH 06/12] The number of the first item in an ordered list affects the resulting markup - there is a substantial difference between the [syntax previously assumed](https://daringfireball.net/projects/markdown/syntax#list) and the [CommonMark syntax](https://spec.commonmark.org/0.30/#start-number) - Mistune now follows CommonMark so the example doc must change Arguably the example doc here is not testing md2cf functionality and should be removed - It's not particularly useful to test a lib that we depend on, unless we are testing something that matters to Confluence - Even then, deviation from CommonMark is not helpful, though potentially we could flag a warning somehow --- tests/functional/result.xml | 13 ++++++------- tests/functional/test.md | 11 +++++------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/functional/result.xml b/tests/functional/result.xml index 91ebaa7..c74bbb2 100644 --- a/tests/functional/result.xml +++ b/tests/functional/result.xml @@ -129,22 +129,21 @@ Quote Level from the Text menu.

    • McHale
    • Parish
-

It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is:

-

If you instead wrote the list in Markdown like this:

+

Only the first number in a list has any effect.

+

You can therefore write the same list in Markdown like this:

  1. Bird
  2. McHale
  3. Parish
-

or even:

-
    +

    You specify a different start number, but the numbers you use for +subsequent items will be ignored:

    +
    1. Bird
    2. McHale
    3. Parish
    -

    you'd get the exact same HTML output. The point is, if you want to, +

    The point is, if you want to, you can use ordinal numbers in your ordered Markdown lists, so that the numbers in your source match the numbers in your published HTML. But if you want to be lazy, you don't have to.

    diff --git a/tests/functional/test.md b/tests/functional/test.md index c842aa7..0fd7ee3 100644 --- a/tests/functional/test.md +++ b/tests/functional/test.md @@ -145,23 +145,22 @@ Ordered lists use numbers followed by periods: 2. McHale 3. Parish -It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is: +Only the first number in a list has any effect. -If you instead wrote the list in Markdown like this: +You can therefore write the same list in Markdown like this: 1. Bird 1. McHale 1. Parish -or even: +You specify a different start number, but the numbers you use for +subsequent items will be ignored: 3. Bird 1. McHale 8. Parish -you'd get the exact same HTML output. The point is, if you want to, +The point is, if you want to, you can use ordinal numbers in your ordered Markdown lists, so that the numbers in your source match the numbers in your published HTML. But if you want to be lazy, you don't have to. 
From 9a5e13dc01e3a29a5f5ef4d971b938ee672e201d Mon Sep 17 00:00:00 2001 From: George Lund Date: Thu, 18 May 2023 10:57:20 +0100 Subject: [PATCH 07/12] Keep ConfluenceRenderer handling of code blocks same as Mistune MarkdownRenderer - all code blocks end with a newline - this means updating some examples in the functional test as they expected inconsistency --- md2cf/confluence_renderer.py | 2 ++ tests/functional/result.xml | 12 ++++++------ tests/unit/test_renderer.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py index a5d35cd..0447c4f 100644 --- a/md2cf/confluence_renderer.py +++ b/md2cf/confluence_renderer.py @@ -133,6 +133,8 @@ def block_code(self, code, info=None): lang_parameter = self.parameter(name="language", value=info) root_element.append(lang_parameter) root_element.append(self.parameter(name="linenumbers", value="true")) + if code and code[-1] != "\n": + code += "\n" root_element.append(self.plain_text_body(code)) return root_element.render() diff --git a/tests/functional/result.xml b/tests/functional/result.xml index c74bbb2..cb4bba4 100644 --- a/tests/functional/result.xml +++ b/tests/functional/result.xml @@ -96,7 +96,8 @@ and code blocks:

Here's some example code:

true - +

Any decent text editor should make email-style quoting easy. For @@ -204,7 +205,8 @@ to be indented twice -- 8 spaces or two tabs:

  • A list item with a code block:

    true -]]> + +]]>
@@ -218,7 +220,6 @@ block by at least 4 spaces or 1 tab.

This is a normal paragraph:

true

Here is an example of AppleScript:

@@ -226,7 +227,6 @@ block by at least 4 spaces or 1 tab.

A code block continues until it reaches a line that is not indented @@ -240,7 +240,6 @@ ampersands and angle brackets. For example, this:

© 2004 Foo Corporation - ]]>

Regular Markdown syntax is not processed within code blocks. E.g., @@ -249,7 +248,8 @@ it's also easy to use Markdown to write about Markdown's own syntax.

true +end tell +]]>

Span Elements

Links

diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py index 40c50fc..02cb714 100644 --- a/tests/unit/test_renderer.py +++ b/tests/unit/test_renderer.py @@ -124,7 +124,7 @@ def test_renderer_block_code(): test_markup = ( '' 'true\n' - "\n" + "\n" "\n" ) @@ -140,7 +140,7 @@ def test_renderer_block_code_with_language(): '' 'whitespace\n' 'true\n' - "\n" + "\n" "\n" ) From 35de663f0e767ea8838fb7e37fbf3561b85f0aaf Mon Sep 17 00:00:00 2001 From: George Lund Date: Thu, 18 May 2023 12:09:48 +0100 Subject: [PATCH 08/12] Align implementation of block_code with that in Mistune HTMLRenderer - add functional test of language tag on code block - fix unit test for block_code given renamed parameter --- md2cf/confluence_renderer.py | 6 +++++- tests/functional/result.xml | 6 ++++++ tests/functional/test.md | 6 ++++++ tests/unit/test_renderer.py | 2 +- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py index 0447c4f..a221b4d 100644 --- a/md2cf/confluence_renderer.py +++ b/md2cf/confluence_renderer.py @@ -4,6 +4,7 @@ from typing import List, NamedTuple import mistune +from mistune.util import safe_entity class RelativeLink(NamedTuple): @@ -130,7 +131,10 @@ def text(self, text): def block_code(self, code, info=None): root_element = self.structured_macro("code") if info is not None: - lang_parameter = self.parameter(name="language", value=info) + info = safe_entity(info.strip()) + if info: + lang = info.split(None, 1)[0] + lang_parameter = self.parameter(name="language", value=lang) root_element.append(lang_parameter) root_element.append(self.parameter(name="linenumbers", value="true")) if code and code[-1] != "\n": diff --git a/tests/functional/result.xml b/tests/functional/result.xml index cb4bba4..f96550a 100644 --- a/tests/functional/result.xml +++ b/tests/functional/result.xml @@ -251,6 +251,12 @@ it's also easy to use Markdown to write about Markdown's own syntax.

end tell ]]> +

Indicate the language thus:

+python +true + +

Span Elements

Links

Markdown supports two style of links: inline and reference.

diff --git a/tests/functional/test.md b/tests/functional/test.md index 0fd7ee3..4cbe7e4 100644 --- a/tests/functional/test.md +++ b/tests/functional/test.md @@ -265,6 +265,12 @@ tell application "Foo" end tell ``` +Indicate the language thus: + +```python +hello = "hello" +``` + ## Span Elements ### Links diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py index 02cb714..ab0eb73 100644 --- a/tests/unit/test_renderer.py +++ b/tests/unit/test_renderer.py @@ -146,7 +146,7 @@ def test_renderer_block_code_with_language(): renderer = ConfluenceRenderer() - assert renderer.block_code(test_code, lang=test_language) == test_markup + assert renderer.block_code(test_code, info=test_language) == test_markup def test_renderer_header_sets_title(): From 9fbb343ec88b97c2a736d2840d086c47fe0315ca Mon Sep 17 00:00:00 2001 From: George Lund Date: Thu, 18 May 2023 12:11:09 +0100 Subject: [PATCH 09/12] Fix unit tests given renamed header/heading function --- tests/unit/test_renderer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py index ab0eb73..950e9a1 100644 --- a/tests/unit/test_renderer.py +++ b/tests/unit/test_renderer.py @@ -112,7 +112,7 @@ def test_tag_render_with_child_and_text(): def test_renderer_reinit(): renderer = ConfluenceRenderer() - renderer.header("this is a title", 1) + renderer.heading("this is a title", 1) assert renderer.title is not None renderer.reinit() @@ -153,7 +153,7 @@ def test_renderer_header_sets_title(): test_header = "this is a header" renderer = ConfluenceRenderer() - renderer.header(test_header, 1) + renderer.heading(test_header, 1) assert renderer.title == test_header @@ -162,7 +162,7 @@ def test_renderer_strips_header(): test_header = "this is a header" renderer = ConfluenceRenderer(strip_header=True) - result = renderer.header(test_header, 1) + result = renderer.heading(test_header, 1) assert result == "" @@ -171,7 +171,7 @@ def 
test_renderer_header_lower_level_does_not_set_title(): test_header = "this is a header" renderer = ConfluenceRenderer() - renderer.header(test_header, 2) + renderer.heading(test_header, 2) assert renderer.title is None @@ -181,8 +181,8 @@ def test_renderer_header_later_level_sets_title(): test_header = "this is a header" renderer = ConfluenceRenderer() - renderer.header(test_lower_header, 2) - renderer.header(test_header, 1) + renderer.heading(test_lower_header, 2) + renderer.heading(test_header, 1) assert renderer.title is test_header @@ -192,8 +192,8 @@ def test_renderer_header_only_sets_first_title(): test_second_header = "this is another header" renderer = ConfluenceRenderer() - renderer.header(test_header, 1) - renderer.header(test_second_header, 1) + renderer.heading(test_header, 1) + renderer.heading(test_second_header, 1) assert renderer.title is test_header From 1474746abba4c01fb6add858e36add687d1dd0fa Mon Sep 17 00:00:00 2001 From: George Lund Date: Thu, 18 May 2023 12:36:22 +0100 Subject: [PATCH 10/12] Adjust unit tests to account for parameter order change --- tests/unit/test_renderer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py index 950e9a1..156a03e 100644 --- a/tests/unit/test_renderer.py +++ b/tests/unit/test_renderer.py @@ -207,7 +207,7 @@ def test_renderer_image_external(): renderer = ConfluenceRenderer() - assert renderer.image(test_image_src, "", "") == test_image_markup + assert renderer.image("", test_image_src, "") == test_image_markup assert not renderer.attachments @@ -223,7 +223,7 @@ def test_renderer_image_external_alt_and_title(): renderer = ConfluenceRenderer() assert ( - renderer.image(test_image_src, test_image_title, test_image_alt) + renderer.image(test_image_alt, test_image_src, test_image_title) == test_image_markup ) @@ -238,7 +238,7 @@ def test_renderer_image_internal_absolute(): renderer = ConfluenceRenderer() - assert 
renderer.image(test_image_src, "", "") == test_image_markup + assert renderer.image("", test_image_src, "") == test_image_markup assert renderer.attachments == [test_image_src] @@ -252,7 +252,7 @@ def test_renderer_image_internal_relative(): renderer = ConfluenceRenderer() - assert renderer.image(test_image_src, "", "") == test_image_markup + assert renderer.image("", test_image_src, "") == test_image_markup assert renderer.attachments == [test_image_src] From 457e923166771e481a1ce7f59f2ee343d79f9b67 Mon Sep 17 00:00:00 2001 From: George Lund Date: Thu, 18 May 2023 12:49:41 +0100 Subject: [PATCH 11/12] Add tests for both ways of handling newlines - turns out this is easy to break as Mistune behaviour has changed --- tests/unit/test_document.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/unit/test_document.py b/tests/unit/test_document.py index 944a87b..bb5484f 100644 --- a/tests/unit/test_document.py +++ b/tests/unit/test_document.py @@ -1,3 +1,4 @@ +from io import StringIO from pathlib import Path import md2cf.document as doc @@ -244,3 +245,26 @@ def test_get_document_frontmatter_empty(): """ assert doc.get_document_frontmatter(source_markdown.splitlines(keepends=True)) == {} + + +def test_parse_page_with_newlines(): + source_markdown = """ +Line1 +Line2 +""" + lines = StringIO(source_markdown).readlines() + assert ( + doc.parse_page(lines, remove_text_newlines=False).body + == "

Line1\nLine2

\n" + ) + + +def test_parse_page_with_newlines_removed(): + source_markdown = """ +Line1 +Line2 +""" + lines = StringIO(source_markdown).readlines() + assert ( + doc.parse_page(lines, remove_text_newlines=True).body == "

Line1 Line2

\n" + ) From f3cb5daebfad6486ae5c525b56721913b0add3a4 Mon Sep 17 00:00:00 2001 From: George Lund Date: Thu, 18 May 2023 13:14:24 +0100 Subject: [PATCH 12/12] Newline removal now has to be done in parsing rather than rendering due to the Mistune upgrade - each text callback occurs with a single line/token so doesn't receive the newline - instead we define a special InlineParser that handles soft break as spaces rather than newlines --- md2cf/confluence_renderer.py | 7 ------- md2cf/document.py | 14 ++++++++++++-- tests/unit/test_renderer.py | 9 --------- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py index a221b4d..16387fb 100644 --- a/md2cf/confluence_renderer.py +++ b/md2cf/confluence_renderer.py @@ -64,13 +64,11 @@ class ConfluenceRenderer(mistune.HTMLRenderer): def __init__( self, strip_header=False, - remove_text_newlines=False, enable_relative_links=False, **kwargs, ): super().__init__(**kwargs) self.strip_header = strip_header - self.remove_text_newlines = remove_text_newlines self.attachments = list() self.title = None self.enable_relative_links = enable_relative_links @@ -123,11 +121,6 @@ def link(self, text, url, title=None): url = replacement_link return super(ConfluenceRenderer, self).link(text, url, title) - def text(self, text): - if self.remove_text_newlines: - text = text.replace("\n", " ") - return super().text(text) - def block_code(self, code, info=None): root_element = self.structured_macro("code") if info is not None: diff --git a/md2cf/document.py b/md2cf/document.py index 5b13753..da05399 100644 --- a/md2cf/document.py +++ b/md2cf/document.py @@ -71,6 +71,12 @@ def __repr__(self): ) +class LineBreakIgnoringInlineParser(mistune.InlineParser): + def parse_softbreak(self, m, state) -> int: + state.append_token({"type": "text", "raw": " "}) + return m.end() + + def find_non_empty_parent_path( current_dir: Path, folder_data: Dict[Path, Dict[str, Any]], default: Path ) -> 
Path: @@ -269,10 +275,14 @@ def parse_page( ) -> Page: renderer = ConfluenceRenderer( strip_header=strip_header, - remove_text_newlines=remove_text_newlines, enable_relative_links=enable_relative_links, ) - confluence_mistune = mistune.Markdown(renderer=renderer) + if remove_text_newlines: + inline_parser = LineBreakIgnoringInlineParser() + else: + inline_parser = mistune.InlineParser() + + confluence_mistune = mistune.Markdown(renderer=renderer, inline=inline_parser) confluence_content = confluence_mistune("".join(markdown_lines)) page = Page( diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py index 156a03e..8c3226e 100644 --- a/tests/unit/test_renderer.py +++ b/tests/unit/test_renderer.py @@ -254,12 +254,3 @@ def test_renderer_image_internal_relative(): assert renderer.image("", test_image_src, "") == test_image_markup assert renderer.attachments == [test_image_src] - - -def test_renderer_remove_text_newlines(): - test_text = "This is a paragraph\nwith some newlines\nin it." - test_stripped_text = "This is a paragraph with some newlines in it." - - renderer = ConfluenceRenderer(remove_text_newlines=True) - - assert renderer.text(test_text) == test_stripped_text