diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b5ed600..18ae735 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,7 @@ repos: hooks: - id: trailing-whitespace exclude: .bumpversion.cfg + args: [--markdown-linebreak-ext=md] - id: end-of-file-fixer exclude: '.bumpversion.cfg' - id: check-yaml diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py index ed46aa4..16387fb 100644 --- a/md2cf/confluence_renderer.py +++ b/md2cf/confluence_renderer.py @@ -4,6 +4,7 @@ from typing import List, NamedTuple import mistune +from mistune.util import safe_entity class RelativeLink(NamedTuple): @@ -59,17 +60,15 @@ def append(self, child): self.children.append(child) -class ConfluenceRenderer(mistune.Renderer): +class ConfluenceRenderer(mistune.HTMLRenderer): def __init__( self, strip_header=False, - remove_text_newlines=False, enable_relative_links=False, **kwargs, ): super().__init__(**kwargs) self.strip_header = strip_header - self.remove_text_newlines = remove_text_newlines self.attachments = list() self.title = None self.enable_relative_links = enable_relative_links @@ -80,14 +79,14 @@ def reinit(self): self.relative_links = list() self.title = None - def header(self, text, level, raw=None): + def heading(self, text, level, **attrs): if self.title is None and level == 1: self.title = text # Don't duplicate page title as a header if self.strip_header: return "" - return super(ConfluenceRenderer, self).header(text, level, raw=raw) + return super(ConfluenceRenderer, self).heading(text, level, **attrs) def structured_macro(self, name): return ConfluenceTag("structured-macro", attrib={"name": name}) @@ -102,8 +101,8 @@ def plain_text_body(self, text): body_tag.text = text return body_tag - def link(self, link, title, text): - parsed_link = urlparse.urlparse(link) + def link(self, text, url, title=None): + parsed_link = urlparse.urlparse(url) if self.enable_relative_links and ( not parsed_link.scheme and not parsed_link.netloc @@ -115,44 +114,43 @@ def link(self, link, title, text): RelativeLink( path=parsed_link.path, replacement=replacement_link, - original=link, - escaped_original=mistune.escape_link(link), + original=url, + escaped_original=mistune.escape_link(url), ) ) - link = replacement_link - return super(ConfluenceRenderer, self).link(link, title, text) + url = replacement_link + return super(ConfluenceRenderer, self).link(text, url, title) - def text(self, text): - if self.remove_text_newlines: - text = text.replace("\n", " ") - - return super().text(text) - - def block_code(self, code, lang=None): + def block_code(self, code, info=None): root_element = self.structured_macro("code") - if lang is not None: + if info is not None: + info = safe_entity(info.strip()) + if info: + lang = info.split(None, 1)[0] lang_parameter = self.parameter(name="language", value=lang) root_element.append(lang_parameter) root_element.append(self.parameter(name="linenumbers", value="true")) + if code and code[-1] != "\n": + code += "\n" root_element.append(self.plain_text_body(code)) return root_element.render() - def image(self, src, title, text): - attributes = {"alt": text} + def image(self, alt, url, title=None): + attributes = {"alt": alt} if title: attributes["title"] = title root_element = ConfluenceTag(name="image", attrib=attributes) - parsed_source = urlparse.urlparse(src) + parsed_source = urlparse.urlparse(url) if not parsed_source.netloc: # Local file, requires upload - basename = Path(src).name + basename = Path(parsed_source.path).name url_tag = ConfluenceTag( "attachment", attrib={"filename": basename}, namespace="ri" ) - self.attachments.append(src) + self.attachments.append(urlparse.unquote(parsed_source.path)) else: - url_tag = ConfluenceTag("url", attrib={"value": src}, namespace="ri") + url_tag = ConfluenceTag("url", attrib={"value": url}, namespace="ri") root_element.append(url_tag) return root_element.render() diff --git a/md2cf/document.py b/md2cf/document.py index b0cce37..da05399 100644 --- a/md2cf/document.py +++ b/md2cf/document.py @@ -71,6 +71,12 @@ def __repr__(self): ) +class LineBreakIgnoringInlineParser(mistune.InlineParser): + def parse_softbreak(self, m, state) -> int: + state.append_token({"type": "text", "raw": " "}) + return m.end() + + def find_non_empty_parent_path( current_dir: Path, folder_data: Dict[Path, Dict[str, Any]], default: Path ) -> Path: @@ -268,12 +274,15 @@ def parse_page( enable_relative_links: bool = False, ) -> Page: renderer = ConfluenceRenderer( - use_xhtml=True, strip_header=strip_header, - remove_text_newlines=remove_text_newlines, enable_relative_links=enable_relative_links, ) - confluence_mistune = mistune.Markdown(renderer=renderer) + if remove_text_newlines: + inline_parser = LineBreakIgnoringInlineParser() + else: + inline_parser = mistune.InlineParser() + + confluence_mistune = mistune.Markdown(renderer=renderer, inline=inline_parser) confluence_content = confluence_mistune("".join(markdown_lines)) page = Page( diff --git a/setup.py b/setup.py index 7d15fbd..f9ce788 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ install_requires=[ "rich-argparse==1.0.0", "rich==13.0.1", - "mistune==0.8.4", + "mistune==3.0.0rc5", "chardet==5.1.0", "requests==2.28.2", "PyYAML==6.0", diff --git a/tests/functional/result.xml b/tests/functional/result.xml index ca22230..f96550a 100644 --- a/tests/functional/result.xml +++ b/tests/functional/result.xml @@ -41,16 +41,17 @@ inspiration for Markdown's syntax is the format of plain text email.

by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered blank.) Normal paragraphs should not be indented with spaces or tabs.

-

The implication of the "one or more consecutive lines of text" rule is -that Markdown supports "hard-wrapped" text paragraphs. This differs +

The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs significantly from most other text-to-HTML formatters (including Movable -Type's "Convert Line Breaks" option) which translate every line break +Type's "Convert Line Breaks" option) which translate every line break character in a paragraph into a <br /> tag.

When you do want to insert a <br /> break tag using Markdown, you -end a line with two or more spaces, then type return.

+end a line with two or more spaces, then type return, like...
+this.

Headers

Markdown supports two styles of headers, [Setext] [1] and [atx] [2].

-

Optionally, you may "close" atx-style headers. This is purely +

Optionally, you may "close" atx-style headers. This is purely cosmetic -- you can use this if you think it looks better. The closing hashes don't even need to match the number of hashes used to open the header. (The number of opening hashes @@ -60,7 +61,8 @@ determines the header level.)

familiar with quoting passages of text in an email message, then you know how to create a blockquote in Markdown. It looks best if you hard wrap the text and put a > before every line:

-

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +

+

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse @@ -68,7 +70,8 @@ id sem consectetuer libero luctus adipiscing.

Markdown allows you to be lazy and only put the > before the first line of a hard-wrapped paragraph:

-

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +

+

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse @@ -76,21 +79,25 @@ id sem consectetuer libero luctus adipiscing.

Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by adding additional levels of >:

-

This is the first level of quoting.

-

This is nested blockquote.

+
+

This is the first level of quoting.

+
+

This is nested blockquote.

Back to the first level.

Blockquotes can contain other Markdown elements, including headers, lists, and code blocks:

-

This is a header.

+
+

This is a header.

  1. This is the first list item.
  2. This is the second list item.

Here's some example code:

true - +

Any decent text editor should make email-style quoting easy. For @@ -123,22 +130,21 @@ Quote Level from the Text menu.

  • McHale
  • Parish
  • -

    It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is:

    -

    If you instead wrote the list in Markdown like this:

    +

    Only the first number in a list has any effect.

    +

    You can therefore write the same list in Markdown like this:

    1. Bird
    2. McHale
    3. Parish
    -

    or even:

    -
      +

      You specify a different start number, but the numbers you use for +subsequent items will be ignored:

      +
      1. Bird
      2. McHale
      3. Parish
      -

      you'd get the exact same HTML output. The point is, if you want to, +

      The point is, if you want to, you can use ordinal numbers in your ordered Markdown lists, so that the numbers in your source match the numbers in your published HTML. But if you want to be lazy, you don't have to.

      @@ -188,7 +194,8 @@ sit amet, consectetuer adipiscing elit.

      delimiters need to be indented:

      • A list item with a blockquote:

        -

        This is a blockquote +

        +

        This is a blockquote inside a list item.

      • @@ -198,7 +205,8 @@ to be indented twice -- 8 spaces or two tabs:

        • A list item with a code block:

          true -]]> + +]]>
        @@ -212,7 +220,6 @@ block by at least 4 spaces or 1 tab.

        This is a normal paragraph:

        true

        Here is an example of AppleScript:

        @@ -220,7 +227,6 @@ block by at least 4 spaces or 1 tab.

        A code block continues until it reaches a line that is not indented @@ -234,7 +240,6 @@ ampersands and angle brackets. For example, this:

        © 2004 Foo Corporation - ]]>

        Regular Markdown syntax is not processed within code blocks. E.g., @@ -243,7 +248,14 @@ it's also easy to use Markdown to write about Markdown's own syntax.

        true +end tell +]]> + +

        Indicate the language thus:

        +python +true +

        Span Elements

        Links

        diff --git a/tests/functional/test.md b/tests/functional/test.md index 8aaab12..4cbe7e4 100644 --- a/tests/functional/test.md +++ b/tests/functional/test.md @@ -52,7 +52,8 @@ Type's "Convert Line Breaks" option) which translate every line break character in a paragraph into a `
        ` tag. When you *do* want to insert a `
        ` break tag using Markdown, you -end a line with two or more spaces, then type return. +end a line with two or more spaces, then type return, like... +this. ### Headers @@ -85,7 +86,7 @@ line of a hard-wrapped paragraph: > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. - +> > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing. @@ -144,23 +145,22 @@ Ordered lists use numbers followed by periods: 2. McHale 3. Parish -It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is: +Only the first number in a list has any effect. -If you instead wrote the list in Markdown like this: +You can therefore write the same list in Markdown like this: 1. Bird 1. McHale 1. Parish -or even: +You specify a different start number, but the numbers you use for +subsequent items will be ignored: 3. Bird 1. McHale 8. Parish -you'd get the exact same HTML output. The point is, if you want to, +The point is, if you want to, you can use ordinal numbers in your ordered Markdown lists, so that the numbers in your source match the numbers in your published HTML. But if you want to be lazy, you don't have to. @@ -265,6 +265,12 @@ tell application "Foo" end tell ``` +Indicate the language thus: + +```python +hello = "hello" +``` + ## Span Elements ### Links diff --git a/tests/unit/test_document.py b/tests/unit/test_document.py index 944a87b..bb5484f 100644 --- a/tests/unit/test_document.py +++ b/tests/unit/test_document.py @@ -1,3 +1,4 @@ +from io import StringIO from pathlib import Path import md2cf.document as doc @@ -244,3 +245,26 @@ def test_get_document_frontmatter_empty(): """ assert doc.get_document_frontmatter(source_markdown.splitlines(keepends=True)) == {} + + +def test_parse_page_with_newlines(): + source_markdown = """ +Line1 +Line2 +""" + lines = StringIO(source_markdown).readlines() + assert ( + doc.parse_page(lines, remove_text_newlines=False).body + == "

        Line1\nLine2

        \n" + ) + + +def test_parse_page_with_newlines_removed(): + source_markdown = """ +Line1 +Line2 +""" + lines = StringIO(source_markdown).readlines() + assert ( + doc.parse_page(lines, remove_text_newlines=True).body == "

        Line1 Line2

        \n" + ) diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py index 40c50fc..8c3226e 100644 --- a/tests/unit/test_renderer.py +++ b/tests/unit/test_renderer.py @@ -112,7 +112,7 @@ def test_tag_render_with_child_and_text(): def test_renderer_reinit(): renderer = ConfluenceRenderer() - renderer.header("this is a title", 1) + renderer.heading("this is a title", 1) assert renderer.title is not None renderer.reinit() @@ -124,7 +124,7 @@ def test_renderer_block_code(): test_markup = ( '' 'true\n' - "\n" + "\n" "\n" ) @@ -140,20 +140,20 @@ def test_renderer_block_code_with_language(): '' 'whitespace\n' 'true\n' - "\n" + "\n" "\n" ) renderer = ConfluenceRenderer() - assert renderer.block_code(test_code, lang=test_language) == test_markup + assert renderer.block_code(test_code, info=test_language) == test_markup def test_renderer_header_sets_title(): test_header = "this is a header" renderer = ConfluenceRenderer() - renderer.header(test_header, 1) + renderer.heading(test_header, 1) assert renderer.title == test_header @@ -162,7 +162,7 @@ def test_renderer_strips_header(): test_header = "this is a header" renderer = ConfluenceRenderer(strip_header=True) - result = renderer.header(test_header, 1) + result = renderer.heading(test_header, 1) assert result == "" @@ -171,7 +171,7 @@ def test_renderer_header_lower_level_does_not_set_title(): test_header = "this is a header" renderer = ConfluenceRenderer() - renderer.header(test_header, 2) + renderer.heading(test_header, 2) assert renderer.title is None @@ -181,8 +181,8 @@ def test_renderer_header_later_level_sets_title(): test_header = "this is a header" renderer = ConfluenceRenderer() - renderer.header(test_lower_header, 2) - renderer.header(test_header, 1) + renderer.heading(test_lower_header, 2) + renderer.heading(test_header, 1) assert renderer.title is test_header @@ -192,8 +192,8 @@ def test_renderer_header_only_sets_first_title(): test_second_header = "this is another header" renderer = ConfluenceRenderer() - renderer.header(test_header, 1) - renderer.header(test_second_header, 1) + renderer.heading(test_header, 1) + renderer.heading(test_second_header, 1) assert renderer.title is test_header @@ -207,7 +207,7 @@ def test_renderer_image_external(): renderer = ConfluenceRenderer() - assert renderer.image(test_image_src, "", "") == test_image_markup + assert renderer.image("", test_image_src, "") == test_image_markup assert not renderer.attachments @@ -223,7 +223,7 @@ def test_renderer_image_external_alt_and_title(): renderer = ConfluenceRenderer() assert ( - renderer.image(test_image_src, test_image_title, test_image_alt) + renderer.image(test_image_alt, test_image_src, test_image_title) == test_image_markup ) @@ -238,7 +238,7 @@ def test_renderer_image_internal_absolute(): renderer = ConfluenceRenderer() - assert renderer.image(test_image_src, "", "") == test_image_markup + assert renderer.image("", test_image_src, "") == test_image_markup assert renderer.attachments == [test_image_src] @@ -252,14 +252,5 @@ def test_renderer_image_internal_relative(): renderer = ConfluenceRenderer() - assert renderer.image(test_image_src, "", "") == test_image_markup + assert renderer.image("", test_image_src, "") == test_image_markup assert renderer.attachments == [test_image_src] - - -def test_renderer_remove_text_newlines(): - test_text = "This is a paragraph\nwith some newlines\nin it." - test_stripped_text = "This is a paragraph with some newlines in it." - - renderer = ConfluenceRenderer(remove_text_newlines=True) - - assert renderer.text(test_text) == test_stripped_text