From 6c9f3dd28da349d6b3fc77be127f863a0397fdd7 Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Tue, 16 May 2023 18:08:37 +0100
Subject: [PATCH 01/12] Upgrade Mistune to version 3 (release candidate, so
 tentative)

- header renderer function renamed to heading and args changed
- link arguments changed
- block_code arguments changed
- image arguments changed and local filepath needs URL-decoding

NB work in progress - tests not fixed
---
 md2cf/confluence_renderer.py | 37 ++++++++++++++++++------------------
 md2cf/document.py            |  1 -
 setup.py                     |  2 +-
 3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py
index ed46aa4..a5d35cd 100644
--- a/md2cf/confluence_renderer.py
+++ b/md2cf/confluence_renderer.py
@@ -59,7 +59,7 @@ def append(self, child):
         self.children.append(child)
 
 
-class ConfluenceRenderer(mistune.Renderer):
+class ConfluenceRenderer(mistune.HTMLRenderer):
     def __init__(
         self,
         strip_header=False,
@@ -80,14 +80,14 @@ def reinit(self):
         self.relative_links = list()
         self.title = None
 
-    def header(self, text, level, raw=None):
+    def heading(self, text, level, **attrs):
         if self.title is None and level == 1:
             self.title = text
             # Don't duplicate page title as a header
             if self.strip_header:
                 return ""
 
-        return super(ConfluenceRenderer, self).header(text, level, raw=raw)
+        return super(ConfluenceRenderer, self).heading(text, level, **attrs)
 
     def structured_macro(self, name):
         return ConfluenceTag("structured-macro", attrib={"name": name})
@@ -102,8 +102,8 @@ def plain_text_body(self, text):
         body_tag.text = text
         return body_tag
 
-    def link(self, link, title, text):
-        parsed_link = urlparse.urlparse(link)
+    def link(self, text, url, title=None):
+        parsed_link = urlparse.urlparse(url)
         if self.enable_relative_links and (
             not parsed_link.scheme
             and not parsed_link.netloc
@@ -115,44 +115,43 @@ def link(self, link, title, text):
                 RelativeLink(
                     path=parsed_link.path,
                     replacement=replacement_link,
-                    original=link,
-                    escaped_original=mistune.escape_link(link),
+                    original=url,
+                    escaped_original=mistune.escape_link(url),
                 )
             )
-            link = replacement_link
-        return super(ConfluenceRenderer, self).link(link, title, text)
+            url = replacement_link
+        return super(ConfluenceRenderer, self).link(text, url, title)
 
     def text(self, text):
         if self.remove_text_newlines:
             text = text.replace("\n", " ")
-
         return super().text(text)
 
-    def block_code(self, code, lang=None):
+    def block_code(self, code, info=None):
         root_element = self.structured_macro("code")
-        if lang is not None:
-            lang_parameter = self.parameter(name="language", value=lang)
+        if info is not None:
+            lang_parameter = self.parameter(name="language", value=info)
             root_element.append(lang_parameter)
         root_element.append(self.parameter(name="linenumbers", value="true"))
         root_element.append(self.plain_text_body(code))
         return root_element.render()
 
-    def image(self, src, title, text):
-        attributes = {"alt": text}
+    def image(self, alt, url, title=None):
+        attributes = {"alt": alt}
         if title:
             attributes["title"] = title
 
         root_element = ConfluenceTag(name="image", attrib=attributes)
-        parsed_source = urlparse.urlparse(src)
+        parsed_source = urlparse.urlparse(url)
         if not parsed_source.netloc:
             # Local file, requires upload
-            basename = Path(src).name
+            basename = Path(parsed_source.path).name
             url_tag = ConfluenceTag(
                 "attachment", attrib={"filename": basename}, namespace="ri"
             )
-            self.attachments.append(src)
+            self.attachments.append(urlparse.unquote(parsed_source.path))
         else:
-            url_tag = ConfluenceTag("url", attrib={"value": src}, namespace="ri")
+            url_tag = ConfluenceTag("url", attrib={"value": url}, namespace="ri")
         root_element.append(url_tag)
 
         return root_element.render()
diff --git a/md2cf/document.py b/md2cf/document.py
index b0cce37..5b13753 100644
--- a/md2cf/document.py
+++ b/md2cf/document.py
@@ -268,7 +268,6 @@ def parse_page(
     enable_relative_links: bool = False,
 ) -> Page:
     renderer = ConfluenceRenderer(
-        use_xhtml=True,
         strip_header=strip_header,
         remove_text_newlines=remove_text_newlines,
         enable_relative_links=enable_relative_links,
diff --git a/setup.py b/setup.py
index 7d15fbd..f9ce788 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@
     install_requires=[
         "rich-argparse==1.0.0",
         "rich==13.0.1",
-        "mistune==0.8.4",
+        "mistune==3.0.0rc5",
         "chardet==5.1.0",
         "requests==2.28.2",
         "PyYAML==6.0",

From 5b9a222173a621140c3af704d507ebe7a74b24d2 Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Wed, 17 May 2023 12:42:14 +0100
Subject: [PATCH 02/12] New version of mistune always escapes quotes using
 &quot; entity

---
 tests/functional/result.xml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/functional/result.xml b/tests/functional/result.xml
index ca22230..b56956b 100644
--- a/tests/functional/result.xml
+++ b/tests/functional/result.xml
@@ -41,16 +41,16 @@ inspiration for Markdown's syntax is the format of plain text email.</p>
 by one or more blank lines. (A blank line is any line that looks like a
 blank line -- a line containing nothing but spaces or tabs is considered
 blank.) Normal paragraphs should not be indented with spaces or tabs.</p>
-<p>The implication of the "one or more consecutive lines of text" rule is
-that Markdown supports "hard-wrapped" text paragraphs. This differs
+<p>The implication of the &quot;one or more consecutive lines of text&quot; rule is
+that Markdown supports &quot;hard-wrapped&quot; text paragraphs. This differs
 significantly from most other text-to-HTML formatters (including Movable
-Type's "Convert Line Breaks" option) which translate every line break
+Type's &quot;Convert Line Breaks&quot; option) which translate every line break
 character in a paragraph into a <code>&lt;br /&gt;</code> tag.</p>
 <p>When you <em>do</em> want to insert a <code>&lt;br /&gt;</code> break tag using Markdown, you
 end a line with two or more spaces, then type return.</p>
 <h3>Headers</h3>
 <p>Markdown supports two styles of headers, [Setext] [1] and [atx] [2].</p>
-<p>Optionally, you may "close" atx-style headers. This is purely
+<p>Optionally, you may &quot;close&quot; atx-style headers. This is purely
 cosmetic -- you can use this if you think it looks better. The
 closing hashes don't even need to match the number of hashes
 used to open the header. (The number of opening hashes

From c1536d3d3a630f31abbf63275b3c38f052029891 Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Wed, 17 May 2023 12:44:07 +0100
Subject: [PATCH 03/12] Include demonstration of forcing a line break using two
 spaces

---
 .pre-commit-config.yaml     | 1 +
 tests/functional/result.xml | 3 ++-
 tests/functional/test.md    | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b5ed600..18ae735 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,6 +4,7 @@ repos:
     hooks:
       - id: trailing-whitespace
         exclude: .bumpversion.cfg
+        args: [--markdown-linebreak-ext=md]
       - id: end-of-file-fixer
         exclude: '.bumpversion.cfg'
       - id: check-yaml
diff --git a/tests/functional/result.xml b/tests/functional/result.xml
index b56956b..a8ceb5b 100644
--- a/tests/functional/result.xml
+++ b/tests/functional/result.xml
@@ -47,7 +47,8 @@ significantly from most other text-to-HTML formatters (including Movable
 Type's &quot;Convert Line Breaks&quot; option) which translate every line break
 character in a paragraph into a <code>&lt;br /&gt;</code> tag.</p>
 <p>When you <em>do</em> want to insert a <code>&lt;br /&gt;</code> break tag using Markdown, you
-end a line with two or more spaces, then type return.</p>
+end a line with two or more spaces, then type return, like...<br />
+this.</p>
 <h3>Headers</h3>
 <p>Markdown supports two styles of headers, [Setext] [1] and [atx] [2].</p>
 <p>Optionally, you may &quot;close&quot; atx-style headers. This is purely
diff --git a/tests/functional/test.md b/tests/functional/test.md
index 8aaab12..fe1b6ac 100644
--- a/tests/functional/test.md
+++ b/tests/functional/test.md
@@ -52,7 +52,8 @@ Type's "Convert Line Breaks" option) which translate every line break
 character in a paragraph into a `<br />` tag.
 
 When you *do* want to insert a `<br />` break tag using Markdown, you
-end a line with two or more spaces, then type return.
+end a line with two or more spaces, then type return, like...  
+this.
 
 ### Headers
 

From 899c989c52ecc1f93f3dbac6990584c4053c1a63 Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Wed, 17 May 2023 17:24:12 +0100
Subject: [PATCH 04/12] New version of Mistune treats blockquotes separated by a
 blank line as separate elements

- new Mistune behaviour is correct
- we're trying to test a single blockquote with multiple paragraphs (where only the first line of each para has an angle bracket)
- so add the extra angle bracket on the blank line, to correspond with the intended output
---
 tests/functional/test.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/functional/test.md b/tests/functional/test.md
index fe1b6ac..c842aa7 100644
--- a/tests/functional/test.md
+++ b/tests/functional/test.md
@@ -86,7 +86,7 @@ line of a hard-wrapped paragraph:
 > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
 consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
 Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
-
+>
 > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
 id sem consectetuer libero luctus adipiscing.
 

From 50034524b4f511254fb45d055bb92c779a9f5640 Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Wed, 17 May 2023 17:34:20 +0100
Subject: [PATCH 05/12] New version of Mistune puts <blockquote> on a line of
 its own

- no change to semantics of the output markup
---
 tests/functional/result.xml | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tests/functional/result.xml b/tests/functional/result.xml
index a8ceb5b..91ebaa7 100644
--- a/tests/functional/result.xml
+++ b/tests/functional/result.xml
@@ -61,7 +61,8 @@ determines the header level.)</p>
 familiar with quoting passages of text in an email message, then you
 know how to create a blockquote in Markdown. It looks best if you hard
 wrap the text and put a <code>&gt;</code> before every line:</p>
-<blockquote><p>This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+<blockquote>
+<p>This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
 consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
 Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.</p>
 <p>Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
@@ -69,7 +70,8 @@ id sem consectetuer libero luctus adipiscing.</p>
 </blockquote>
 <p>Markdown allows you to be lazy and only put the <code>&gt;</code> before the first
 line of a hard-wrapped paragraph:</p>
-<blockquote><p>This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+<blockquote>
+<p>This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
 consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
 Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.</p>
 <p>Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
@@ -77,14 +79,17 @@ id sem consectetuer libero luctus adipiscing.</p>
 </blockquote>
 <p>Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
 adding additional levels of <code>&gt;</code>:</p>
-<blockquote><p>This is the first level of quoting.</p>
-<blockquote><p>This is nested blockquote.</p>
+<blockquote>
+<p>This is the first level of quoting.</p>
+<blockquote>
+<p>This is nested blockquote.</p>
 </blockquote>
 <p>Back to the first level.</p>
 </blockquote>
 <p>Blockquotes can contain other Markdown elements, including headers, lists,
 and code blocks:</p>
-<blockquote><h2>This is a header.</h2>
+<blockquote>
+<h2>This is a header.</h2>
 <ol>
 <li>This is the first list item.</li>
 <li>This is the second list item.</li>
@@ -189,7 +194,8 @@ sit amet, consectetuer adipiscing elit.</p>
 delimiters need to be indented:</p>
 <ul>
 <li><p>A list item with a blockquote:</p>
-<blockquote><p>This is a blockquote
+<blockquote>
+<p>This is a blockquote
 inside a list item.</p>
 </blockquote>
 </li>

From 579d09fce1ba3969c49b22995acf331067494e80 Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Wed, 17 May 2023 17:55:06 +0100
Subject: [PATCH 06/12] The number of the first item in an ordered list affects
 the resulting markup

- there is a substantial difference between the [syntax previously assumed](https://daringfireball.net/projects/markdown/syntax#list) and the [CommonMark syntax](https://spec.commonmark.org/0.30/#start-number)
- Mistune now follows CommonMark so the example doc must change

Arguably the example doc here is not testing md2cf functionality and should be removed

- It's not particularly useful to test a lib that we depend on, unless we are testing something that matters to Confluence
- Even then, deviation from CommonMark is not helpful, though potentially we could flag a warning somehow
---
 tests/functional/result.xml | 13 ++++++-------
 tests/functional/test.md    | 11 +++++------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/tests/functional/result.xml b/tests/functional/result.xml
index 91ebaa7..c74bbb2 100644
--- a/tests/functional/result.xml
+++ b/tests/functional/result.xml
@@ -129,22 +129,21 @@ Quote Level from the Text menu.</p>
 <li>McHale</li>
 <li>Parish</li>
 </ol>
-<p>It's important to note that the actual numbers you use to mark the
-list have no effect on the HTML output Markdown produces. The HTML
-Markdown produces from the above list is:</p>
-<p>If you instead wrote the list in Markdown like this:</p>
+<p>Only the first number in a list has any effect.</p>
+<p>You can therefore write the same list in Markdown like this:</p>
 <ol>
 <li>Bird</li>
 <li>McHale</li>
 <li>Parish</li>
 </ol>
-<p>or even:</p>
-<ol>
+<p>You specify a different start number, but the numbers you use for
+subsequent items will be ignored:</p>
+<ol start="3">
 <li>Bird</li>
 <li>McHale</li>
 <li>Parish</li>
 </ol>
-<p>you'd get the exact same HTML output. The point is, if you want to,
+<p>The point is, if you want to,
 you can use ordinal numbers in your ordered Markdown lists, so that
 the numbers in your source match the numbers in your published HTML.
 But if you want to be lazy, you don't have to.</p>
diff --git a/tests/functional/test.md b/tests/functional/test.md
index c842aa7..0fd7ee3 100644
--- a/tests/functional/test.md
+++ b/tests/functional/test.md
@@ -145,23 +145,22 @@ Ordered lists use numbers followed by periods:
 2.  McHale
 3.  Parish
 
-It's important to note that the actual numbers you use to mark the
-list have no effect on the HTML output Markdown produces. The HTML
-Markdown produces from the above list is:
+Only the first number in a list has any effect.
 
-If you instead wrote the list in Markdown like this:
+You can therefore write the same list in Markdown like this:
 
 1.  Bird
 1.  McHale
 1.  Parish
 
-or even:
+You specify a different start number, but the numbers you use for
+subsequent items will be ignored:
 
 3. Bird
 1. McHale
 8. Parish
 
-you'd get the exact same HTML output. The point is, if you want to,
+The point is, if you want to,
 you can use ordinal numbers in your ordered Markdown lists, so that
 the numbers in your source match the numbers in your published HTML.
 But if you want to be lazy, you don't have to.

From 9a5e13dc01e3a29a5f5ef4d971b938ee672e201d Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Thu, 18 May 2023 10:57:20 +0100
Subject: [PATCH 07/12] Keep ConfluenceRenderer handling of code blocks same as
 Mistune MarkdownRenderer

- all code blocks end with a newline
- this means updating some examples in the functional test as they expected inconsistency
---
 md2cf/confluence_renderer.py |  2 ++
 tests/functional/result.xml  | 12 ++++++------
 tests/unit/test_renderer.py  |  4 ++--
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py
index a5d35cd..0447c4f 100644
--- a/md2cf/confluence_renderer.py
+++ b/md2cf/confluence_renderer.py
@@ -133,6 +133,8 @@ def block_code(self, code, info=None):
             lang_parameter = self.parameter(name="language", value=info)
             root_element.append(lang_parameter)
         root_element.append(self.parameter(name="linenumbers", value="true"))
+        if code and code[-1] != "\n":
+            code += "\n"
         root_element.append(self.plain_text_body(code))
         return root_element.render()
 
diff --git a/tests/functional/result.xml b/tests/functional/result.xml
index c74bbb2..cb4bba4 100644
--- a/tests/functional/result.xml
+++ b/tests/functional/result.xml
@@ -96,7 +96,8 @@ and code blocks:</p>
 </ol>
 <p>Here's some example code:</p>
 <ac:structured-macro ac:name="code"><ac:parameter ac:name="linenumbers">true</ac:parameter>
-<ac:plain-text-body><![CDATA[return shell_exec("echo $input | $markdown_script");]]></ac:plain-text-body>
+<ac:plain-text-body><![CDATA[return shell_exec("echo $input | $markdown_script");
+]]></ac:plain-text-body>
 </ac:structured-macro>
 </blockquote>
 <p>Any decent text editor should make email-style quoting easy. For
@@ -204,7 +205,8 @@ to be indented <em>twice</em> -- 8 spaces or two tabs:</p>
 <ul>
 <li><p>A list item with a code block:</p>
 <ac:structured-macro ac:name="code"><ac:parameter ac:name="linenumbers">true</ac:parameter>
-<ac:plain-text-body><![CDATA[<code goes here>]]></ac:plain-text-body>
+<ac:plain-text-body><![CDATA[<code goes here>
+]]></ac:plain-text-body>
 </ac:structured-macro>
 </li>
 </ul>
@@ -218,7 +220,6 @@ block by at least 4 spaces or 1 tab.</p>
 <p>This is a normal paragraph:</p>
 <ac:structured-macro ac:name="code"><ac:parameter ac:name="linenumbers">true</ac:parameter>
 <ac:plain-text-body><![CDATA[This is a code block.
-
 ]]></ac:plain-text-body>
 </ac:structured-macro>
 <p>Here is an example of AppleScript:</p>
@@ -226,7 +227,6 @@ block by at least 4 spaces or 1 tab.</p>
 <ac:plain-text-body><![CDATA[tell application "Foo"
     beep
 end tell
-
 ]]></ac:plain-text-body>
 </ac:structured-macro>
 <p>A code block continues until it reaches a line that is not indented
@@ -240,7 +240,6 @@ ampersands and angle brackets. For example, this:</p>
 <ac:plain-text-body><![CDATA[<div class="footer">
     &copy; 2004 Foo Corporation
 </div>
-
 ]]></ac:plain-text-body>
 </ac:structured-macro>
 <p>Regular Markdown syntax is not processed within code blocks. E.g.,
@@ -249,7 +248,8 @@ it's also easy to use Markdown to write about Markdown's own syntax.</p>
 <ac:structured-macro ac:name="code"><ac:parameter ac:name="linenumbers">true</ac:parameter>
 <ac:plain-text-body><![CDATA[tell application "Foo"
     beep
-end tell]]></ac:plain-text-body>
+end tell
+]]></ac:plain-text-body>
 </ac:structured-macro>
 <h2>Span Elements</h2>
 <h3>Links</h3>
diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py
index 40c50fc..02cb714 100644
--- a/tests/unit/test_renderer.py
+++ b/tests/unit/test_renderer.py
@@ -124,7 +124,7 @@ def test_renderer_block_code():
     test_markup = (
         '<ac:structured-macro ac:name="code">'
         '<ac:parameter ac:name="linenumbers">true</ac:parameter>\n'
-        "<ac:plain-text-body><![CDATA[this is a piece of code]]></ac:plain-text-body>\n"
+        "<ac:plain-text-body><![CDATA[this is a piece of code\n]]></ac:plain-text-body>\n"
         "</ac:structured-macro>\n"
     )
 
@@ -140,7 +140,7 @@ def test_renderer_block_code_with_language():
         '<ac:structured-macro ac:name="code">'
         '<ac:parameter ac:name="language">whitespace</ac:parameter>\n'
         '<ac:parameter ac:name="linenumbers">true</ac:parameter>\n'
-        "<ac:plain-text-body><![CDATA[this is a piece of code]]></ac:plain-text-body>\n"
+        "<ac:plain-text-body><![CDATA[this is a piece of code\n]]></ac:plain-text-body>\n"
         "</ac:structured-macro>\n"
     )
 

From 35de663f0e767ea8838fb7e37fbf3561b85f0aaf Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Thu, 18 May 2023 12:09:48 +0100
Subject: [PATCH 08/12] Align implementation of block_code with that in Mistune
 HTMLRenderer

- add functional test of language tag on code block
- fix unit test for block_code given renamed parameter
---
 md2cf/confluence_renderer.py | 6 +++++-
 tests/functional/result.xml  | 6 ++++++
 tests/functional/test.md     | 6 ++++++
 tests/unit/test_renderer.py  | 2 +-
 4 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py
index 0447c4f..a221b4d 100644
--- a/md2cf/confluence_renderer.py
+++ b/md2cf/confluence_renderer.py
@@ -4,6 +4,7 @@
 from typing import List, NamedTuple
 
 import mistune
+from mistune.util import safe_entity
 
 
 class RelativeLink(NamedTuple):
@@ -130,7 +131,10 @@ def text(self, text):
     def block_code(self, code, info=None):
         root_element = self.structured_macro("code")
         if info is not None:
-            lang_parameter = self.parameter(name="language", value=info)
+            info = safe_entity(info.strip())
+        if info:
+            lang = info.split(None, 1)[0]
+            lang_parameter = self.parameter(name="language", value=lang)
             root_element.append(lang_parameter)
         root_element.append(self.parameter(name="linenumbers", value="true"))
         if code and code[-1] != "\n":
diff --git a/tests/functional/result.xml b/tests/functional/result.xml
index cb4bba4..f96550a 100644
--- a/tests/functional/result.xml
+++ b/tests/functional/result.xml
@@ -251,6 +251,12 @@ it's also easy to use Markdown to write about Markdown's own syntax.</p>
 end tell
 ]]></ac:plain-text-body>
 </ac:structured-macro>
+<p>Indicate the language thus:</p>
+<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">python</ac:parameter>
+<ac:parameter ac:name="linenumbers">true</ac:parameter>
+<ac:plain-text-body><![CDATA[hello = "hello"
+]]></ac:plain-text-body>
+</ac:structured-macro>
 <h2>Span Elements</h2>
 <h3>Links</h3>
 <p>Markdown supports two style of links: <em>inline</em> and <em>reference</em>.</p>
diff --git a/tests/functional/test.md b/tests/functional/test.md
index 0fd7ee3..4cbe7e4 100644
--- a/tests/functional/test.md
+++ b/tests/functional/test.md
@@ -265,6 +265,12 @@ tell application "Foo"
 end tell
 ```
 
+Indicate the language thus:
+
+```python
+hello = "hello"
+```
+
 ## Span Elements
 
 ### Links
diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py
index 02cb714..ab0eb73 100644
--- a/tests/unit/test_renderer.py
+++ b/tests/unit/test_renderer.py
@@ -146,7 +146,7 @@ def test_renderer_block_code_with_language():
 
     renderer = ConfluenceRenderer()
 
-    assert renderer.block_code(test_code, lang=test_language) == test_markup
+    assert renderer.block_code(test_code, info=test_language) == test_markup
 
 
 def test_renderer_header_sets_title():

From 9fbb343ec88b97c2a736d2840d086c47fe0315ca Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Thu, 18 May 2023 12:11:09 +0100
Subject: [PATCH 09/12] Fix unit tests given renamed header/heading function

---
 tests/unit/test_renderer.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py
index ab0eb73..950e9a1 100644
--- a/tests/unit/test_renderer.py
+++ b/tests/unit/test_renderer.py
@@ -112,7 +112,7 @@ def test_tag_render_with_child_and_text():
 
 def test_renderer_reinit():
     renderer = ConfluenceRenderer()
-    renderer.header("this is a title", 1)
+    renderer.heading("this is a title", 1)
     assert renderer.title is not None
 
     renderer.reinit()
@@ -153,7 +153,7 @@ def test_renderer_header_sets_title():
     test_header = "this is a header"
     renderer = ConfluenceRenderer()
 
-    renderer.header(test_header, 1)
+    renderer.heading(test_header, 1)
 
     assert renderer.title == test_header
 
@@ -162,7 +162,7 @@ def test_renderer_strips_header():
     test_header = "this is a header"
     renderer = ConfluenceRenderer(strip_header=True)
 
-    result = renderer.header(test_header, 1)
+    result = renderer.heading(test_header, 1)
 
     assert result == ""
 
@@ -171,7 +171,7 @@ def test_renderer_header_lower_level_does_not_set_title():
     test_header = "this is a header"
     renderer = ConfluenceRenderer()
 
-    renderer.header(test_header, 2)
+    renderer.heading(test_header, 2)
 
     assert renderer.title is None
 
@@ -181,8 +181,8 @@ def test_renderer_header_later_level_sets_title():
     test_header = "this is a header"
     renderer = ConfluenceRenderer()
 
-    renderer.header(test_lower_header, 2)
-    renderer.header(test_header, 1)
+    renderer.heading(test_lower_header, 2)
+    renderer.heading(test_header, 1)
 
     assert renderer.title is test_header
 
@@ -192,8 +192,8 @@ def test_renderer_header_only_sets_first_title():
     test_second_header = "this is another header"
     renderer = ConfluenceRenderer()
 
-    renderer.header(test_header, 1)
-    renderer.header(test_second_header, 1)
+    renderer.heading(test_header, 1)
+    renderer.heading(test_second_header, 1)
 
     assert renderer.title is test_header
 

From 1474746abba4c01fb6add858e36add687d1dd0fa Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Thu, 18 May 2023 12:36:22 +0100
Subject: [PATCH 10/12] Adjust unit tests to account for parameter order change

---
 tests/unit/test_renderer.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py
index 950e9a1..156a03e 100644
--- a/tests/unit/test_renderer.py
+++ b/tests/unit/test_renderer.py
@@ -207,7 +207,7 @@ def test_renderer_image_external():
 
     renderer = ConfluenceRenderer()
 
-    assert renderer.image(test_image_src, "", "") == test_image_markup
+    assert renderer.image("", test_image_src, "") == test_image_markup
     assert not renderer.attachments
 
 
@@ -223,7 +223,7 @@ def test_renderer_image_external_alt_and_title():
     renderer = ConfluenceRenderer()
 
     assert (
-        renderer.image(test_image_src, test_image_title, test_image_alt)
+        renderer.image(test_image_alt, test_image_src, test_image_title)
         == test_image_markup
     )
 
@@ -238,7 +238,7 @@ def test_renderer_image_internal_absolute():
 
     renderer = ConfluenceRenderer()
 
-    assert renderer.image(test_image_src, "", "") == test_image_markup
+    assert renderer.image("", test_image_src, "") == test_image_markup
     assert renderer.attachments == [test_image_src]
 
 
@@ -252,7 +252,7 @@ def test_renderer_image_internal_relative():
 
     renderer = ConfluenceRenderer()
 
-    assert renderer.image(test_image_src, "", "") == test_image_markup
+    assert renderer.image("", test_image_src, "") == test_image_markup
     assert renderer.attachments == [test_image_src]
 
 

From 457e923166771e481a1ce7f59f2ee343d79f9b67 Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Thu, 18 May 2023 12:49:41 +0100
Subject: [PATCH 11/12] Add tests for both ways of handling newlines

- turns out this is easy to break as Mistune behaviour has changed
---
 tests/unit/test_document.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/unit/test_document.py b/tests/unit/test_document.py
index 944a87b..bb5484f 100644
--- a/tests/unit/test_document.py
+++ b/tests/unit/test_document.py
@@ -1,3 +1,4 @@
+from io import StringIO
 from pathlib import Path
 
 import md2cf.document as doc
@@ -244,3 +245,26 @@ def test_get_document_frontmatter_empty():
 """
 
     assert doc.get_document_frontmatter(source_markdown.splitlines(keepends=True)) == {}
+
+
+def test_parse_page_with_newlines():
+    source_markdown = """
+Line1
+Line2
+"""
+    lines = StringIO(source_markdown).readlines()
+    assert (
+        doc.parse_page(lines, remove_text_newlines=False).body
+        == "<p>Line1\nLine2</p>\n"
+    )
+
+
+def test_parse_page_with_newlines_removed():
+    source_markdown = """
+Line1
+Line2
+"""
+    lines = StringIO(source_markdown).readlines()
+    assert (
+        doc.parse_page(lines, remove_text_newlines=True).body == "<p>Line1 Line2</p>\n"
+    )

From f3cb5daebfad6486ae5c525b56721913b0add3a4 Mon Sep 17 00:00:00 2001
From: George Lund <george.lund@digital.cabinet-office.gov.uk>
Date: Thu, 18 May 2023 13:14:24 +0100
Subject: [PATCH 12/12] Newline removal now has to be done in parsing rather
 than rendering due to the Mistune upgrade

- each text callback occurs with a single line/token so doesn't receive the newline
- instead we define a special InlineParser that handles soft break as spaces rather than newlines
---
 md2cf/confluence_renderer.py |  7 -------
 md2cf/document.py            | 14 ++++++++++++--
 tests/unit/test_renderer.py  |  9 ---------
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py
index a221b4d..16387fb 100644
--- a/md2cf/confluence_renderer.py
+++ b/md2cf/confluence_renderer.py
@@ -64,13 +64,11 @@ class ConfluenceRenderer(mistune.HTMLRenderer):
     def __init__(
         self,
         strip_header=False,
-        remove_text_newlines=False,
         enable_relative_links=False,
         **kwargs,
     ):
         super().__init__(**kwargs)
         self.strip_header = strip_header
-        self.remove_text_newlines = remove_text_newlines
         self.attachments = list()
         self.title = None
         self.enable_relative_links = enable_relative_links
@@ -123,11 +121,6 @@ def link(self, text, url, title=None):
             url = replacement_link
         return super(ConfluenceRenderer, self).link(text, url, title)
 
-    def text(self, text):
-        if self.remove_text_newlines:
-            text = text.replace("\n", " ")
-        return super().text(text)
-
     def block_code(self, code, info=None):
         root_element = self.structured_macro("code")
         if info is not None:
diff --git a/md2cf/document.py b/md2cf/document.py
index 5b13753..da05399 100644
--- a/md2cf/document.py
+++ b/md2cf/document.py
@@ -71,6 +71,12 @@ def __repr__(self):
         )
 
 
+class LineBreakIgnoringInlineParser(mistune.InlineParser):
+    def parse_softbreak(self, m, state) -> int:
+        state.append_token({"type": "text", "raw": " "})
+        return m.end()
+
+
 def find_non_empty_parent_path(
     current_dir: Path, folder_data: Dict[Path, Dict[str, Any]], default: Path
 ) -> Path:
@@ -269,10 +275,14 @@ def parse_page(
 ) -> Page:
     renderer = ConfluenceRenderer(
         strip_header=strip_header,
-        remove_text_newlines=remove_text_newlines,
         enable_relative_links=enable_relative_links,
     )
-    confluence_mistune = mistune.Markdown(renderer=renderer)
+    if remove_text_newlines:
+        inline_parser = LineBreakIgnoringInlineParser()
+    else:
+        inline_parser = mistune.InlineParser()
+
+    confluence_mistune = mistune.Markdown(renderer=renderer, inline=inline_parser)
     confluence_content = confluence_mistune("".join(markdown_lines))
 
     page = Page(
diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py
index 156a03e..8c3226e 100644
--- a/tests/unit/test_renderer.py
+++ b/tests/unit/test_renderer.py
@@ -254,12 +254,3 @@ def test_renderer_image_internal_relative():
 
     assert renderer.image("", test_image_src, "") == test_image_markup
     assert renderer.attachments == [test_image_src]
-
-
-def test_renderer_remove_text_newlines():
-    test_text = "This is a paragraph\nwith some newlines\nin it."
-    test_stripped_text = "This is a paragraph with some newlines in it."
-
-    renderer = ConfluenceRenderer(remove_text_newlines=True)
-
-    assert renderer.text(test_text) == test_stripped_text