Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ repos:
hooks:
- id: trailing-whitespace
exclude: .bumpversion.cfg
args: [--markdown-linebreak-ext=md]
- id: end-of-file-fixer
exclude: '.bumpversion.cfg'
- id: check-yaml
Expand Down
48 changes: 23 additions & 25 deletions md2cf/confluence_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import List, NamedTuple

import mistune
from mistune.util import safe_entity


class RelativeLink(NamedTuple):
Expand Down Expand Up @@ -59,17 +60,15 @@ def append(self, child):
self.children.append(child)


class ConfluenceRenderer(mistune.Renderer):
class ConfluenceRenderer(mistune.HTMLRenderer):
def __init__(
self,
strip_header=False,
remove_text_newlines=False,
enable_relative_links=False,
**kwargs,
):
super().__init__(**kwargs)
self.strip_header = strip_header
self.remove_text_newlines = remove_text_newlines
self.attachments = list()
self.title = None
self.enable_relative_links = enable_relative_links
Expand All @@ -80,14 +79,14 @@ def reinit(self):
self.relative_links = list()
self.title = None

def heading(self, text, level, **attrs):
    """Render a heading, remembering the first level-1 heading as the title.

    The first time a level-1 heading is seen while ``self.title`` is still
    ``None``, its text is stored in ``self.title``. If ``self.strip_header``
    is set, that heading is dropped from the output.

    :param text: heading content as handed over by the parser
    :param level: heading level (``1`` for a top-level heading)
    :param attrs: extra keyword attributes, forwarded untouched to the
        parent renderer
    :return: an empty string when the title heading is stripped, otherwise
        whatever the parent renderer's ``heading`` returns
    """
    if self.title is None and level == 1:
        self.title = text
        # Don't duplicate page title as a header
        if self.strip_header:
            return ""

    return super(ConfluenceRenderer, self).heading(text, level, **attrs)

def structured_macro(self, name):
    """Create a ``structured-macro`` tag whose ``name`` attribute is *name*."""
    macro_attributes = {"name": name}
    return ConfluenceTag("structured-macro", attrib=macro_attributes)
Expand All @@ -102,8 +101,8 @@ def plain_text_body(self, text):
body_tag.text = text
return body_tag

def link(self, link, title, text):
parsed_link = urlparse.urlparse(link)
def link(self, text, url, title=None):
parsed_link = urlparse.urlparse(url)
if self.enable_relative_links and (
not parsed_link.scheme
and not parsed_link.netloc
Expand All @@ -115,44 +114,43 @@ def link(self, link, title, text):
RelativeLink(
path=parsed_link.path,
replacement=replacement_link,
original=link,
escaped_original=mistune.escape_link(link),
original=url,
escaped_original=mistune.escape_link(url),
)
)
link = replacement_link
return super(ConfluenceRenderer, self).link(link, title, text)
url = replacement_link
return super(ConfluenceRenderer, self).link(text, url, title)

def text(self, text):
    """Render inline text, first replacing newlines with spaces if enabled.

    Newlines are only replaced when ``self.remove_text_newlines`` is truthy;
    the (possibly modified) text is then passed to the parent renderer.
    """
    flattened = text.replace("\n", " ") if self.remove_text_newlines else text
    return super().text(flattened)

def block_code(self, code, info=None):
    """Render a code block as a ``code`` structured macro.

    :param code: the code text; a trailing newline is appended when it is
        non-empty and does not already end with one
    :param info: optional info string of the code fence; after stripping
        and entity-escaping, its first whitespace-separated word is emitted
        as the ``language`` parameter
    :return: the rendered macro markup
    """
    root_element = self.structured_macro("code")
    if info is not None:
        info = safe_entity(info.strip())
        if info:
            # Only the first word names the language; anything after it in
            # the info string is ignored.
            lang = info.split(None, 1)[0]
            root_element.append(self.parameter(name="language", value=lang))
    root_element.append(self.parameter(name="linenumbers", value="true"))
    if code and not code.endswith("\n"):
        code += "\n"
    root_element.append(self.plain_text_body(code))
    return root_element.render()

def image(self, alt, url, title=None):
    """Render an image tag, recording local files as attachments.

    A URL without a network location is treated as a local file: it is
    referenced through an ``attachment`` tag and its path is appended to
    ``self.attachments``. Any other URL is referenced through a ``url`` tag.

    :param alt: alternative text, emitted as the ``alt`` attribute
    :param url: image source as written in the document
    :param title: optional title, emitted as the ``title`` attribute when
        truthy
    :return: the rendered image markup
    """
    attributes = {"alt": alt}
    if title:
        attributes["title"] = title

    root_element = ConfluenceTag(name="image", attrib=attributes)
    parsed_source = urlparse.urlparse(url)
    if not parsed_source.netloc:
        # Local file, requires upload.
        # Decode percent-escapes once and use the result for both the
        # referenced filename and the recorded path, so the two cannot
        # disagree (e.g. "my%20image.png" vs "my image.png").
        # NOTE(review): assumes attachments are uploaded under the name of
        # the file on disk -- confirm against the upload code.
        local_path = urlparse.unquote(parsed_source.path)
        basename = Path(local_path).name
        url_tag = ConfluenceTag(
            "attachment", attrib={"filename": basename}, namespace="ri"
        )
        self.attachments.append(local_path)
    else:
        url_tag = ConfluenceTag("url", attrib={"value": url}, namespace="ri")
    root_element.append(url_tag)

    return root_element.render()
15 changes: 12 additions & 3 deletions md2cf/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ def __repr__(self):
)


class LineBreakIgnoringInlineParser(mistune.InlineParser):
    """Inline parser that emits a single space in place of a soft break."""

    def parse_softbreak(self, m, state) -> int:
        """Append a one-space text token and return the end of match *m*."""
        space_token = {"type": "text", "raw": " "}
        state.append_token(space_token)
        return m.end()


def find_non_empty_parent_path(
current_dir: Path, folder_data: Dict[Path, Dict[str, Any]], default: Path
) -> Path:
Expand Down Expand Up @@ -268,12 +274,15 @@ def parse_page(
enable_relative_links: bool = False,
) -> Page:
renderer = ConfluenceRenderer(
use_xhtml=True,
strip_header=strip_header,
remove_text_newlines=remove_text_newlines,
enable_relative_links=enable_relative_links,
)
confluence_mistune = mistune.Markdown(renderer=renderer)
if remove_text_newlines:
inline_parser = LineBreakIgnoringInlineParser()
else:
inline_parser = mistune.InlineParser()

confluence_mistune = mistune.Markdown(renderer=renderer, inline=inline_parser)
confluence_content = confluence_mistune("".join(markdown_lines))

page = Page(
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
install_requires=[
"rich-argparse==1.0.0",
"rich==13.0.1",
"mistune==0.8.4",
"mistune==3.0.0rc5",
"chardet==5.1.0",
"requests==2.28.2",
"PyYAML==6.0",
Expand Down
60 changes: 36 additions & 24 deletions tests/functional/result.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,17 @@ inspiration for Markdown's syntax is the format of plain text email.</p>
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing but spaces or tabs is considered
blank.) Normal paragraphs should not be indented with spaces or tabs.</p>
<p>The implication of the "one or more consecutive lines of text" rule is
that Markdown supports "hard-wrapped" text paragraphs. This differs
<p>The implication of the &quot;one or more consecutive lines of text&quot; rule is
that Markdown supports &quot;hard-wrapped&quot; text paragraphs. This differs
significantly from most other text-to-HTML formatters (including Movable
Type's "Convert Line Breaks" option) which translate every line break
Type's &quot;Convert Line Breaks&quot; option) which translate every line break
character in a paragraph into a <code>&lt;br /&gt;</code> tag.</p>
<p>When you <em>do</em> want to insert a <code>&lt;br /&gt;</code> break tag using Markdown, you
end a line with two or more spaces, then type return.</p>
end a line with two or more spaces, then type return, like...<br />
this.</p>
<h3>Headers</h3>
<p>Markdown supports two styles of headers, [Setext] [1] and [atx] [2].</p>
<p>Optionally, you may "close" atx-style headers. This is purely
<p>Optionally, you may &quot;close&quot; atx-style headers. This is purely
cosmetic -- you can use this if you think it looks better. The
closing hashes don't even need to match the number of hashes
used to open the header. (The number of opening hashes
Expand All @@ -60,37 +61,43 @@ determines the header level.)</p>
familiar with quoting passages of text in an email message, then you
know how to create a blockquote in Markdown. It looks best if you hard
wrap the text and put a <code>&gt;</code> before every line:</p>
<blockquote><p>This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
<blockquote>
<p>This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.</p>
<p>Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
id sem consectetuer libero luctus adipiscing.</p>
</blockquote>
<p>Markdown allows you to be lazy and only put the <code>&gt;</code> before the first
line of a hard-wrapped paragraph:</p>
<blockquote><p>This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
<blockquote>
<p>This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.</p>
<p>Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
id sem consectetuer libero luctus adipiscing.</p>
</blockquote>
<p>Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
adding additional levels of <code>&gt;</code>:</p>
<blockquote><p>This is the first level of quoting.</p>
<blockquote><p>This is nested blockquote.</p>
<blockquote>
<p>This is the first level of quoting.</p>
<blockquote>
<p>This is nested blockquote.</p>
</blockquote>
<p>Back to the first level.</p>
</blockquote>
<p>Blockquotes can contain other Markdown elements, including headers, lists,
and code blocks:</p>
<blockquote><h2>This is a header.</h2>
<blockquote>
<h2>This is a header.</h2>
<ol>
<li>This is the first list item.</li>
<li>This is the second list item.</li>
</ol>
<p>Here's some example code:</p>
<ac:structured-macro ac:name="code"><ac:parameter ac:name="linenumbers">true</ac:parameter>
<ac:plain-text-body><![CDATA[return shell_exec("echo $input | $markdown_script");]]></ac:plain-text-body>
<ac:plain-text-body><![CDATA[return shell_exec("echo $input | $markdown_script");
]]></ac:plain-text-body>
</ac:structured-macro>
</blockquote>
<p>Any decent text editor should make email-style quoting easy. For
Expand Down Expand Up @@ -123,22 +130,21 @@ Quote Level from the Text menu.</p>
<li>McHale</li>
<li>Parish</li>
</ol>
<p>It's important to note that the actual numbers you use to mark the
list have no effect on the HTML output Markdown produces. The HTML
Markdown produces from the above list is:</p>
<p>If you instead wrote the list in Markdown like this:</p>
<p>Only the first number in a list has any effect.</p>
<p>You can therefore write the same list in Markdown like this:</p>
<ol>
<li>Bird</li>
<li>McHale</li>
<li>Parish</li>
</ol>
<p>or even:</p>
<ol>
<p>You specify a different start number, but the numbers you use for
subsequent items will be ignored:</p>
<ol start="3">
<li>Bird</li>
<li>McHale</li>
<li>Parish</li>
</ol>
<p>you'd get the exact same HTML output. The point is, if you want to,
<p>The point is, if you want to,
you can use ordinal numbers in your ordered Markdown lists, so that
the numbers in your source match the numbers in your published HTML.
But if you want to be lazy, you don't have to.</p>
Expand Down Expand Up @@ -188,7 +194,8 @@ sit amet, consectetuer adipiscing elit.</p>
delimiters need to be indented:</p>
<ul>
<li><p>A list item with a blockquote:</p>
<blockquote><p>This is a blockquote
<blockquote>
<p>This is a blockquote
inside a list item.</p>
</blockquote>
</li>
Expand All @@ -198,7 +205,8 @@ to be indented <em>twice</em> -- 8 spaces or two tabs:</p>
<ul>
<li><p>A list item with a code block:</p>
<ac:structured-macro ac:name="code"><ac:parameter ac:name="linenumbers">true</ac:parameter>
<ac:plain-text-body><![CDATA[<code goes here>]]></ac:plain-text-body>
<ac:plain-text-body><![CDATA[<code goes here>
]]></ac:plain-text-body>
</ac:structured-macro>
</li>
</ul>
Expand All @@ -212,15 +220,13 @@ block by at least 4 spaces or 1 tab.</p>
<p>This is a normal paragraph:</p>
<ac:structured-macro ac:name="code"><ac:parameter ac:name="linenumbers">true</ac:parameter>
<ac:plain-text-body><![CDATA[This is a code block.

]]></ac:plain-text-body>
</ac:structured-macro>
<p>Here is an example of AppleScript:</p>
<ac:structured-macro ac:name="code"><ac:parameter ac:name="linenumbers">true</ac:parameter>
<ac:plain-text-body><![CDATA[tell application "Foo"
beep
end tell

]]></ac:plain-text-body>
</ac:structured-macro>
<p>A code block continues until it reaches a line that is not indented
Expand All @@ -234,7 +240,6 @@ ampersands and angle brackets. For example, this:</p>
<ac:plain-text-body><![CDATA[<div class="footer">
&copy; 2004 Foo Corporation
</div>

]]></ac:plain-text-body>
</ac:structured-macro>
<p>Regular Markdown syntax is not processed within code blocks. E.g.,
Expand All @@ -243,7 +248,14 @@ it's also easy to use Markdown to write about Markdown's own syntax.</p>
<ac:structured-macro ac:name="code"><ac:parameter ac:name="linenumbers">true</ac:parameter>
<ac:plain-text-body><![CDATA[tell application "Foo"
beep
end tell]]></ac:plain-text-body>
end tell
]]></ac:plain-text-body>
</ac:structured-macro>
<p>Indicate the language thus:</p>
<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">python</ac:parameter>
<ac:parameter ac:name="linenumbers">true</ac:parameter>
<ac:plain-text-body><![CDATA[hello = "hello"
]]></ac:plain-text-body>
</ac:structured-macro>
<h2>Span Elements</h2>
<h3>Links</h3>
Expand Down
22 changes: 14 additions & 8 deletions tests/functional/test.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ Type's "Convert Line Breaks" option) which translate every line break
character in a paragraph into a `<br />` tag.

When you *do* want to insert a `<br />` break tag using Markdown, you
end a line with two or more spaces, then type return.
end a line with two or more spaces, then type return, like...
this.

### Headers

Expand Down Expand Up @@ -85,7 +86,7 @@ line of a hard-wrapped paragraph:
> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

>
> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
id sem consectetuer libero luctus adipiscing.

Expand Down Expand Up @@ -144,23 +145,22 @@ Ordered lists use numbers followed by periods:
2. McHale
3. Parish

It's important to note that the actual numbers you use to mark the
list have no effect on the HTML output Markdown produces. The HTML
Markdown produces from the above list is:
Only the first number in a list has any effect.

If you instead wrote the list in Markdown like this:
You can therefore write the same list in Markdown like this:

1. Bird
1. McHale
1. Parish

or even:
You specify a different start number, but the numbers you use for
subsequent items will be ignored:

3. Bird
1. McHale
8. Parish

you'd get the exact same HTML output. The point is, if you want to,
The point is, if you want to,
you can use ordinal numbers in your ordered Markdown lists, so that
the numbers in your source match the numbers in your published HTML.
But if you want to be lazy, you don't have to.
Expand Down Expand Up @@ -265,6 +265,12 @@ tell application "Foo"
end tell
```

Indicate the language thus:

```python
hello = "hello"
```

## Span Elements

### Links
Expand Down
Loading