From db33636f96129748e5208bb54d5864df8a6ea275 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Wed, 1 Dec 2021 22:24:13 -0500 Subject: [PATCH 001/247] Update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index d79074c8..f26ff577 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## python-markdown2 2.4.2 (not yet released) - [pull #408] Fix for fenced code blocks issue #396 +- [pull #410] Be more strict on auto linking urls, RE DOS fix ## python-markdown2 2.4.1 From 79fdd50a55ea47e2246a1269c21611fb14d7fff9 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Wed, 1 Dec 2021 22:50:38 -0500 Subject: [PATCH 002/247] prepare for 2.4.2 release --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index f26ff577..935f210b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,6 @@ # python-markdown2 Changelog -## python-markdown2 2.4.2 (not yet released) +## python-markdown2 2.4.2 - [pull #408] Fix for fenced code blocks issue #396 - [pull #410] Be more strict on auto linking urls, RE DOS fix From 24365bc9ed059b4b44f6008c796852635597d9e2 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Wed, 1 Dec 2021 22:50:55 -0500 Subject: [PATCH 003/247] prep for future dev --- CHANGES.md | 5 +++++ lib/markdown2.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 935f210b..2df9a681 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # python-markdown2 Changelog +## python-markdown2 2.4.3 (not yet released) + +(nothing yet) + + ## python-markdown2 2.4.2 - [pull #408] Fix for fenced code blocks issue #396 diff --git a/lib/markdown2.py b/lib/markdown2.py index 634c0987..fdc296db 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -97,7 +97,7 @@ # not yet sure if there implications with this. Compare 'pydoc sre' # and 'perldoc perlre'. 
-__version_info__ = (2, 4, 2) +__version_info__ = (2, 4, 3) __version__ = '.'.join(map(str, __version_info__)) __author__ = "Trent Mick" From 28f210c8d4ec9193b72197d5c730abc49d475762 Mon Sep 17 00:00:00 2001 From: antonio Date: Tue, 1 Feb 2022 22:03:55 -0500 Subject: [PATCH 004/247] dedent multiline meta text --- lib/markdown2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index fdc296db..03020fac 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -516,7 +516,7 @@ def parse_structured_value(value): # Multiline value if v[:3] == " >\n": - self.metadata[k.strip()] = v[3:].strip() + self.metadata[k.strip()] = _dedent(v[3:]).strip() # Empty value elif v == "\n": From b969f4128a0f5931b1395c0ad25e783ebfab3267 Mon Sep 17 00:00:00 2001 From: antonio Date: Tue, 1 Feb 2022 22:04:21 -0500 Subject: [PATCH 005/247] add test case --- test/tm-cases/metadata.metadata | 2 +- test/tm-cases/metadata.text | 1 + test/tm-cases/metadata2.metadata | 2 +- test/tm-cases/metadata2.text | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/tm-cases/metadata.metadata b/test/tm-cases/metadata.metadata index 1ee2c4fe..a0180539 100644 --- a/test/tm-cases/metadata.metadata +++ b/test/tm-cases/metadata.metadata @@ -4,7 +4,7 @@ "And": "some, cvs, data, which, you, must, parse, yourself", "this-is": "a hyphen test", "empty": "", - "and some": "long value\n that goes multiline", + "and some": "long value\n with complex indentation\nthat goes multiline", "another": "example", "alist": ["a", "b", "c"], "adict": {"key": "foo", "a nested list": ["one", "two", "Even multiline strings are allowed\n in nested structured data\n if linebreaks and indent are respected !", {"subkey": "and another dict in a list"}, "but one-liners remains: simple strings"]} diff --git a/test/tm-cases/metadata.text b/test/tm-cases/metadata.text index c12494df..60dc7b15 100644 --- a/test/tm-cases/metadata.text +++ b/test/tm-cases/metadata.text @@ -6,6 
+6,7 @@ this-is : a hyphen test empty : and some: > long value + with complex indentation that goes multiline another: example alist: diff --git a/test/tm-cases/metadata2.metadata b/test/tm-cases/metadata2.metadata index 172822d9..3b392f62 100644 --- a/test/tm-cases/metadata2.metadata +++ b/test/tm-cases/metadata2.metadata @@ -4,6 +4,6 @@ "And": "some, cvs, data, which, you, must, parse, yourself", "this-is": "a hyphen test", "empty": "", - "another long": "long value\n that goes multiline", + "another long": "long value\n with complex indentation\nthat goes multiline", "another": "example" } diff --git a/test/tm-cases/metadata2.text b/test/tm-cases/metadata2.text index 13579f12..b59d3121 100644 --- a/test/tm-cases/metadata2.text +++ b/test/tm-cases/metadata2.text @@ -5,6 +5,7 @@ this-is : a hyphen test empty : another long: > long value + with complex indentation that goes multiline another: example From cd2957733bd82f69a3c87fafb4d9becf1e9cd605 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Sat, 5 Feb 2022 15:01:59 -0500 Subject: [PATCH 006/247] Changes and contributors --- CHANGES.md | 2 +- CONTRIBUTORS.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 2df9a681..7c318bbb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,7 @@ ## python-markdown2 2.4.3 (not yet released) -(nothing yet) +- [pull #413] Fix meta indentation ## python-markdown2 2.4.2 diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 09fe9626..299b97d2 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -50,3 +50,4 @@ André Nasturas (github.com/andrenasturas) Denis Kasak (github.com/dkasak) Maximilian Hils (github.com/mhils) BarkeH (github.com/BarkeH) +cav71 (github.com/cav71) From cb22a18aa58381a4898ab23d85b1143bfef14c91 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Wed, 9 Feb 2022 12:24:35 +0000 Subject: [PATCH 007/247] Fix code surrounded by blank lines inside quoted fenced code blocks being treated as another code block. 
If you were using the `fenced-code-blocks` extension and you had code that was indented (eg: inside an if statement) and surrounded by blank lines, it would be detected as a code block, regardless of whether it was already inside a code block. EG: > ```python > if True: > print() > > print() # this line would register as a code block > > print() > ``` This is because the regex look-ahead being used to check for a `</code>` tag would only check the next HTML tag it found, which would usually be a `<span>` tag if syntax highlighting was being used. The new look-ahead checks the next HTML tag that isn't a `<span>` tag to see if it's a `</code>` tag --- lib/markdown2.py | 2 +- ...ode_blocks_whitespace_around_indented_lines.html | 13 +++++++++++++ ...ode_blocks_whitespace_around_indented_lines.opts | 1 + ...ode_blocks_whitespace_around_indented_lines.tags | 1 + ...ode_blocks_whitespace_around_indented_lines.text | 12 ++++++++++++ 5 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.html create mode 100644 test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.opts create mode 100644 test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.tags create mode 100644 test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 03020fac..0680bcc3 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1922,7 +1922,7 @@ def _do_code_blocks(self, text): ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc # Lookahead to make sure this block isn't already in a code block. # Needed when syntax highlighting is being used. 
- (?![^<]*\) + (?!([^<]|<(/?)span)*\) ''' % (self.tab_width, self.tab_width), re.M | re.X) return code_block_re.sub(self._code_block_sub, text) diff --git a/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.html b/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.html new file mode 100644 index 00000000..6f344f05 --- /dev/null +++ b/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.html @@ -0,0 +1,13 @@ +

Example:

+ +
+
if True:
+    print()
+
+    print()
+
+    print()
+
+    print()
+
+
diff --git a/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.opts b/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.opts new file mode 100644 index 00000000..91560387 --- /dev/null +++ b/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.opts @@ -0,0 +1 @@ +{"extras": ["fenced-code-blocks"]} \ No newline at end of file diff --git a/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.tags b/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.tags new file mode 100644 index 00000000..12bca88c --- /dev/null +++ b/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.tags @@ -0,0 +1 @@ +extra fenced-code-blocks pygments \ No newline at end of file diff --git a/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.text b/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.text new file mode 100644 index 00000000..a77adaca --- /dev/null +++ b/test/tm-cases/quoted_fenced_code_blocks_whitespace_around_indented_lines.text @@ -0,0 +1,12 @@ +### Example: + +> ```python +> if True: +> print() +> +> print() +> +> print() +> +> print() +> ``` \ No newline at end of file From 430349272fa16e523b3b75805fa87c3e7fc7f877 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Mon, 14 Feb 2022 20:35:50 -0500 Subject: [PATCH 008/247] Changes and contributors --- CHANGES.md | 1 + CONTRIBUTORS.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 7c318bbb..f3e977d0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## python-markdown2 2.4.3 (not yet released) - [pull #413] Fix meta indentation +- [pull #414] Fix code surrounded by blank lines inside blockquote fenced code blocks ## python-markdown2 2.4.2 diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 299b97d2..7d8a1574 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -51,3 +51,4 @@ Denis Kasak (github.com/dkasak) 
Maximilian Hils (github.com/mhils) BarkeH (github.com/BarkeH) cav71 (github.com/cav71) +Crozzers (github.com/Crozzers) From 82f1376238700be925f364dca87d64b5c6807617 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 10 Mar 2022 15:44:43 +0000 Subject: [PATCH 009/247] Fix issue#399 --- lib/markdown2.py | 2 +- .../inline_code_pipe_within_table.html | 21 +++++++++++++++++++ .../inline_code_pipe_within_table.opts | 1 + .../inline_code_pipe_within_table.tags | 1 + .../inline_code_pipe_within_table.text | 4 ++++ 5 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/inline_code_pipe_within_table.html create mode 100644 test/tm-cases/inline_code_pipe_within_table.opts create mode 100644 test/tm-cases/inline_code_pipe_within_table.tags create mode 100644 test/tm-cases/inline_code_pipe_within_table.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 0680bcc3..46dd38f2 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1059,7 +1059,7 @@ def _prepare_pyshell_blocks(self, text): def _table_sub(self, match): trim_space_re = '^[ \t\n]+|[ \t\n]+$' trim_bar_re = r'^\||\|$' - split_bar_re = r'^\||(? 
+ + + Sign + Operator name + Description + + + + + & + Bitwise and + Bitwise and between two integer values + + + | + Bitwise or + Bitwise or between two integer values + + + diff --git a/test/tm-cases/inline_code_pipe_within_table.opts b/test/tm-cases/inline_code_pipe_within_table.opts new file mode 100644 index 00000000..9625694e --- /dev/null +++ b/test/tm-cases/inline_code_pipe_within_table.opts @@ -0,0 +1 @@ +{"extras": ["tables"]} \ No newline at end of file diff --git a/test/tm-cases/inline_code_pipe_within_table.tags b/test/tm-cases/inline_code_pipe_within_table.tags new file mode 100644 index 00000000..8a4af592 --- /dev/null +++ b/test/tm-cases/inline_code_pipe_within_table.tags @@ -0,0 +1 @@ +extra tables \ No newline at end of file diff --git a/test/tm-cases/inline_code_pipe_within_table.text b/test/tm-cases/inline_code_pipe_within_table.text new file mode 100644 index 00000000..f1e8aa26 --- /dev/null +++ b/test/tm-cases/inline_code_pipe_within_table.text @@ -0,0 +1,4 @@ +| Sign | Operator name | Description | +|---|---|---| +| `&` | Bitwise and | Bitwise and between two integer values | +| `|` | Bitwise or | Bitwise or between two integer values | \ No newline at end of file From 678467428201d4625c2955add5ef33c7d2d8311e Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 10 Mar 2022 15:44:43 +0000 Subject: [PATCH 010/247] Fix issue#327 --- lib/markdown2.py | 10 ++++---- .../tm-cases/fenced_code_blocks_issue327.html | 22 ++++++++++++++++++ .../tm-cases/fenced_code_blocks_issue327.opts | 1 + .../tm-cases/fenced_code_blocks_issue327.tags | 1 + .../tm-cases/fenced_code_blocks_issue327.text | 23 +++++++++++++++++++ 5 files changed, 52 insertions(+), 5 deletions(-) create mode 100644 test/tm-cases/fenced_code_blocks_issue327.html create mode 100644 test/tm-cases/fenced_code_blocks_issue327.opts create mode 100644 test/tm-cases/fenced_code_blocks_issue327.tags create mode 100644 test/tm-cases/fenced_code_blocks_issue327.text diff --git a/lib/markdown2.py 
b/lib/markdown2.py index 0680bcc3..b444bf4d 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1844,10 +1844,10 @@ def wrap(self, source, outfile): def _code_block_sub(self, match, is_fenced_code_block=False): lexer_name = None if is_fenced_code_block: - lexer_name = match.group(1) + lexer_name = match.group(2) if lexer_name: formatter_opts = self.extras['fenced-code-blocks'] or {} - codeblock = match.group(2) + codeblock = match.group(3) codeblock = codeblock[:-1] # drop one trailing newline else: codeblock = match.group(1) @@ -1929,9 +1929,9 @@ def _do_code_blocks(self, text): _fenced_code_block_re = re.compile(r''' (?:\n+|\A\n?) - ^```\s{0,99}?([\w+-]+)?\s{0,99}?\n # opening fence, $1 = optional lang - (.*?) # $2 = code block content - ^```[ \t]*\n # closing fence + (^`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n # $1 = opening fence (captured for back-referencing), $2 = optional lang + (.*?) # $3 = code block content + \1[ \t]*\n # closing fence ''', re.M | re.X | re.S) def _fenced_code_block_sub(self, match): diff --git a/test/tm-cases/fenced_code_blocks_issue327.html b/test/tm-cases/fenced_code_blocks_issue327.html new file mode 100644 index 00000000..16cfc633 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue327.html @@ -0,0 +1,22 @@ +

Inner code blocks should not render as code blocks

+ +
```cpp
+int x = 10;
+```
+
+ +

Without language specifier

+ +
```
+int x = 10;
+```
+
+ +

Double nesting

+ +
````
+```cpp
+int x = 10;
+```
+````
+
diff --git a/test/tm-cases/fenced_code_blocks_issue327.opts b/test/tm-cases/fenced_code_blocks_issue327.opts new file mode 100644 index 00000000..91560387 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue327.opts @@ -0,0 +1 @@ +{"extras": ["fenced-code-blocks"]} \ No newline at end of file diff --git a/test/tm-cases/fenced_code_blocks_issue327.tags b/test/tm-cases/fenced_code_blocks_issue327.tags new file mode 100644 index 00000000..2d244483 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue327.tags @@ -0,0 +1 @@ +extra fenced-code-blocks \ No newline at end of file diff --git a/test/tm-cases/fenced_code_blocks_issue327.text b/test/tm-cases/fenced_code_blocks_issue327.text new file mode 100644 index 00000000..88ad0c68 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue327.text @@ -0,0 +1,23 @@ +Inner code blocks should not render as code blocks + +```` +```cpp +int x = 10; +``` +```` + +Without language specifier +```` +``` +int x = 10; +``` +```` + +Double nesting +````` +```` +```cpp +int x = 10; +``` +```` +````` \ No newline at end of file From 3d7261bb06de3aeae37596674c9b6528c59834ef Mon Sep 17 00:00:00 2001 From: Crozzers Date: Fri, 11 Mar 2022 15:57:39 +0000 Subject: [PATCH 011/247] Fix issue #400 --- lib/markdown2.py | 2 +- test/tm-cases/hr_uniform_characters.html | 48 +++++++++++++++++++++++ test/tm-cases/hr_uniform_characters.text | 49 ++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/hr_uniform_characters.html create mode 100644 test/tm-cases/hr_uniform_characters.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 0680bcc3..8afcb215 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -986,7 +986,7 @@ def _strip_footnote_definitions(self, text): re.X | re.M) return footnote_def_re.sub(self._extract_footnote_def_sub, text) - _hr_re = re.compile(r'^[ ]{0,3}([-_*][ ]{0,2}){3,}$', re.M) + _hr_re = re.compile(r'^[ ]{0,3}([-_*])[ ]{0,2}(\1[ ]{0,2}){2,}$', re.M) def 
_run_block_gamut(self, text): # These are all the transformations that form block-level diff --git a/test/tm-cases/hr_uniform_characters.html b/test/tm-cases/hr_uniform_characters.html new file mode 100644 index 00000000..2e5a7b95 --- /dev/null +++ b/test/tm-cases/hr_uniform_characters.html @@ -0,0 +1,48 @@ +

Horizontal rules should probably consist of all of the same characters +EG:

+ +
+ +

Or

+ +
+ +

Or

+ +
+ +

But not any of:

+ +

--*

+ +

-*-

+ +

-**

+ +

*--

+ +

-

+ +

**-

+ +

**_

+ +

_

+ +

*__

+ +

_**

+ +

*

+ +

__-

+ +

-

+ +

_--

+ +

-__

+ +

-_-

+ +

--_

diff --git a/test/tm-cases/hr_uniform_characters.text b/test/tm-cases/hr_uniform_characters.text new file mode 100644 index 00000000..8d5b69d8 --- /dev/null +++ b/test/tm-cases/hr_uniform_characters.text @@ -0,0 +1,49 @@ +Horizontal rules should probably consist of all of the same characters +EG: + +*** + +Or + +--- + +Or + +___ + + +But not any of: + +--* + +-*- + +-** + +*-- + +*-* + +**- + +**_ + +*_* + +*__ + +_** + +_*_ + +__- + +_-_ + +_-- + +-__ + +-_- + +--_ \ No newline at end of file From 8a009c93a52e3979864fb0b27a5e90e8a12136f1 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Sun, 13 Mar 2022 21:22:03 -0400 Subject: [PATCH 012/247] Update CHANGES.md --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index f3e977d0..efaa9186 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,9 @@ - [pull #413] Fix meta indentation - [pull #414] Fix code surrounded by blank lines inside blockquote fenced code blocks +- [pull #417] Fix inline code pipe symbol within tables (issue #399) +- [pull #418] Fix code block parsing error (issue #327) +- [pull #419] Fix hr block created when not supposed to (issue #400) ## python-markdown2 2.4.2 From bfb072e4eb8eb2d7324317e9e61eee1c7869dbd4 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Tue, 15 Mar 2022 20:39:42 +0000 Subject: [PATCH 013/247] Fix #355 Issue was the regex for detecting fenced code blocks would detect the newline char preceding AND following the code block, meaning that code blocks separated by a single newline char would not match as the regex wouldn't allow any overlapping. 
New regex uses a look-behind to find such newline chars --- lib/markdown2.py | 2 +- test/tm-cases/fenced_code_blocks_issue355.html | 10 ++++++++++ test/tm-cases/fenced_code_blocks_issue355.opts | 1 + test/tm-cases/fenced_code_blocks_issue355.tags | 1 + test/tm-cases/fenced_code_blocks_issue355.text | 10 ++++++++++ 5 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/fenced_code_blocks_issue355.html create mode 100644 test/tm-cases/fenced_code_blocks_issue355.opts create mode 100644 test/tm-cases/fenced_code_blocks_issue355.tags create mode 100644 test/tm-cases/fenced_code_blocks_issue355.text diff --git a/lib/markdown2.py b/lib/markdown2.py index cfbe3b92..0184b468 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1928,7 +1928,7 @@ def _do_code_blocks(self, text): return code_block_re.sub(self._code_block_sub, text) _fenced_code_block_re = re.compile(r''' - (?:\n+|\A\n?) + (?:\n+|\A\n?|(?<=\n)) (^`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n # $1 = opening fence (captured for back-referencing), $2 = optional lang (.*?) # $3 = code block content \1[ \t]*\n # closing fence diff --git a/test/tm-cases/fenced_code_blocks_issue355.html b/test/tm-cases/fenced_code_blocks_issue355.html new file mode 100644 index 00000000..66cc154e --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue355.html @@ -0,0 +1,10 @@ +
some code block
+
+ +
yet another code block
+
+ +

new line:

+ +
code everywhere
+
diff --git a/test/tm-cases/fenced_code_blocks_issue355.opts b/test/tm-cases/fenced_code_blocks_issue355.opts new file mode 100644 index 00000000..91560387 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue355.opts @@ -0,0 +1 @@ +{"extras": ["fenced-code-blocks"]} \ No newline at end of file diff --git a/test/tm-cases/fenced_code_blocks_issue355.tags b/test/tm-cases/fenced_code_blocks_issue355.tags new file mode 100644 index 00000000..12bca88c --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue355.tags @@ -0,0 +1 @@ +extra fenced-code-blocks pygments \ No newline at end of file diff --git a/test/tm-cases/fenced_code_blocks_issue355.text b/test/tm-cases/fenced_code_blocks_issue355.text new file mode 100644 index 00000000..c7e33b1c --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue355.text @@ -0,0 +1,10 @@ +```python +some code block +``` +``` +yet another code block +``` +new line: +``` +code everywhere +``` \ No newline at end of file From f03c6dfcb02791c90948e36e1eab1f7b6fa8873d Mon Sep 17 00:00:00 2001 From: Crozzers Date: Tue, 15 Mar 2022 23:35:30 +0000 Subject: [PATCH 014/247] Fix #369 This should also probably fix #412 as well --- lib/markdown2.py | 5 +++-- test/tm-cases/backslash_removed_by_adjacent_backtick.html | 7 +++++++ test/tm-cases/backslash_removed_by_adjacent_backtick.text | 7 +++++++ 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 test/tm-cases/backslash_removed_by_adjacent_backtick.html create mode 100644 test/tm-cases/backslash_removed_by_adjacent_backtick.text diff --git a/lib/markdown2.py b/lib/markdown2.py index cfbe3b92..4e93eedf 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -256,6 +256,7 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None, self.cli = cli self._escape_table = g_escape_table.copy() + self._code_table = {} if "smarty-pants" in self.extras: self._escape_table['"'] = _hash_text('"') self._escape_table["'"] = _hash_text("'") @@ -2005,7 +2006,7 @@ def 
_encode_code(self, text): for before, after in replacements: text = text.replace(before, after) hashed = _hash_text(text) - self._escape_table[text] = hashed + self._code_table[text] = hashed return hashed _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S) @@ -2335,7 +2336,7 @@ def _do_link_patterns(self, text): def _unescape_special_chars(self, text): # Swap back in all the special characters we've hidden. - for ch, hash in list(self._escape_table.items()): + for ch, hash in list(self._escape_table.items()) + list(self._code_table.items()): text = text.replace(hash, ch) return text diff --git a/test/tm-cases/backslash_removed_by_adjacent_backtick.html b/test/tm-cases/backslash_removed_by_adjacent_backtick.html new file mode 100644 index 00000000..1091b1a7 --- /dev/null +++ b/test/tm-cases/backslash_removed_by_adjacent_backtick.html @@ -0,0 +1,7 @@ +

hello \world

+ +

hello \world my favourite letter is w

+ +

hello \world my favourite code is import pickle

+ +

hello \world my favourite letter is x

diff --git a/test/tm-cases/backslash_removed_by_adjacent_backtick.text b/test/tm-cases/backslash_removed_by_adjacent_backtick.text new file mode 100644 index 00000000..4136c904 --- /dev/null +++ b/test/tm-cases/backslash_removed_by_adjacent_backtick.text @@ -0,0 +1,7 @@ +hello \world + +hello \world my favourite letter is `w` + +hello \world my favourite code is `import pickle` + +hello \world my favourite letter is `x` \ No newline at end of file From 6269c1f5f5e812f85ffb8524b8bf10b615579abf Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Tue, 22 Mar 2022 14:35:15 -0400 Subject: [PATCH 015/247] Update CHANGES.md --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index efaa9186..f133c924 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,8 @@ - [pull #417] Fix inline code pipe symbol within tables (issue #399) - [pull #418] Fix code block parsing error (issue #327) - [pull #419] Fix hr block created when not supposed to (issue #400) +- [pull #421] Fix backslashes removed by adjacent code blocks (issues #369 and #412) +- [pull #420] Fix md5-* in resulting HTML when several code blocks follow one by one (issue #355) ## python-markdown2 2.4.2 From 4aa4d06d305c54c41d3a14832f7ac514d8a517a7 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Wed, 23 Mar 2022 15:43:33 +0000 Subject: [PATCH 016/247] Fix excessive `
` tags in `ul` lists using `break-on-newline` extra (issue#396). --- lib/markdown2.py | 2 +- ...reak_on_newline_excessive_br_tags_in_ul.html | 17 +++++++++++++++++ ...reak_on_newline_excessive_br_tags_in_ul.opts | 1 + ...reak_on_newline_excessive_br_tags_in_ul.text | 8 ++++++++ 4 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 test/tm-cases/break_on_newline_excessive_br_tags_in_ul.html create mode 100644 test/tm-cases/break_on_newline_excessive_br_tags_in_ul.opts create mode 100644 test/tm-cases/break_on_newline_excessive_br_tags_in_ul.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 500f5f68..c368278f 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1220,7 +1220,7 @@ def _run_span_gamut(self, text): # Do hard breaks: if "break-on-newline" in self.extras: - text = re.sub(r" *\n", ")", "Conteúdo + +
    +
  • O que é estrutura Co-locada (on premises), o que é estrutura híbrida e o que é estrutura em-nuvem? +
      +
    • Em Nuvem (cloud based)
    • +
    • Uma estrutura em-nuvem tem todos os seus principais recursos providos por um provedor de serviços em nuvem. +
        +
      • Uma definição formal de serviço em nuvem pode ser:
      • +
      • Entrega via internet de um serviço de Tecnologia da Informação, sob demanda, em um modelo de pague-pelo-que-consome. +
          +
        • Brown Field é quando você migra um serviço existente
        • +
        • Green field é quando você começa do zero na nuvem, alguns também chamam isto de Cloud



        • +


      • +

    • +
  • +
diff --git a/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.opts b/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.opts new file mode 100644 index 00000000..a3d91cea --- /dev/null +++ b/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.opts @@ -0,0 +1 @@ +{"extras": ["break-on-newline"]} \ No newline at end of file diff --git a/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.text b/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.text new file mode 100644 index 00000000..c8596478 --- /dev/null +++ b/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.text @@ -0,0 +1,8 @@ +## Conteúdo +- O que é estrutura Co-locada (on premises), o que é estrutura híbrida e o que é estrutura em-nuvem? + - Em Nuvem (cloud based) + - Uma estrutura em-nuvem tem todos os seus principais recursos providos por um provedor de serviços em nuvem. + - Uma definição formal de serviço em nuvem pode ser: + - Entrega via internet de um serviço de Tecnologia da Informação, sob demanda, em um modelo de pague-pelo-que-consome. + - Brown Field é quando você migra um serviço existente + - Green field é quando você começa do zero na nuvem, alguns também chamam isto de Cloud \ No newline at end of file From 915b567875ea32c6f45a5d4ebdb16f29d93d042a Mon Sep 17 00:00:00 2001 From: Crozzers Date: Wed, 23 Mar 2022 16:04:01 +0000 Subject: [PATCH 017/247] Fix excessive `
` tags in `ol` lists and at the end of `ul` lists using `break-on-newline` extra --- lib/markdown2.py | 2 +- ...ak_on_newline_excessive_br_tags_in_ul.html | 43 +++++++++++++++++-- ...ak_on_newline_excessive_br_tags_in_ul.text | 19 +++++++- 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index c368278f..beec36c9 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1220,7 +1220,7 @@ def _run_span_gamut(self, text): # Do hard breaks: if "break-on-newline" in self.extras: - text = re.sub(r" *\n(?!\<(?:ul|li)\>)", ")", "Conteúdo
  • Entrega via internet de um serviço de Tecnologia da Informação, sob demanda, em um modelo de pague-pelo-que-consome.
    • Brown Field é quando você migra um serviço existente
    • -
    • Green field é quando você começa do zero na nuvem, alguns também chamam isto de Cloud



    • -


  • -
    +
  • Green field é quando você começa do zero na nuvem, alguns também chamam isto de Cloud
  • + + + +

    Ordered List

    + +
      +
    1. A +
        +
      1. B +
          +
        1. C +
            +
          1. D
          2. +
          3. E
          4. +
        2. +
      2. +
    2. +
    + +

    Mixed List

    + +
      +
    1. A +
        +
      • B +
          +
        1. C +
            +
          • D
          • +
          • E
          • +
        2. +
        3. F +
            +
          1. G
          2. +
          3. H
          4. +
        4. +
      • +
    2. +
    diff --git a/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.text b/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.text index c8596478..aacdd9be 100644 --- a/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.text +++ b/test/tm-cases/break_on_newline_excessive_br_tags_in_ul.text @@ -5,4 +5,21 @@ - Uma definição formal de serviço em nuvem pode ser: - Entrega via internet de um serviço de Tecnologia da Informação, sob demanda, em um modelo de pague-pelo-que-consome. - Brown Field é quando você migra um serviço existente - - Green field é quando você começa do zero na nuvem, alguns também chamam isto de Cloud \ No newline at end of file + - Green field é quando você começa do zero na nuvem, alguns também chamam isto de Cloud + +## Ordered List +1. A + 1. B + 1. C + 1. D + 2. E + +## Mixed List +1. A + - B + 1. C + - D + - E + 2. F + 1. G + 2. H \ No newline at end of file From b912db0fd3251c28a09e905045d0c35a36220c1b Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Thu, 24 Mar 2022 22:21:30 -0400 Subject: [PATCH 018/247] Update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index f133c924..57046141 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,6 +9,7 @@ - [pull #419] Fix hr block created when not supposed to (issue #400) - [pull #421] Fix backslashes removed by adjacent code blocks (issues #369 and #412) - [pull #420] Fix md5-* in resulting HTML when several code blocks follow one by one (issue #355) +- [pull #422] Fix excessive
    tags in lists using break-on-newline extra (issue #394) ## python-markdown2 2.4.2 From bd413d8fb1343350a78f7b5585d785efa7051a87 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Mon, 28 Mar 2022 19:48:48 +0100 Subject: [PATCH 019/247] Standardize key and value definitions for metadata extra. (issue #423) Also split `_meta_data_pattern` regexp over multiple lines to improve readability. Metadata keys are now defined as `[\S \t]+\s*`, one or more non whitespace chars (or spaces/tabs) followed by an optional whitespace before the colon. Metadata values are defined as `.*`, zero or more non-newline characters. This change also fixes #398 --- lib/markdown2.py | 20 +++++++++++++------- test/tm-cases/metadata3.html | 1 + test/tm-cases/metadata3.metadata | 26 ++++++++++++++++++++++++++ test/tm-cases/metadata3.opts | 1 + test/tm-cases/metadata3.text | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 test/tm-cases/metadata3.html create mode 100644 test/tm-cases/metadata3.metadata create mode 100644 test/tm-cases/metadata3.opts create mode 100644 test/tm-cases/metadata3.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 500f5f68..f7076207 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -444,14 +444,20 @@ def preprocess(self, text): # another-var: blah blah # # # header - _meta_data_pattern = re.compile(r'^(?:---[\ \t]*\n)?((?:[\S\w]+\s*:(?:\n+[ \t]+.*)+)|(?:.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)|(?:\s*[\S\w]+\s*:(?! >)[ \t]*.*\n?))(?:---[\ \t]*\n)?', re.MULTILINE) - _key_val_pat = re.compile(r"[\S\w]+\s*:(?! >)[ \t]*.*\n?", re.MULTILINE) - # this allows key: > - # value - # conutiues over multiple lines - _key_val_block_pat = re.compile( - r"(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)", re.MULTILINE + _meta_data_pattern = re.compile(r''' + ^(?:---[\ \t]*\n)?( # optional opening fence + (?: + [\S \t]+\s*:(?:\n+[ \t]+.*)+ # indented lists + )|(?: + (?:[\S \t]+\s*:\s+>(?:\n\s+.*)+?) 
# multiline long descriptions + (?=\n[\S \t]+\s*:\s*.*\n|\s*\Z) # match up until the start of the next key:value definition or the end of the input text + )|(?: + \s*[\S \t]+\s*:(?! >)[ \t]*.*\n? # simple key:value pair, leading spaces allowed + ) + )(?:---[\ \t]*\n)? # optional closing fence + ''', re.MULTILINE | re.VERBOSE ) + _key_val_list_pat = re.compile( r"^-(?:[ \t]*([^\n]*)(?:[ \t]*[:-][ \t]*(\S+))?)(?:\n((?:[ \t]+[^\n]+\n?)+))?", re.MULTILINE, diff --git a/test/tm-cases/metadata3.html b/test/tm-cases/metadata3.html new file mode 100644 index 00000000..2b85c958 --- /dev/null +++ b/test/tm-cases/metadata3.html @@ -0,0 +1 @@ +

    This tests various metadata key:value configurations to make sure they will work well consecutively

    diff --git a/test/tm-cases/metadata3.metadata b/test/tm-cases/metadata3.metadata new file mode 100644 index 00000000..f5033d0d --- /dev/null +++ b/test/tm-cases/metadata3.metadata @@ -0,0 +1,26 @@ +{ + "basic": "value", + "basic2": "test consecutive basic keys", + "empty": "", + "empty2": "", + "long-desc": "long multiline\n description\nwith varying levels of\n indentation", + "long-desc2": "test consecutive long descriptions", + "nested": [ + "list item", + "following a long description" + ], + "nested2": [ + "consecutive nested" + ], + "nested3": [ + { + "subkey": "with subkeys" + } + ], + "long-desc3": "long description following a nested", + "empty-following-long-desc": "", + "key with spaces": "will also be recognized", + "-key_start_with_hyphen": "allowed", + "tab indented key": "allowed", + "finish-with": "a nice long description\nover a couple lines" +} diff --git a/test/tm-cases/metadata3.opts b/test/tm-cases/metadata3.opts new file mode 100644 index 00000000..04e37532 --- /dev/null +++ b/test/tm-cases/metadata3.opts @@ -0,0 +1 @@ +{"extras": ["metadata"]} diff --git a/test/tm-cases/metadata3.text b/test/tm-cases/metadata3.text new file mode 100644 index 00000000..7dacdad1 --- /dev/null +++ b/test/tm-cases/metadata3.text @@ -0,0 +1,32 @@ +--- +basic: value +basic2: test consecutive basic keys +empty: +empty2 : +long-desc: > + long multiline + description + with varying levels of + indentation +long-desc2: > + test consecutive long descriptions +nested: + - list item + - following a long description +nested2: + - consecutive nested +nested3: + - + subkey: with subkeys +long-desc3: > + long description following a nested +empty-following-long-desc: +key with spaces: will also be recognized +-key_start_with_hyphen: allowed + tab indented key: allowed +finish-with : > + a nice long description + over a couple lines +--- + +This tests various metadata key:value configurations to make sure they will work well consecutively \ No newline at end of file From 
b956e9c9c92e91e1f064526d20dd8ee29615dc07 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 7 Apr 2022 15:52:16 +0100 Subject: [PATCH 020/247] Remove redundant character class from `_meta_data_pattern` --- lib/markdown2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index f7076207..c8be9dc3 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -452,7 +452,7 @@ def preprocess(self, text): (?:[\S \t]+\s*:\s+>(?:\n\s+.*)+?) # multiline long descriptions (?=\n[\S \t]+\s*:\s*.*\n|\s*\Z) # match up until the start of the next key:value definition or the end of the input text )|(?: - \s*[\S \t]+\s*:(?! >)[ \t]*.*\n? # simple key:value pair, leading spaces allowed + \s*[\S \t]+\s*:(?! >).*\n? # simple key:value pair, leading spaces allowed ) )(?:---[\ \t]*\n)? # optional closing fence ''', re.MULTILINE | re.VERBOSE From 57ae040d7b635977f300da1a5dec617f3cfd6999 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 7 Apr 2022 15:53:08 +0100 Subject: [PATCH 021/247] Fix metadata key definition to disallow pure whitespace keys. The previous definition, `[\S \t]+\s*` , would allow a completely whitespace key to exist. The new definition, `[\S \t]*\w[\S \t]*\s*` , allows keys that can contain spaces and tabs but requires at least one word character to be in the key somewhere --- lib/markdown2.py | 8 ++++---- test/tm-cases/metadata3.text | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index c8be9dc3..a0427d70 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -447,12 +447,12 @@ def preprocess(self, text): _meta_data_pattern = re.compile(r''' ^(?:---[\ \t]*\n)?( # optional opening fence (?: - [\S \t]+\s*:(?:\n+[ \t]+.*)+ # indented lists + [\S \t]*\w[\S \t]*\s*:(?:\n+[ \t]+.*)+ # indented lists )|(?: - (?:[\S \t]+\s*:\s+>(?:\n\s+.*)+?) 
# multiline long descriptions - (?=\n[\S \t]+\s*:\s*.*\n|\s*\Z) # match up until the start of the next key:value definition or the end of the input text + (?:[\S \t]*\w[\S \t]*\s*:\s+>(?:\n\s+.*)+?) # multiline long descriptions + (?=\n[\S \t]*\w[\S \t]*\s*:\s*.*\n|\s*\Z) # match up until the start of the next key:value definition or the end of the input text )|(?: - \s*[\S \t]+\s*:(?! >).*\n? # simple key:value pair, leading spaces allowed + [\S \t]*\w[\S \t]*\s*:(?! >).*\n? # simple key:value pair, leading spaces allowed ) )(?:---[\ \t]*\n)? # optional closing fence ''', re.MULTILINE | re.VERBOSE diff --git a/test/tm-cases/metadata3.text b/test/tm-cases/metadata3.text index 7dacdad1..e2e12687 100644 --- a/test/tm-cases/metadata3.text +++ b/test/tm-cases/metadata3.text @@ -1,4 +1,5 @@ --- + : empty key should be ignored basic: value basic2: test consecutive basic keys empty: From bdb1c16d985b28e60b64a1c7e7b935596e19b6f6 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 7 Apr 2022 23:48:50 +0100 Subject: [PATCH 022/247] Don't test library against unsupported Python versions. Previously the tests would still be run against Python 2.7 if they found it installed on your machine. Now they will run on Python versions 3.5 to 3.10 and currently unreleased versions up to and including 3.19. 
--- test/testall.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/testall.py b/test/testall.py index 689fe5c0..b236cbbc 100644 --- a/test/testall.py +++ b/test/testall.py @@ -26,7 +26,8 @@ def _python_ver_from_python(python): def _gen_python_names(): yield "python" - for ver in [(2,6), (2,7), (3,3), (3,4), (3,5), (3,6), (3,7)]: + # generate version numbers from python 3.5 to 3.19 + for ver in [(3, i) for i in range(5, 20)]: yield "python%d.%d" % ver if sys.platform == "win32": yield "python%d%d" % ver @@ -43,8 +44,8 @@ def _gen_pythons(): def testall(): for ver, python in _gen_pythons(): - if ver < (2,6) or ver in ((3,0), (3,1), (3,2)): - # Don't support Python < 2.6, 3.0/3.1/3.2. + if ver < (3, 5): + # Don't support Python < 3.5 continue ver_str = "%s.%s" % ver print("-- test with Python %s (%s)" % (ver_str, python)) From b6aaa6874455ca2caa360696483ac915b5a8ff09 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Fri, 8 Apr 2022 15:13:34 +0100 Subject: [PATCH 023/247] Remove mentions of Python2 support from readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e1d44f5c..27425cda 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ was written to closely match the behaviour of the original Perl-implemented Markdown.pl. Markdown2 also comes with a number of extensions (called "extras") for things like syntax coloring, tables, header-ids. See the "Extra Syntax" section below. "markdown2" supports all Python versions -2.6+ or 3.3+ (and pypy and jython, though I don't frequently test those). +3.5+ (and pypy and jython, though I don't frequently test those). There is another [Python markdown.py](https://python-markdown.github.io/). However, at @@ -48,14 +48,14 @@ As a module: ```python >>> import markdown2 >>> markdown2.markdown("*boo!*") # or use `html = markdown_path(PATH)` -u'

    boo!

    \n' +'

    boo!

    \n' >>> from markdown2 import Markdown >>> markdowner = Markdown() >>> markdowner.convert("*boo!*") -u'

    boo!

    \n' +'

    boo!

    \n' >>> markdowner.convert("**boom!**") -u'

    boom!

    \n' +'

    boom!

    \n' ``` As a script (CLI): ```shell @@ -88,7 +88,7 @@ as a script: ```shell >>> import markdown2 >>> markdown2.markdown("*boo!*", extras=["footnotes"]) -u'

    boo!

    \n' +'

    boo!

    \n' ``` There are a number of currently implemented extras for tables, footnotes, syntax coloring of `
    `-blocks, auto-linking patterns, table of contents,
    
    From a16084d4f49dc19ac6f6437c92b9ac6adeb28585 Mon Sep 17 00:00:00 2001
    From: Crozzers 
    Date: Fri, 8 Apr 2022 19:09:35 +0100
    Subject: [PATCH 024/247] Fix #426
    
    The problem was that fenced code blocks would not be detected if placed inside a list (ul/ol) due to
    the leading indentation before the opening code fence.
    This commit tweaks the `_fenced_code_block_re` to ignore leading space/tab indents before an opening fence.
    
    `_code_block_sub` has also been altered to maintain the leading indentation when inserting code hashes so
    as not to break the list (ul/ol) that the fenced code block is in.
    ---
     lib/markdown2.py                              | 11 +++++--
     .../tm-cases/fenced_code_blocks_issue426.html | 31 +++++++++++++++++++
     .../tm-cases/fenced_code_blocks_issue426.opts |  1 +
     .../tm-cases/fenced_code_blocks_issue426.tags |  1 +
     .../tm-cases/fenced_code_blocks_issue426.text | 26 ++++++++++++++++
     5 files changed, 68 insertions(+), 2 deletions(-)
     create mode 100644 test/tm-cases/fenced_code_blocks_issue426.html
     create mode 100644 test/tm-cases/fenced_code_blocks_issue426.opts
     create mode 100644 test/tm-cases/fenced_code_blocks_issue426.tags
     create mode 100644 test/tm-cases/fenced_code_blocks_issue426.text
    
    diff --git a/lib/markdown2.py b/lib/markdown2.py
    index beec36c9..4cff2215 100755
    --- a/lib/markdown2.py
    +++ b/lib/markdown2.py
    @@ -1879,10 +1879,17 @@ def unhash_code(codeblock):
                     return codeblock
                 lexer = self._get_pygments_lexer(lexer_name)
                 if lexer:
    +                # calculate code block's leading indent to not break lists
    +                leading_indent = re.match(r'[ \t]*(?=`{3,})', match.group(1))
    +                if leading_indent is not None:
    +                    leading_indent = leading_indent.group(0)
    +                else:
    +                    leading_indent = ''
    +
                     codeblock = unhash_code( codeblock )
                     colored = self._color_with_pygments(codeblock, lexer,
                                                         **formatter_opts)
    -                return "\n\n%s\n\n" % colored
    +                return "\n\n%s%s\n\n" % (leading_indent, colored)
     
             codeblock = self._encode_code(codeblock)
             pre_class_str = self._html_class_str_from_tag("pre")
    @@ -1930,7 +1937,7 @@ def _do_code_blocks(self, text):
     
         _fenced_code_block_re = re.compile(r'''
             (?:\n+|\A\n?|(?<=\n))
    -        (^`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n  # $1 = opening fence (captured for back-referencing), $2 = optional lang
    +        (^[ \t]*`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n  # $1 = opening fence (captured for back-referencing), $2 = optional lang
             (.*?)                             # $3 = code block content
             \1[ \t]*\n                      # closing fence
             ''', re.M | re.X | re.S)
    diff --git a/test/tm-cases/fenced_code_blocks_issue426.html b/test/tm-cases/fenced_code_blocks_issue426.html
    new file mode 100644
    index 00000000..4882d3d4
    --- /dev/null
    +++ b/test/tm-cases/fenced_code_blocks_issue426.html
    @@ -0,0 +1,31 @@
    +

    Django Templates

    + +

    NOTES

    + +
      +
    • The name should map to the URL.
    • +
    • No distro or app name prefix, they are namespaced by their dirs already
    • +
    • Since templates are made in python, the are named_with_underscores.html (not web style dashes).
    • +
    + +

    URL PARAMETERS IN THE TEMPLATE

    + +
      +
    • All views (except generic.View) from django.forms.generic inherit from ContextMixin
    • +
    • ContextMixin defines the method get_context_data:

      + +
          def get_context_data(self, **kwargs):
      +    kwargs.setdefault('view', self)
      +    if self.extra_context is not None:
      +        kwargs.update(self.extra_context)
      +    return kwargs
      +
      + +

      So when overriding one must be careful to extends super's kwargs:

      + +
          def get_context_data(self, **kwargs):
      +    kwargs = super().get_context_data(**kwargs)
      +    kwargs['page_title'] = "Documentation"
      +    return kwargs
      +
    • +
    diff --git a/test/tm-cases/fenced_code_blocks_issue426.opts b/test/tm-cases/fenced_code_blocks_issue426.opts new file mode 100644 index 00000000..20052b21 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue426.opts @@ -0,0 +1 @@ +{"extras": ["fenced-code-blocks", "pygments"]} diff --git a/test/tm-cases/fenced_code_blocks_issue426.tags b/test/tm-cases/fenced_code_blocks_issue426.tags new file mode 100644 index 00000000..2c03fb5d --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue426.tags @@ -0,0 +1 @@ +extra fenced-code-blocks pygments diff --git a/test/tm-cases/fenced_code_blocks_issue426.text b/test/tm-cases/fenced_code_blocks_issue426.text new file mode 100644 index 00000000..b272c762 --- /dev/null +++ b/test/tm-cases/fenced_code_blocks_issue426.text @@ -0,0 +1,26 @@ +# Django Templates + +## NOTES + +- The name should map to the URL. +- No distro or app name prefix, they are namespaced by their dirs already +- Since templates are made in python, the are `named_with_underscores.html` (not web style dashes). 
+ +## URL PARAMETERS IN THE TEMPLATE + +- All views (except `generic.View`) from `django.forms.generic` inherit from `ContextMixin` +- `ContextMixin` defines the method `get_context_data`: + ```python + def get_context_data(self, **kwargs): + kwargs.setdefault('view', self) + if self.extra_context is not None: + kwargs.update(self.extra_context) + return kwargs + ``` + So when overriding one must be careful to extends `super`'s `kwargs`: + ```py + def get_context_data(self, **kwargs): + kwargs = super().get_context_data(**kwargs) + kwargs['page_title'] = "Documentation" + return kwargs + ``` \ No newline at end of file From 9c1897b2bce1058932b5d2bb9accd1a1d01b7508 Mon Sep 17 00:00:00 2001 From: Nicholas Serra Date: Fri, 8 Apr 2022 15:21:15 -0400 Subject: [PATCH 025/247] Update CHANGES.md --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 57046141..6f4d996c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,6 +10,8 @@ - [pull #421] Fix backslashes removed by adjacent code blocks (issues #369 and #412) - [pull #420] Fix md5-* in resulting HTML when several code blocks follow one by one (issue #355) - [pull #422] Fix excessive
    tags in lists using break-on-newline extra (issue #394) +- [pull #424] Standardize key and value definitions for metadata extra (issue #423) +- [pull #427] Fix fenced code blocks breaking lists (issue #426) ## python-markdown2 2.4.2 From 96fab528d3b68cd80f78b712c314f957796f2d15 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 9 Apr 2022 18:07:18 +0200 Subject: [PATCH 026/247] fix catastrophic backtracking in pyshell code blocks --- CHANGES.md | 1 + lib/markdown2.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 6f4d996c..31f68c9b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,7 @@ - [pull #422] Fix excessive
    tags in lists using break-on-newline extra (issue #394) - [pull #424] Standardize key and value definitions for metadata extra (issue #423) - [pull #427] Fix fenced code blocks breaking lists (issue #426) +- [pull #428] Fix catastrophic backtracking (Regex DoS) in pyshell blocks. ## python-markdown2 2.4.2 diff --git a/lib/markdown2.py b/lib/markdown2.py index aa74ab1b..0e886acb 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1056,8 +1056,8 @@ def _prepare_pyshell_blocks(self, text): less_than_tab = self.tab_width - 1 _pyshell_block_re = re.compile(r""" - ^([ ]{0,%d})>>>[ ].*\n # first line - ^(\1.*\S+.*\n)* # any number of subsequent lines + ^([ ]{0,%d})>>>[ ].*\n # first line + ^(\1[^\S\n]*\S.*\n)* # any number of subsequent lines with at least one character ^\n # ends with a blank line """ % less_than_tab, re.M | re.X) From 7a3bc91ead2dae4c63c961e8f35e99c9788bbc2a Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 9 Apr 2022 11:39:58 +0100 Subject: [PATCH 027/247] Improve error message if `link_patterns` forgotten (#428) --- lib/markdown2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/markdown2.py b/lib/markdown2.py index aa74ab1b..1d2966bf 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2310,6 +2310,7 @@ def _encode_email_address(self, addr): return addr def _do_link_patterns(self, text): + assert self.link_patterns is not None, "If the 'link-patterns' extra is used, an argument for 'link_patterns' is required" link_from_hash = {} for regex, repl in self.link_patterns: replacements = [] From 2a543846cf37ead2eace4668af546fc4e10c0d5d Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 10 Apr 2022 16:41:33 +0100 Subject: [PATCH 028/247] Fix incorrect indentation of fenced code blocks within lists --- lib/markdown2.py | 24 +++++++++++++------ .../tm-cases/fenced_code_blocks_issue426.html | 4 ++-- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index aa74ab1b..9f07155a 100755 --- 
a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1885,17 +1885,27 @@ def unhash_code(codeblock): return codeblock lexer = self._get_pygments_lexer(lexer_name) if lexer: - # calculate code block's leading indent to not break lists - leading_indent = re.match(r'[ \t]*(?=`{3,})', match.group(1)) - if leading_indent is not None: - leading_indent = leading_indent.group(0) - else: - leading_indent = '' + def uniform_dedent(text): + # find leading indentation of each line + ws = re.findall(r'(^[ \t]*)(?:[^ \t\n])', text, re.MULTILINE) + # get smallest common leading indent + ws = sorted(ws)[0] + # dedent every line by smallest common indent + return ws, ''.join( + (line.replace(ws, '', 1) if line.startswith(ws) else line) + for line in text.splitlines(True) + ) + + # remove leading indent from code block + leading_indent, codeblock = uniform_dedent(codeblock) codeblock = unhash_code( codeblock ) colored = self._color_with_pygments(codeblock, lexer, **formatter_opts) - return "\n\n%s%s\n\n" % (leading_indent, colored) + + # add back the indent to all lines + colored = ''.join(leading_indent + line for line in colored.splitlines(True)) + return "\n\n%s\n\n" % colored codeblock = self._encode_code(codeblock) pre_class_str = self._html_class_str_from_tag("pre") diff --git a/test/tm-cases/fenced_code_blocks_issue426.html b/test/tm-cases/fenced_code_blocks_issue426.html index 4882d3d4..152711ba 100644 --- a/test/tm-cases/fenced_code_blocks_issue426.html +++ b/test/tm-cases/fenced_code_blocks_issue426.html @@ -14,7 +14,7 @@

    URL PARAMETERS IN THE TEMPLATE

  • All views (except generic.View) from django.forms.generic inherit from ContextMixin
  • ContextMixin defines the method get_context_data:

    -
        def get_context_data(self, **kwargs):
    +
    def get_context_data(self, **kwargs):
         kwargs.setdefault('view', self)
         if self.extra_context is not None:
             kwargs.update(self.extra_context)
    @@ -23,7 +23,7 @@ 

    URL PARAMETERS IN THE TEMPLATE

    So when overriding one must be careful to extends super's kwargs:

    -
        def get_context_data(self, **kwargs):
    +
    def get_context_data(self, **kwargs):
         kwargs = super().get_context_data(**kwargs)
         kwargs['page_title'] = "Documentation"
         return kwargs
    
    From 5898fcc1090ef7cd7783fa1422cc0e53cbca9d1b Mon Sep 17 00:00:00 2001
    From: Crozzers 
    Date: Sun, 10 Apr 2022 21:42:02 +0100
    Subject: [PATCH 029/247] Fix filter bypass leading to XSS (#362)
    
    ---
     lib/markdown2.py                 | 2 +-
     test/tm-cases/xss_issue_362.html | 2 ++
     test/tm-cases/xss_issue_362.opts | 1 +
     test/tm-cases/xss_issue_362.text | 2 ++
     4 files changed, 6 insertions(+), 1 deletion(-)
     create mode 100644 test/tm-cases/xss_issue_362.html
     create mode 100644 test/tm-cases/xss_issue_362.opts
     create mode 100644 test/tm-cases/xss_issue_362.text
    
    diff --git a/lib/markdown2.py b/lib/markdown2.py
    index aa74ab1b..750a50a0 100755
    --- a/lib/markdown2.py
    +++ b/lib/markdown2.py
    @@ -2249,7 +2249,7 @@ def _encode_amps_and_angles(self, text):
             text = self._naked_gt_re.sub('>', text)
             return text
     
    -    _incomplete_tags_re = re.compile(r"<(/?\w+?(?!\w).+?[\s/]+?)")
    +    _incomplete_tags_re = re.compile(r"<(/?\w+?(?!\w)\s*?.+?[\s/]+?)")
     
         def _encode_incomplete_tags(self, text):
             if self.safe_mode not in ("replace", "escape"):
    diff --git a/test/tm-cases/xss_issue_362.html b/test/tm-cases/xss_issue_362.html
    new file mode 100644
    index 00000000..9d878bd3
    --- /dev/null
    +++ b/test/tm-cases/xss_issue_362.html
    @@ -0,0 +1,2 @@
    +

    <iframe +onload=alert()//

    diff --git a/test/tm-cases/xss_issue_362.opts b/test/tm-cases/xss_issue_362.opts new file mode 100644 index 00000000..8d202ad0 --- /dev/null +++ b/test/tm-cases/xss_issue_362.opts @@ -0,0 +1 @@ +{"safe_mode": True} \ No newline at end of file diff --git a/test/tm-cases/xss_issue_362.text b/test/tm-cases/xss_issue_362.text new file mode 100644 index 00000000..3016199a --- /dev/null +++ b/test/tm-cases/xss_issue_362.text @@ -0,0 +1,2 @@ +