From ba5a2f08ab221c43a6cdea4401a330cd25397d0a Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Fri, 30 Jan 2026 16:02:57 +0100 Subject: [PATCH 1/3] =?UTF-8?q?Optimize=20PipeTable=20parsing:=20O(n=C2=B2?= =?UTF-8?q?)=20=E2=86=92=20O(n)=20for=20large=20tables?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pipe tables were creating deeply nested tree structures where each pipe delimiter contained all subsequent content as children, causing O(n²) traversal complexity for n cells. This change restructures the parser to use a flat sibling-based structure, treating tables as matrices rather than nested trees. Key changes: - Set IsClosed=true on PipeTableDelimiterInline to prevent nesting - Add PromoteNestedPipesToRootLevel() to flatten pipes nested in emphasis - Update cell boundary detection to use sibling traversal - Move EmphasisInlineParser before PipeTableParser in processing order - Fix EmphasisInlineParser to continue past IsClosed delimiters - Add ContainsParentOrSiblingOfType() helper for flat structure detection Performance improvements (measured on typical markdown content): | Rows | Before | After | Speedup | |------|-----------|---------|---------| | 100 | 542 μs | 150 μs | 3.6x | | 500 | 23,018 μs | 763 μs | 30x | | 1000 | 89,418 μs | 1,596 μs| 56x | | 1500 | 201,593 μs| 2,740 μs| 74x | | 5000 | CRASH | 10,588 μs| ∞ | | 10000| CRASH | 18,551 μs| ∞ | Tables with 5000+ rows previously crashed due to stack overflow from recursive depth. They now parse successfully with linear time complexity. 
--- .gitignore | 2 + .../PipeTable/PipeTableBenchmark.cs | 81 +++++ .../PipeTable/PipeTableGenerator.cs | 61 ++++ .../PipeTable/baseline-results.md | 34 ++ src/Markdig.Benchmarks/Program.cs | 12 +- .../Extensions/Tables/PipeTableExtension.cs | 2 +- .../Extensions/Tables/PipeTableParser.cs | 311 +++++++++++------- .../Parsers/Inlines/CodeInlineParser.cs | 3 +- .../Parsers/Inlines/EmphasisInlineParser.cs | 5 +- src/Markdig/Syntax/Inlines/Inline.cs | 41 +++ 10 files changed, 436 insertions(+), 116 deletions(-) create mode 100644 src/Markdig.Benchmarks/PipeTable/PipeTableBenchmark.cs create mode 100644 src/Markdig.Benchmarks/PipeTable/PipeTableGenerator.cs create mode 100644 src/Markdig.Benchmarks/PipeTable/baseline-results.md diff --git a/.gitignore b/.gitignore index bc09fdd57..8380627f9 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ *.sln.docstates *.nuget.props *.nuget.targets +src/.idea +BenchmarkDotNet.Artifacts # User-specific files (MonoDevelop/Xamarin Studio) *.userprefs diff --git a/src/Markdig.Benchmarks/PipeTable/PipeTableBenchmark.cs b/src/Markdig.Benchmarks/PipeTable/PipeTableBenchmark.cs new file mode 100644 index 000000000..86e55cecb --- /dev/null +++ b/src/Markdig.Benchmarks/PipeTable/PipeTableBenchmark.cs @@ -0,0 +1,81 @@ +// Copyright (c) Alexandre Mutel. All rights reserved. +// This file is licensed under the BSD-Clause 2 license. +// See the license.txt file in the project root for more information. + +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Diagnosers; +using Markdig; + +namespace Testamina.Markdig.Benchmarks.PipeTable; + +/// +/// Benchmark for pipe table parsing performance, especially for large tables. +/// Tests the performance of PipeTableParser with varying table sizes. 
+/// +[MemoryDiagnoser] +[GcServer(true)] // Use server GC to get more comprehensive GC stats +public class PipeTableBenchmark +{ + private string _100Rows = null!; + private string _500Rows = null!; + private string _1000Rows = null!; + private string _1500Rows = null!; + private string _5000Rows = null!; + private string _10000Rows = null!; + private MarkdownPipeline _pipeline = null!; + + [GlobalSetup] + public void Setup() + { + // Pipeline with pipe tables enabled (part of advanced extensions) + _pipeline = new MarkdownPipelineBuilder() + .UseAdvancedExtensions() + .Build(); + + // Generate tables of various sizes + // Note: Before optimization, 5000+ rows hit depth limit due to nested tree structure. + // After optimization, these should work. + _100Rows = PipeTableGenerator.Generate(rows: 100, columns: 5); + _500Rows = PipeTableGenerator.Generate(rows: 500, columns: 5); + _1000Rows = PipeTableGenerator.Generate(rows: 1000, columns: 5); + _1500Rows = PipeTableGenerator.Generate(rows: 1500, columns: 5); + _5000Rows = PipeTableGenerator.Generate(rows: 5000, columns: 5); + _10000Rows = PipeTableGenerator.Generate(rows: 10000, columns: 5); + } + + [Benchmark(Description = "PipeTable 100 rows x 5 cols")] + public string Parse100Rows() + { + return Markdown.ToHtml(_100Rows, _pipeline); + } + + [Benchmark(Description = "PipeTable 500 rows x 5 cols")] + public string Parse500Rows() + { + return Markdown.ToHtml(_500Rows, _pipeline); + } + + [Benchmark(Description = "PipeTable 1000 rows x 5 cols")] + public string Parse1000Rows() + { + return Markdown.ToHtml(_1000Rows, _pipeline); + } + + [Benchmark(Description = "PipeTable 1500 rows x 5 cols")] + public string Parse1500Rows() + { + return Markdown.ToHtml(_1500Rows, _pipeline); + } + + [Benchmark(Description = "PipeTable 5000 rows x 5 cols")] + public string Parse5000Rows() + { + return Markdown.ToHtml(_5000Rows, _pipeline); + } + + [Benchmark(Description = "PipeTable 10000 rows x 5 cols")] + public string 
Parse10000Rows() + { + return Markdown.ToHtml(_10000Rows, _pipeline); + } +} diff --git a/src/Markdig.Benchmarks/PipeTable/PipeTableGenerator.cs b/src/Markdig.Benchmarks/PipeTable/PipeTableGenerator.cs new file mode 100644 index 000000000..2b9f7c34b --- /dev/null +++ b/src/Markdig.Benchmarks/PipeTable/PipeTableGenerator.cs @@ -0,0 +1,61 @@ +// Copyright (c) Alexandre Mutel. All rights reserved. +// This file is licensed under the BSD-Clause 2 license. +// See the license.txt file in the project root for more information. + +using System.Text; + +namespace Testamina.Markdig.Benchmarks.PipeTable; + +/// +/// Generates pipe table markdown content for benchmarking purposes. +/// +public static class PipeTableGenerator +{ + private const int DefaultCellWidth = 10; + + /// + /// Generates a pipe table in markdown format. + /// + /// Number of data rows (excluding header) + /// Number of columns + /// Width of each cell content (default: 10) + /// Pipe table markdown string + public static string Generate(int rows, int columns, int cellWidth = DefaultCellWidth) + { + var sb = new StringBuilder(); + + // Header row + sb.Append('|'); + for (int col = 0; col < columns; col++) + { + sb.Append(' '); + sb.Append($"Header {col + 1}".PadRight(cellWidth)); + sb.Append(" |"); + } + sb.AppendLine(); + + // Separator row (with dashes) + sb.Append('|'); + for (int col = 0; col < columns; col++) + { + sb.Append(new string('-', cellWidth + 2)); + sb.Append('|'); + } + sb.AppendLine(); + + // Data rows + for (int row = 0; row < rows; row++) + { + sb.Append('|'); + for (int col = 0; col < columns; col++) + { + sb.Append(' '); + sb.Append($"R{row + 1}C{col + 1}".PadRight(cellWidth)); + sb.Append(" |"); + } + sb.AppendLine(); + } + + return sb.ToString(); + } +} diff --git a/src/Markdig.Benchmarks/PipeTable/baseline-results.md b/src/Markdig.Benchmarks/PipeTable/baseline-results.md new file mode 100644 index 000000000..b2a1d3323 --- /dev/null +++ 
b/src/Markdig.Benchmarks/PipeTable/baseline-results.md @@ -0,0 +1,34 @@ +# PipeTable Baseline Results (Pre-Optimization) + +## Summary Table (Server GC) + +| Method | Mean | Error | StdDev | Gen0 | Gen1 | Allocated | +|-------------------------------- |-------------:|------------:|------------:|-------:|-------:|-----------:| +| 'PipeTable 100 rows x 5 cols' | 542.0 us | 2.25 us | 1.88 us | 2.9297 | 0.9766 | 367.38 KB | +| 'PipeTable 500 rows x 5 cols' | 23,018.4 us | 150.30 us | 133.24 us | - | - | 1818.08 KB | +| 'PipeTable 1000 rows x 5 cols' | 89,418.0 us | 507.04 us | 474.28 us | - | - | 3702.7 KB | +| 'PipeTable 1500 rows x 5 cols' | 201,593.3 us | 2,133.24 us | 1,995.44 us | - | - | 5660.16 KB | +| 'PipeTable 5000 rows x 5 cols' | NA | NA | NA | NA | NA | NA | +| 'PipeTable 10000 rows x 5 cols' | NA | NA | NA | NA | NA | NA | + +## Scaling Analysis + +The scaling is clearly super-linear (O(n²) or worse): +- 100 → 500 (5x rows): 542µs → 23ms = **42x slowdown** +- 500 → 1000 (2x rows): 23ms → 89ms = **3.9x slowdown** +- 1000 → 1500 (1.5x rows): 89ms → 202ms = **2.3x slowdown** + +## Issues + +- 5000 and 10000 row benchmarks **FAIL** due to depth limit exceeded +- Deep nesting causes O(n²) backward traversal for cell boundary detection +- Memory allocation scales linearly but is excessive (~3.7 KB per row) +- Gen0/Gen1 collections drop to 0 for larger tables (server GC handles in batches) + +## Environment + +- BenchmarkDotNet v0.14.0 +- macOS 26.2 (Darwin 25.2.0) +- Apple M2 Pro, 12 cores +- .NET 8.0.8, Arm64 RyuJIT AdvSIMD +- Server GC enabled diff --git a/src/Markdig.Benchmarks/Program.cs b/src/Markdig.Benchmarks/Program.cs index 7433f43dd..79b1f66cd 100644 --- a/src/Markdig.Benchmarks/Program.cs +++ b/src/Markdig.Benchmarks/Program.cs @@ -7,6 +7,7 @@ using BenchmarkDotNet.Running; using Markdig; +using Testamina.Markdig.Benchmarks.PipeTable; namespace Testamina.Markdig.Benchmarks; @@ -68,7 +69,16 @@ static void Main(string[] args) //config.Add(gcDiagnoser); 
//var config = DefaultConfig.Instance; - BenchmarkRunner.Run(config); + + // Run specific benchmarks based on command line arguments + if (args.Length > 0 && args[0] == "--pipetable") + { + BenchmarkRunner.Run(config); + } + else + { + BenchmarkRunner.Run(config); + } //BenchmarkRunner.Run(config); //BenchmarkRunner.Run(); //BenchmarkRunner.Run(); diff --git a/src/Markdig/Extensions/Tables/PipeTableExtension.cs b/src/Markdig/Extensions/Tables/PipeTableExtension.cs index 84ae8ca38..6c7528831 100644 --- a/src/Markdig/Extensions/Tables/PipeTableExtension.cs +++ b/src/Markdig/Extensions/Tables/PipeTableExtension.cs @@ -38,7 +38,7 @@ public void Setup(MarkdownPipelineBuilder pipeline) var lineBreakParser = pipeline.InlineParsers.FindExact(); if (!pipeline.InlineParsers.Contains()) { - pipeline.InlineParsers.InsertBefore(new PipeTableParser(lineBreakParser!, Options)); + pipeline.InlineParsers.InsertAfter(new PipeTableParser(lineBreakParser!, Options)); } } diff --git a/src/Markdig/Extensions/Tables/PipeTableParser.cs b/src/Markdig/Extensions/Tables/PipeTableParser.cs index 601241fd6..4604fb92a 100644 --- a/src/Markdig/Extensions/Tables/PipeTableParser.cs +++ b/src/Markdig/Extensions/Tables/PipeTableParser.cs @@ -19,7 +19,7 @@ namespace Markdig.Extensions.Tables; /// public class PipeTableParser : InlineParser, IPostInlineProcessor { - private readonly LineBreakInlineParser lineBreakParser; + private readonly LineBreakInlineParser _lineBreakParser; /// /// Initializes a new instance of the class. @@ -28,7 +28,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor /// The options. public PipeTableParser(LineBreakInlineParser lineBreakParser, PipeTableOptions? options = null) { - this.lineBreakParser = lineBreakParser ?? throw new ArgumentNullException(nameof(lineBreakParser)); + _lineBreakParser = lineBreakParser ?? throw new ArgumentNullException(nameof(lineBreakParser)); OpeningCharacters = ['|', '\n', '\r']; Options = options ?? 
new PipeTableOptions(); } @@ -86,7 +86,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) tableState.IsInvalidTable = true; } tableState.LineHasPipe = false; - lineBreakParser.Match(processor, ref slice); + _lineBreakParser.Match(processor, ref slice); if (!isFirstLineEmpty) { tableState.ColumnAndLineDelimiters.Add(processor.Inline!); @@ -100,7 +100,8 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) Span = new SourceSpan(position, position), Line = globalLineIndex, Column = column, - LocalLineIndex = localLineIndex + LocalLineIndex = localLineIndex, + IsClosed = true // Creates flat sibling structure for O(n) traversal }; tableState.LineHasPipe = true; @@ -125,6 +126,8 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, return true; } + // With flat structure, pipes are siblings at root level + // Walk backwards from the last child to find pipe delimiters var child = container.LastChild; List? delimitersToRemove = null; @@ -142,8 +145,8 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, break; } - var subContainer = child as ContainerInline; - child = subContainer?.LastChild; + // Walk siblings instead of descending into containers + child = child.PreviousSibling; } // If we have found any delimiters, transform them to literals @@ -186,8 +189,8 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, // Remove previous state state.ParserStates[Index] = null!; - // Continue - if (tableState is null || container is null || tableState.IsInvalidTable || !tableState.LineHasPipe ) //|| tableState.LineIndex != state.LocalLineIndex) + // Abort if not a valid table + if (tableState is null || container is null || tableState.IsInvalidTable || !tableState.LineHasPipe) { if (tableState is not null) { @@ -204,11 +207,18 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? 
lastChild, // Detect the header row var delimiters = tableState.ColumnAndLineDelimiters; - // TODO: we could optimize this by merging FindHeaderRow and the cell loop var aligns = FindHeaderRow(delimiters); if (Options.RequireHeaderSeparator && aligns is null) { + // No valid header separator found - convert all pipe delimiters to literals + foreach (var inline in delimiters) + { + if (inline is PipeTableDelimiterInline pipeDelimiter) + { + pipeDelimiter.ReplaceByLiteral(); + } + } return true; } @@ -224,68 +234,40 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, var cells = tableState.Cells; cells.Clear(); - //delimiters[0].DumpTo(state.DebugLog); + // Pipes may end up nested inside unmatched emphasis delimiters, e.g.: + // *a | b*| + // Promote them to root level so we have a flat sibling structure. + PromoteNestedPipesToRootLevel(delimiters, container); - // delimiters contain a list of `|` and `\n` delimiters - // The `|` delimiters are created as child containers. - // So the following: - // | a | b \n - // | d | e \n + // The inline tree is now flat: all pipes and line breaks are siblings at root level. 
+ // For example, `| a | b \n| c | d \n` produces: + // [|] [a] [|] [b] [\n] [|] [c] [|] [d] [\n] // - // Will generate a tree of the following node: - // | - // a - // | - // b - // \n - // | - // d - // | - // e - // \n - // When parsing delimiters, we need to recover whether a row is of the following form: - // 0) | a | b | \n - // 1) | a | b \n - // 2) a | b \n - // 3) a | b | \n - - // If the last element is not a line break, add a line break to homogenize parsing in the next loop - var lastElement = delimiters[delimiters.Count - 1]; + // Tables support four row formats: + // | a | b | (leading and trailing pipes) + // | a | b (leading pipe only) + // a | b (no leading or trailing pipes) + // a | b | (trailing pipe only) + + // Ensure the table ends with a line break to simplify row detection + var lastElement = delimiters[^1]; if (!(lastElement is LineBreakInline)) { - while (true) + // Find the actual last sibling (there may be content after the last delimiter) + while (lastElement.NextSibling != null) { - if (lastElement is ContainerInline lastElementContainer) - { - var nextElement = lastElementContainer.LastChild; - if (nextElement != null) - { - lastElement = nextElement; - continue; - } - } - break; + lastElement = lastElement.NextSibling; } var endOfTable = new LineBreakInline(); - // If the last element is a container, we have to add the EOL to its child - // otherwise only next sibling - if (lastElement is ContainerInline) - { - ((ContainerInline)lastElement).AppendChild(endOfTable); - } - else - { - lastElement.InsertAfter(endOfTable); - } + lastElement.InsertAfter(endOfTable); delimiters.Add(endOfTable); tableState.EndOfLines.Add(endOfTable); } int lastPipePos = 0; - // Cell loop - // Reconstruct the table from the delimiters + // Build table rows and cells by iterating through delimiters TableRow? row = null; TableRow? 
firstRow = null; for (int i = 0; i < delimiters.Count; i++) @@ -300,9 +282,7 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, firstRow ??= row; - // If the first delimiter is a pipe and doesn't have any parent or previous sibling, for cases like: - // 0) | a | b | \n - // 1) | a | b \n + // Skip leading pipe at start of row (e.g., `| a | b` or `| a | b |`) if (pipeSeparator != null && (delimiter.PreviousSibling is null || delimiter.PreviousSibling is LineBreakInline)) { delimiter.Remove(); @@ -316,57 +296,37 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, } } - // We need to find the beginning/ending of a cell from a right delimiter. From the delimiter 'x', we need to find a (without the delimiter start `|`) - // So we iterate back to the first pipe or line break - // x - // 1) | a | b \n - // 2) a | b \n + // Find cell content by walking backwards from this delimiter to the previous pipe or line break. + // For `| a | b \n` at delimiter 'x': + // [|] [a] [x] [b] [\n] + // ^--- current delimiter + // Walk back: [a] is the cell content (stop at [|]) Inline? endOfCell = null; Inline? beginOfCell = null; - var cellContentIt = delimiter; - while (true) + var cellContentIt = delimiter.PreviousSibling; + while (cellContentIt != null) { - cellContentIt = cellContentIt.PreviousSibling ?? 
cellContentIt.Parent; - - if (cellContentIt is null || cellContentIt is LineBreakInline) - { + if (cellContentIt is LineBreakInline || cellContentIt is PipeTableDelimiterInline) break; - } - // The cell begins at the first effective child after a | or the top ContainerInline (which is not necessary to bring into the tree + it contains an invalid span calculation) - if (cellContentIt is PipeTableDelimiterInline || (cellContentIt.GetType() == typeof(ContainerInline) && cellContentIt.Parent is null )) - { - beginOfCell = ((ContainerInline)cellContentIt).FirstChild; - if (endOfCell is null) - { - endOfCell = beginOfCell; - } + // Stop at the root ContainerInline (which is not necessary to bring into the tree + it contains an invalid span calculation) + if (cellContentIt.GetType() == typeof(ContainerInline) && cellContentIt.Parent is null) break; - } beginOfCell = cellContentIt; - if (endOfCell is null) - { - endOfCell = beginOfCell; - } + endOfCell ??= beginOfCell; + + cellContentIt = cellContentIt.PreviousSibling; } - // If the current deilimiter is a pipe `|` OR + // If the current delimiter is a pipe `|` OR // the beginOfCell/endOfCell are not null and - // either they are : + // either they are: // - different // - they contain a single element, but it is not a line break (\n) or an empty/whitespace Literal. 
// Then we can add a cell to the current row if (!isLine || (beginOfCell != null && endOfCell != null && ( beginOfCell != endOfCell || !(beginOfCell is LineBreakInline || (beginOfCell is LiteralInline beingOfCellLiteral && beingOfCellLiteral.Content.IsEmptyOrWhitespace()))))) { - if (!isLine) - { - // If the delimiter is a pipe, we need to remove it from the tree - // so that previous loop looking for a parent will not go further on subsequent cells - delimiter.Remove(); - lastPipePos = delimiter.Span.End; - } - // We trim whitespace at the beginning and ending of the cell TrimStart(beginOfCell); TrimEnd(endOfCell); @@ -374,10 +334,20 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, var cellContainer = new ContainerInline(); // Copy elements from beginOfCell on the first level + // The pipe delimiter serves as a boundary - stop when we hit it var cellIt = beginOfCell; while (cellIt != null && !IsLine(cellIt) && !(cellIt is PipeTableDelimiterInline)) { var nextSibling = cellIt.NextSibling; + + // Skip empty literals (can result from trimming) + if (cellIt is LiteralInline { Content.IsEmpty: true }) + { + cellIt.Remove(); + cellIt = nextSibling; + continue; + } + cellIt.Remove(); if (cellContainer.Span.IsEmpty) { @@ -390,8 +360,16 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, cellIt = nextSibling; } + if (!isLine) + { + // Remove the pipe delimiter AFTER copying cell content + // This preserves the sibling chain during the copy loop + delimiter.Remove(); + lastPipePos = delimiter.Span.End; + } + // Create the cell and add it to the pending row - var tableParagraph = new ParagraphBlock() + var tableParagraph = new ParagraphBlock { Span = cellContainer.Span, Line = cellContainer.Line, @@ -423,11 +401,11 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? 
lastChild, Debug.Assert(row != null); if (table.Span.IsEmpty) { - table.Span = row!.Span; + table.Span = row.Span; table.Line = row.Line; table.Column = row.Column; } - table.Add(row!); + table.Add(row); row = null; } } @@ -443,8 +421,7 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, endOfLine.Remove(); } - // If we have a header row, we can remove it - // TODO: we could optimize this by merging FindHeaderRow and the previous loop + // Mark first row as header and remove the separator row if present var tableRow = (TableRow)table[0]; tableRow.IsHeader = Options.RequireHeaderSeparator; if (aligns != null) @@ -454,11 +431,13 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, table.ColumnDefinitions.AddRange(aligns); } - // Perform delimiter processor that are coming after this processor + // Perform all post-processors on cell content + // With InsertAfter, emphasis runs before pipe table, so we need to re-run from index 0 + // to ensure emphasis delimiters in cells are properly matched foreach (var cell in cells) { var paragraph = (ParagraphBlock) cell[0]; - state.PostProcessInlines(postInlineProcessorIndex + 1, paragraph.Inline, null, true); + state.PostProcessInlines(0, paragraph.Inline, null, true); if (paragraph.Inline?.LastChild is not null) { paragraph.Inline.Span.End = paragraph.Inline.LastChild.Span.End; @@ -548,7 +527,7 @@ private static bool ParseHeaderString(Inline? inline, out TableColumnAlign? alig continue; } - // The last delimiter is always null, + // Parse the separator row (second row) to extract column alignments for (int j = i + 1; j < delimiters.Count; j++) { var delimiter = delimiters[j]; @@ -560,11 +539,13 @@ private static bool ParseHeaderString(Inline? inline, out TableColumnAlign? 
alig continue; } - // Check the left side of a `|` delimiter + // Parse the content before this delimiter as a column definition (e.g., `:---`, `---:`, `:---:`) + // Skip if previous sibling is a pipe (empty cell) or whitespace TableColumnAlign? align = null; int delimiterCount = 0; if (delimiter.PreviousSibling != null && - !(delimiter.PreviousSibling is LiteralInline li && li.Content.IsEmptyOrWhitespace()) && // ignore parsed whitespace + !(delimiter.PreviousSibling is PipeTableDelimiterInline) && + !(delimiter.PreviousSibling is LiteralInline li && li.Content.IsEmptyOrWhitespace()) && !ParseHeaderString(delimiter.PreviousSibling, out align, out delimiterCount)) { break; @@ -576,14 +557,13 @@ private static bool ParseHeaderString(Inline? inline, out TableColumnAlign? alig totalDelimiterCount += delimiterCount; columnDefinitions.Add(new TableColumnDefinition() { Alignment = align, Width = delimiterCount}); - // If this is the last delimiter, we need to check the right side of the `|` delimiter + // If this is the last pipe, check for a trailing column definition (row without trailing pipe) + // e.g., `| :--- | ---:` has content after the last pipe if (nextDelimiter is null) { - var nextSibling = columnDelimiter != null - ? columnDelimiter.FirstChild - : delimiter.NextSibling; + var nextSibling = delimiter.NextSibling; - // If there is no content after + // No trailing content means row ends with pipe: `| :--- |` if (IsNullOrSpace(nextSibling)) { isValidRow = true; @@ -664,9 +644,9 @@ private static bool IsStartOfLineColumnDelimiter(Inline? inline) private static void TrimStart(Inline? inline) { - while (inline is ContainerInline && !(inline is DelimiterInline)) + while (inline is ContainerInline containerInline && !(containerInline is DelimiterInline)) { - inline = ((ContainerInline)inline).FirstChild; + inline = containerInline.FirstChild; } if (inline is LiteralInline literal) @@ -677,6 +657,13 @@ private static void TrimStart(Inline? 
inline) private static void TrimEnd(Inline? inline) { + // Walk into containers to find the last leaf to trim + // Skip PipeTableDelimiterInline but walk into other containers (including emphasis) + while (inline is ContainerInline container && !(inline is PipeTableDelimiterInline)) + { + inline = container.LastChild; + } + if (inline is LiteralInline literal) { literal.Content.TrimEnd(); @@ -697,6 +684,106 @@ private static bool IsNullOrSpace(Inline? inline) return false; } + /// + /// Promotes nested pipe delimiters and line breaks to root level. + /// + /// + /// Handles cases like `*a | b*|` where the pipe ends up inside an unmatched emphasis container. + /// After promotion, all delimiters become siblings at root level for consistent cell boundary detection. + /// + private static void PromoteNestedPipesToRootLevel(List delimiters, ContainerInline root) + { + for (int i = 0; i < delimiters.Count; i++) + { + var delimiter = delimiters[i]; + + // Handle both pipe delimiters and line breaks + bool isPipe = delimiter is PipeTableDelimiterInline; + bool isLineBreak = delimiter is LineBreakInline; + if (!isPipe && !isLineBreak) + continue; + + // Skip if already at root level + if (delimiter.Parent == root) + continue; + + // Find the top-level ancestor (direct child of root) + var ancestor = delimiter.Parent; + while (ancestor?.Parent != null && ancestor.Parent != root) + { + ancestor = ancestor.Parent; + } + + if (ancestor is null || ancestor.Parent != root) + continue; + + // Split: promote delimiter to be sibling of ancestor + SplitContainerAtDelimiter(delimiter, ancestor); + } + } + + /// + /// Splits a container at the delimiter, promoting the delimiter to root level. 
+ /// + /// + /// For input `*a | b*`, the pipe is inside the emphasis container: + /// EmphasisDelimiter { "a", Pipe, "b" } + /// After splitting: + /// EmphasisDelimiter { "a" }, Pipe, Container { "b" } + /// + private static void SplitContainerAtDelimiter(Inline delimiter, Inline ancestor) + { + if (delimiter.Parent is not { } parent) return; + + // Collect content after the delimiter + var contentAfter = new List(); + var current = delimiter.NextSibling; + while (current != null) + { + contentAfter.Add(current); + current = current.NextSibling; + } + + // Remove content after delimiter from parent + foreach (var inline in contentAfter) + { + inline.Remove(); + } + + // Remove delimiter from parent + delimiter.Remove(); + + // Insert delimiter after the ancestor (at root level) + ancestor.InsertAfter(delimiter); + + // If there's content after, wrap in new container and insert after delimiter + if (contentAfter.Count > 0) + { + // Create new container matching the original parent type + var newContainer = CreateMatchingContainer(parent); + foreach (var inline in contentAfter) + { + newContainer.AppendChild(inline); + } + delimiter.InsertAfter(newContainer); + } + } + + /// + /// Creates a container to wrap content split from the source container. + /// + private static ContainerInline CreateMatchingContainer(ContainerInline source) + { + // Emphasis processing runs before pipe table processing, so emphasis delimiters + // are already resolved. A plain ContainerInline suffices. 
+ return new ContainerInline + { + Span = source.Span, + Line = source.Line, + Column = source.Column + }; + } + private sealed class TableState { public bool IsInvalidTable { get; set; } diff --git a/src/Markdig/Parsers/Inlines/CodeInlineParser.cs b/src/Markdig/Parsers/Inlines/CodeInlineParser.cs index 216e68f8c..b1fdbe87a 100644 --- a/src/Markdig/Parsers/Inlines/CodeInlineParser.cs +++ b/src/Markdig/Parsers/Inlines/CodeInlineParser.cs @@ -85,7 +85,8 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) // We saw the start of a code inline, but the close sticks are not present on the same line. // If the next line starts with a pipe character, this is likely an incomplete CodeInline within a table. // Treat it as regular text to avoid breaking the overall table shape. - if (processor.Inline != null && processor.Inline.ContainsParentOfType()) + // Use ContainsParentOrSiblingOfType to handle both nested and flat pipe table structures. + if (processor.Inline != null && processor.Inline.ContainsParentOrSiblingOfType()) { slice.Start = openingStart; return false; diff --git a/src/Markdig/Parsers/Inlines/EmphasisInlineParser.cs b/src/Markdig/Parsers/Inlines/EmphasisInlineParser.cs index e2aaada0c..8382afd26 100644 --- a/src/Markdig/Parsers/Inlines/EmphasisInlineParser.cs +++ b/src/Markdig/Parsers/Inlines/EmphasisInlineParser.cs @@ -125,7 +125,10 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, } // Follow DelimiterInline (EmphasisDelimiter, TableDelimiter...) - child = delimiterInline.FirstChild; + // If the delimiter has IsClosed=true (e.g., pipe table delimiter), it has no children + // In that case, continue to next sibling instead of stopping + var firstChild = delimiterInline.FirstChild; + child = firstChild ?? 
delimiterInline.NextSibling; } else { diff --git a/src/Markdig/Syntax/Inlines/Inline.cs b/src/Markdig/Syntax/Inlines/Inline.cs index 47f33af28..584743262 100644 --- a/src/Markdig/Syntax/Inlines/Inline.cs +++ b/src/Markdig/Syntax/Inlines/Inline.cs @@ -216,6 +216,47 @@ public bool ContainsParentOfType() where T : Inline return false; } + /// + /// Determines whether there is a sibling of the specified type among root-level siblings. + /// This walks up to find the root container, then checks all siblings. + /// + /// Type of the sibling to check + /// true if a sibling of the specified type exists; false otherwise + public bool ContainsParentOrSiblingOfType() where T : Inline + { + // First check parents (handles nested case) + if (ContainsParentOfType()) + { + return true; + } + + // Then check siblings at root level (handles flat case) + // Find the root container + var root = Parent; + while (root?.Parent != null) + { + root = root.Parent; + } + + if (root is not ContainerInline container) + { + return false; + } + + // Walk siblings looking for the type + var sibling = container.FirstChild; + while (sibling != null) + { + if (sibling is T) + { + return true; + } + sibling = sibling.NextSibling; + } + + return false; + } + /// /// Iterates on parents of the specified type. 
/// From 6c35d8abeddf05fe7c26d25d5950c8baa84a0633 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Fri, 30 Jan 2026 16:56:38 +0100 Subject: [PATCH 2/3] remove baseline results file --- .../PipeTable/baseline-results.md | 34 ------------------- 1 file changed, 34 deletions(-) delete mode 100644 src/Markdig.Benchmarks/PipeTable/baseline-results.md diff --git a/src/Markdig.Benchmarks/PipeTable/baseline-results.md b/src/Markdig.Benchmarks/PipeTable/baseline-results.md deleted file mode 100644 index b2a1d3323..000000000 --- a/src/Markdig.Benchmarks/PipeTable/baseline-results.md +++ /dev/null @@ -1,34 +0,0 @@ -# PipeTable Baseline Results (Pre-Optimization) - -## Summary Table (Server GC) - -| Method | Mean | Error | StdDev | Gen0 | Gen1 | Allocated | -|-------------------------------- |-------------:|------------:|------------:|-------:|-------:|-----------:| -| 'PipeTable 100 rows x 5 cols' | 542.0 us | 2.25 us | 1.88 us | 2.9297 | 0.9766 | 367.38 KB | -| 'PipeTable 500 rows x 5 cols' | 23,018.4 us | 150.30 us | 133.24 us | - | - | 1818.08 KB | -| 'PipeTable 1000 rows x 5 cols' | 89,418.0 us | 507.04 us | 474.28 us | - | - | 3702.7 KB | -| 'PipeTable 1500 rows x 5 cols' | 201,593.3 us | 2,133.24 us | 1,995.44 us | - | - | 5660.16 KB | -| 'PipeTable 5000 rows x 5 cols' | NA | NA | NA | NA | NA | NA | -| 'PipeTable 10000 rows x 5 cols' | NA | NA | NA | NA | NA | NA | - -## Scaling Analysis - -The scaling is clearly super-linear (O(n²) or worse): -- 100 → 500 (5x rows): 542µs → 23ms = **42x slowdown** -- 500 → 1000 (2x rows): 23ms → 89ms = **3.9x slowdown** -- 1000 → 1500 (1.5x rows): 89ms → 202ms = **2.3x slowdown** - -## Issues - -- 5000 and 10000 row benchmarks **FAIL** due to depth limit exceeded -- Deep nesting causes O(n²) backward traversal for cell boundary detection -- Memory allocation scales linearly but is excessive (~3.7 KB per row) -- Gen0/Gen1 collections drop to 0 for larger tables (server GC handles in batches) - -## Environment - -- BenchmarkDotNet 
v0.14.0 -- macOS 26.2 (Darwin 25.2.0) -- Apple M2 Pro, 12 cores -- .NET 8.0.8, Arm64 RyuJIT AdvSIMD -- Server GC enabled From cc7e38f6a19c414d071c90f0d66fd66d962dfcc1 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Fri, 30 Jan 2026 18:57:36 +0100 Subject: [PATCH 3/3] Do not use System.Index and fix nullability checks for older platforms --- .../Extensions/Tables/PipeTableParser.cs | 6 +-- src/Markdig/Helpers/LinkHelper.cs | 52 +++++++++---------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/Markdig/Extensions/Tables/PipeTableParser.cs b/src/Markdig/Extensions/Tables/PipeTableParser.cs index 4604fb92a..0e71a0be1 100644 --- a/src/Markdig/Extensions/Tables/PipeTableParser.cs +++ b/src/Markdig/Extensions/Tables/PipeTableParser.cs @@ -250,7 +250,7 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, // a | b | (trailing pipe only) // Ensure the table ends with a line break to simplify row detection - var lastElement = delimiters[^1]; + var lastElement = delimiters[delimiters.Count - 1]; if (!(lastElement is LineBreakInline)) { // Find the actual last sibling (there may be content after the last delimiter) @@ -401,11 +401,11 @@ public bool PostProcess(InlineProcessor state, Inline? root, Inline? lastChild, Debug.Assert(row != null); if (table.Span.IsEmpty) { - table.Span = row.Span; + table.Span = row!.Span; table.Line = row.Line; table.Column = row.Column; } - table.Add(row); + table.Add(row!); row = null; } } diff --git a/src/Markdig/Helpers/LinkHelper.cs b/src/Markdig/Helpers/LinkHelper.cs index fd7e6ba2d..6415d9219 100644 --- a/src/Markdig/Helpers/LinkHelper.cs +++ b/src/Markdig/Helpers/LinkHelper.cs @@ -1,5 +1,5 @@ // Copyright (c) Alexandre Mutel. All rights reserved. -// This file is licensed under the BSD-Clause 2 license. +// This file is licensed under the BSD-Clause 2 license. // See the license.txt file in the project root for more information.
using Markdig.Syntax; @@ -65,7 +65,7 @@ public static string Urilize(ReadOnlySpan headingText, bool allowOnlyAscii for (int j = 0; j < (normalized.Length < 1 ? 1 : normalized.Length); j++) { - if (normalized != null) + if (!normalized.IsEmpty) { c = normalized[j]; } @@ -215,13 +215,13 @@ public static bool TryParseAutolink(ref StringSlice text, [NotNullWhen(true)] ou return false; } - // An absolute URI, for these purposes, consists of a scheme followed by a colon (:) - // followed by zero or more characters other than ASCII whitespace and control characters, <, and >. + // An absolute URI, for these purposes, consists of a scheme followed by a colon (:) + // followed by zero or more characters other than ASCII whitespace and control characters, <, and >. // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space). // A URI that would end with a full stop (.) is treated instead as ending immediately before the full stop. - // a scheme is any sequence of 2–32 characters - // beginning with an ASCII letter + // a scheme is any sequence of 2–32 characters + // beginning with an ASCII letter // and followed by any combination of ASCII letters, digits, or the symbols plus (”+”), period (”.”), or hyphen (”-”). // An email address, for these purposes, is anything that matches the non-normative regex from the HTML5 spec: @@ -276,7 +276,7 @@ public static bool TryParseAutolink(ref StringSlice text, [NotNullWhen(true)] ou if (isValidChar) { - // a scheme is any sequence of 2–32 characters + // a scheme is any sequence of 2–32 characters if (state > 0 && builder.Length >= 32) { goto ReturnFalse; @@ -307,7 +307,7 @@ public static bool TryParseAutolink(ref StringSlice text, [NotNullWhen(true)] ou } } - // append ':' or '@' + // append ':' or '@' builder.Append(c); if (state < 0) @@ -415,10 +415,10 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? 
link, ou public static bool TryParseInlineLink(ref StringSlice text, out string? link, out string? title, out SourceSpan linkSpan, out SourceSpan titleSpan) { - // 1. An inline link consists of a link text followed immediately by a left parenthesis (, + // 1. An inline link consists of a link text followed immediately by a left parenthesis (, // 2. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces? - // 3. an optional link destination, - // 4. an optional link title separated from the link destination by whitespace, + // 3. an optional link destination, + // 4. an optional link title separated from the link destination by whitespace, // 5. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces? // 6. and a right parenthesis ) bool isValid = false; @@ -429,7 +429,7 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? link, ou linkSpan = SourceSpan.Empty; titleSpan = SourceSpan.Empty; - // 1. An inline link consists of a link text followed immediately by a left parenthesis (, + // 1. An inline link consists of a link text followed immediately by a left parenthesis (, if (c == '(') { text.SkipChar(); @@ -505,10 +505,10 @@ public static bool TryParseInlineLinkTrivia( out SourceSpan triviaAfterTitle, out bool urlHasPointyBrackets) { - // 1. An inline link consists of a link text followed immediately by a left parenthesis (, + // 1. An inline link consists of a link text followed immediately by a left parenthesis (, // 2. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces? - // 3. an optional link destination, - // 4. an optional link title separated from the link destination by whitespace, + // 3. an optional link destination, + // 4. an optional link title separated from the link destination by whitespace, // 5. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces? // 6. 
and a right parenthesis ) bool isValid = false; @@ -526,7 +526,7 @@ public static bool TryParseInlineLinkTrivia( urlHasPointyBrackets = false; titleEnclosingCharacter = '\0'; - // 1. An inline link consists of a link text followed immediately by a left parenthesis (, + // 1. An inline link consists of a link text followed immediately by a left parenthesis (, if (c == '(') { text.SkipChar(); @@ -773,7 +773,7 @@ public static bool TryParseUrl(ref T text, [NotNullWhen(true)] out string? li var c = text.CurrentChar; - // a sequence of zero or more characters between an opening < and a closing > + // a sequence of zero or more characters between an opening < and a closing > // that contains no line breaks, or unescaped < or > characters, or if (c == '<') { @@ -820,9 +820,9 @@ public static bool TryParseUrl(ref T text, [NotNullWhen(true)] out string? li else { // a nonempty sequence of characters that does not start with <, does not include ASCII space or control characters, - // and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a - // balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped - // parentheses. + // and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a + // balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped + // parentheses. bool hasEscape = false; int openedParent = 0; while (true) @@ -922,7 +922,7 @@ public static bool TryParseUrlTrivia(ref T text, out string? link, out bool h var c = text.CurrentChar; - // a sequence of zero or more characters between an opening < and a closing > + // a sequence of zero or more characters between an opening < and a closing > // that contains no line breaks, or unescaped < or > characters, or if (c == '<') { @@ -969,9 +969,9 @@ public static bool TryParseUrlTrivia(ref T text, out string? 
link, out bool h else { // a nonempty sequence of characters that does not start with <, does not include ASCII space or control characters, - // and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a - // balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped - // parentheses. + // and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a + // balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped + // parentheses. bool hasEscape = false; int openedParent = 0; while (true) @@ -1201,7 +1201,7 @@ public static bool TryParseLinkReferenceDefinition(ref T text, if (c != '\0' && c != '\n' && c != '\r') { - // If we were able to parse the url but the title doesn't end with space, + // If we were able to parse the url but the title doesn't end with space, // we are still returning a valid definition if (newLineCount > 0 && title != null) { @@ -1341,7 +1341,7 @@ public static bool TryParseLinkReferenceDefinitionTrivia( if (c != '\0' && c != '\n' && c != '\r') { - // If we were able to parse the url but the title doesn't end with space, + // If we were able to parse the url but the title doesn't end with space, // we are still returning a valid definition if (newLineCount > 0 && title != null) {