From fe1050de244ea5223cd80fc687acc7bf14b5e7d5 Mon Sep 17 00:00:00 2001 From: Alex Smith Date: Tue, 2 Sep 2025 14:33:18 +1000 Subject: [PATCH] Ignore base64 encoded data URLs This removes base64 encoded `src` values from `img` tags, which are not widely supported by markdown renderers, and are causing parser performance issues and crashes. --- CHANGELOG.md | 6 ++++++ lib/upmark.rb | 6 +++++- lib/upmark/transform/markdown.rb | 2 +- spec/acceptance/upmark_spec.rb | 11 +++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e067e66..b228d88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 2.0.0 / 2025-09-02 + +* Potentially Breaking Change: `img` tags with base64 encoded `src` values, + which are not widely supported by markdown renderers, are removed to avoid + parser performance issues and crashes + ## 1.1.0 / 2024-04-19 * basic handling for nested lists diff --git a/lib/upmark.rb b/lib/upmark.rb index 1dfb98c..0b05073 100644 --- a/lib/upmark.rb +++ b/lib/upmark.rb @@ -14,7 +14,11 @@ def self.convert(html) preprocess = Transform::Preprocess.new markdown = Transform::Markdown.new - ast = xml.parse(html.strip) + # Remove base64 data URLs that cause parser issues + html = html.gsub(/(data:image\/[^;]*;base64,)[A-Za-z0-9+\/=]+/, '').strip + + ast = xml.parse(html) + ast = normalise.apply(ast) ast = preprocess.apply(ast) ast = markdown.apply(ast) diff --git a/lib/upmark/transform/markdown.rb b/lib/upmark/transform/markdown.rb index d09d34d..fc44316 100644 --- a/lib/upmark/transform/markdown.rb +++ b/lib/upmark/transform/markdown.rb @@ -71,7 +71,7 @@ def self.text(element) element(:img) do |element| attributes = map_attributes_subtree(element[:attributes]) - href = attributes[:src] + href = attributes[:src].to_s title = attributes[:title] alt_text = attributes[:alt] diff --git a/spec/acceptance/upmark_spec.rb b/spec/acceptance/upmark_spec.rb index 95b0be0..1dccb38 100644 --- 
a/spec/acceptance/upmark_spec.rb +++ b/spec/acceptance/upmark_spec.rb @@ -86,6 +86,17 @@ def actual ![messenger bag skateboard](http://helvetica.com/image.gif "art party organic") MD end + + specify "removes base64 data URLs" do + expect(<<~HTML).to convert_to("") + + HTML + + src = "abc" * 10000 + expect(<<~HTML).to convert_to("") + + HTML + end end context "

" do