diff --git a/CHANGELOG.md b/CHANGELOG.md index e067e66..b228d88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 2.0.0 / 2025-09-02 + +* Potentially Breaking Change: `img` tags with base64-encoded `src` values, + which are not widely supported by Markdown renderers, are removed to avoid + parser performance issues and crashes + ## 1.1.0 / 2024-04-19 * basic handling for nested lists diff --git a/lib/upmark.rb b/lib/upmark.rb index 1dfb98c..0b05073 100644 --- a/lib/upmark.rb +++ b/lib/upmark.rb @@ -14,7 +14,11 @@ def self.convert(html) preprocess = Transform::Preprocess.new markdown = Transform::Markdown.new - ast = xml.parse(html.strip) + # Remove base64 data URLs that cause parser issues + html = html.gsub(/(data:image\/[^;]*;base64,)[A-Za-z0-9+\/=]+/, '').strip + + ast = xml.parse(html) + ast = normalise.apply(ast) ast = preprocess.apply(ast) ast = markdown.apply(ast) diff --git a/lib/upmark/transform/markdown.rb b/lib/upmark/transform/markdown.rb index d09d34d..fc44316 100644 --- a/lib/upmark/transform/markdown.rb +++ b/lib/upmark/transform/markdown.rb @@ -71,7 +71,7 @@ def self.text(element) element(:img) do |element| attributes = map_attributes_subtree(element[:attributes]) - href = attributes[:src] + href = attributes[:src].to_s title = attributes[:title] alt_text = attributes[:alt] diff --git a/spec/acceptance/upmark_spec.rb b/spec/acceptance/upmark_spec.rb index 95b0be0..1dccb38 100644 --- a/spec/acceptance/upmark_spec.rb +++ b/spec/acceptance/upmark_spec.rb @@ -86,6 +86,17 @@ def actual ![messenger bag skateboard](http://helvetica.com/image.gif "art party organic") MD end + + specify "removes base64 data URLs" do + expect(<<~HTML).to convert_to("") + + HTML + + src = "abc" * 10000 + expect(<<~HTML).to convert_to("") + + HTML + end end context "

" do