Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions benchmark/parse_comment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Benchmark for parsing very long XML comments with REXML.
# Each context loads either the released rexml 3.2.6 gem or the working tree
# under ./lib, with and without YJIT enabled.
loop_count: 100
contexts:
  - gems:
      rexml: 3.2.6
    require: false
    prelude: require 'rexml'
  - name: master
    prelude: |
      $LOAD_PATH.unshift(File.expand_path("lib"))
      require 'rexml'
  - name: 3.2.6(YJIT)
    gems:
      rexml: 3.2.6
    require: false
    prelude: |
      require 'rexml'
      RubyVM::YJIT.enable
  - name: master(YJIT)
    prelude: |
      $LOAD_PATH.unshift(File.expand_path("lib"))
      require 'rexml'
      RubyVM::YJIT.enable

# Shared setup: one comment of SIZE characters in each of the three positions
# the parser handles (before the root, inside a DOCTYPE, after the root).
prelude: |
  require 'rexml/document'

  SIZE = 100000

  top_level_xml = "<!--" + "a" * SIZE + "-->\n"
  in_doctype_xml = "<!DOCTYPE foo [<!--" + "a" * SIZE + "-->]>"
  after_doctype_xml = "<root/><!--" + "a" * SIZE + "-->"

benchmark:
  'top_level' : REXML::Document.new(top_level_xml)
  'in_doctype' : REXML::Document.new(in_doctype_xml)
  'after_doctype' : REXML::Document.new(after_doctype_xml)
39 changes: 17 additions & 22 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -277,14 +277,7 @@ def pull_event
return process_instruction
elsif @source.match?("<!", true)
if @source.match?("--", true)
md = @source.match(/(.*?)-->/um, true)
if md.nil?
raise REXML::ParseException.new("Unclosed comment", @source)
end
if /--|-\z/.match?(md[1])
raise REXML::ParseException.new("Malformed comment", @source)
end
return [ :comment, md[1] ]
return [ :comment, process_comment ]
elsif @source.match?("DOCTYPE", true)
base_error_message = "Malformed DOCTYPE"
unless @source.match?(/\s+/um, true)
Expand Down Expand Up @@ -417,12 +410,8 @@ def pull_event
raise REXML::ParseException.new(message, @source)
end
return [:notationdecl, name, *id]
elsif md = @source.match(/--(.*?)-->/um, true)
case md[1]
when /--/, /-\z/
raise REXML::ParseException.new("Malformed comment", @source)
end
return [ :comment, md[1] ] if md
elsif @source.match?("--", true)
return [ :comment, process_comment ]
end
elsif match = @source.match(/(%.*?;)\s*/um, true)
return [ :externalentity, match[1] ]
Expand Down Expand Up @@ -463,14 +452,8 @@ def pull_event
md = @source.match(/([^>]*>)/um)
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md
if md[0][0] == ?-
md = @source.match(/--(.*?)-->/um, true)

if md.nil? || /--|-\z/.match?(md[1])
raise REXML::ParseException.new("Malformed comment", @source)
end

return [ :comment, md[1] ]
if @source.match?("--", true)
return [ :comment, process_comment ]
elsif @source.match?("[CDATA[", true)
text = @source.read_until("]]>")
if text.chomp!("]]>")
Expand Down Expand Up @@ -738,6 +721,18 @@ def parse_id_invalid_details(accept_external_id:,
end
end

# Reads the body of a comment from @source and returns it without the
# closing "-->". Callers consume the opening "<!--" before calling this.
#
# Relies on @source.read_until("-->") handing back the text read so far,
# ending with "-->" when the terminator was found.
#
# Raises REXML::ParseException when the terminator is missing, or when the
# body contains "--" or ends with "-".
def process_comment
  body = @source.read_until("-->")
  # chomp! returns nil when there was no trailing "-->" to strip.
  terminated = body.chomp!("-->")
  raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source) unless terminated

  malformed = body.end_with?("-") || body.include?("--")
  raise REXML::ParseException.new("Malformed comment", @source) if malformed

  body
end

def process_instruction
name = parse_name("Malformed XML: Invalid processing instruction node")
if @source.match?(/\s+/um, true)
Expand Down
21 changes: 16 additions & 5 deletions test/parse/test_comment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_toplevel_unclosed_comment
parse("<!--")
end
assert_equal(<<~DETAIL, exception.to_s)
Unclosed comment
Unclosed comment: Missing end '-->'
Line: 1
Position: 4
Last 80 unconsumed characters:
Expand Down Expand Up @@ -48,6 +48,18 @@ def test_toplevel_malformed_comment_end
DETAIL
end

# A comment opened inside a DOCTYPE internal subset and never terminated
# must raise a ParseException carrying the "Unclosed comment" detail text.
def test_doctype_unclosed_comment
  error = assert_raise(REXML::ParseException) { parse("<!DOCTYPE foo [<!--") }
  expected_detail = <<~DETAIL
    Unclosed comment: Missing end '-->'
    Line: 1
    Position: 19
    Last 80 unconsumed characters:
  DETAIL
  assert_equal(expected_detail, error.to_s)
end

def test_doctype_malformed_comment_inner
exception = assert_raise(REXML::ParseException) do
parse("<!DOCTYPE foo [<!-- -- -->")
Expand All @@ -72,16 +84,15 @@ def test_doctype_malformed_comment_end
DETAIL
end

def test_after_doctype_malformed_comment_short
def test_after_doctype_unclosed_comment
exception = assert_raise(REXML::ParseException) do
parse("<a><!-->")
end
assert_equal(<<~DETAIL.chomp, exception.to_s)
Malformed comment
assert_equal(<<~DETAIL, exception.to_s)
Unclosed comment: Missing end '-->'
Line: 1
Position: 8
Last 80 unconsumed characters:
-->
DETAIL
end

Expand Down