diff --git a/benchmark/parse_comment.yaml b/benchmark/parse_comment.yaml
new file mode 100644
index 00000000..a0a3a771
--- /dev/null
+++ b/benchmark/parse_comment.yaml
@@ -0,0 +1,42 @@
+# Comment-parsing benchmark: every entry under `benchmark` builds a
+# REXML::Document from an input dominated by one SIZE-character comment.
+#
+# `contexts` compares the rexml 3.2.6 gem with the working tree (lib/ is
+# put first on $LOAD_PATH), each one with and without RubyVM::YJIT.enable.
+loop_count: 100
+contexts:
+  - gems:
+      rexml: 3.2.6
+    require: false
+    prelude: require 'rexml'
+  - name: master
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib"))
+      require 'rexml'
+  - name: 3.2.6(YJIT)
+    gems:
+      rexml: 3.2.6
+    require: false
+    prelude: |
+      require 'rexml'
+      RubyVM::YJIT.enable
+  - name: master(YJIT)
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib"))
+      require 'rexml'
+      RubyVM::YJIT.enable
+
+prelude: |
+  require 'rexml/document'
+
+  SIZE = 100000
+
+  # One long comment per input; the variable name says where the comment sits.
+  top_level_xml = "<!--" + "a" * SIZE + "-->\n"
+  in_doctype_xml = "<!DOCTYPE foo [<!--" + "a" * SIZE + "-->]>"
+  after_doctype_xml = "<root/><!--" + "a" * SIZE + "-->"
+
+benchmark:
+  'top_level' : REXML::Document.new(top_level_xml)
+  'in_doctype' : REXML::Document.new(in_doctype_xml)
+  'after_doctype' : REXML::Document.new(after_doctype_xml)
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index e666c2af..61d38ae2 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -277,14 +277,7 @@ def pull_event
return process_instruction
elsif @source.match?("/um, true)
- if md.nil?
- raise REXML::ParseException.new("Unclosed comment", @source)
- end
- if /--|-\z/.match?(md[1])
- raise REXML::ParseException.new("Malformed comment", @source)
- end
- return [ :comment, md[1] ]
+ return [ :comment, process_comment ]
elsif @source.match?("DOCTYPE", true)
base_error_message = "Malformed DOCTYPE"
unless @source.match?(/\s+/um, true)
@@ -417,12 +410,8 @@ def pull_event
raise REXML::ParseException.new(message, @source)
end
return [:notationdecl, name, *id]
- elsif md = @source.match(/--(.*?)-->/um, true)
- case md[1]
- when /--/, /-\z/
- raise REXML::ParseException.new("Malformed comment", @source)
- end
- return [ :comment, md[1] ] if md
+ elsif @source.match?("--", true)
+ return [ :comment, process_comment ]
end
elsif match = @source.match(/(%.*?;)\s*/um, true)
return [ :externalentity, match[1] ]
@@ -463,14 +452,8 @@ def pull_event
md = @source.match(/([^>]*>)/um)
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md
- if md[0][0] == ?-
- md = @source.match(/--(.*?)-->/um, true)
-
- if md.nil? || /--|-\z/.match?(md[1])
- raise REXML::ParseException.new("Malformed comment", @source)
- end
-
- return [ :comment, md[1] ]
+ if @source.match?("--", true)
+ return [ :comment, process_comment ]
elsif @source.match?("[CDATA[", true)
text = @source.read_until("]]>")
if text.chomp!("]]>")
@@ -738,6 +721,18 @@ def parse_id_invalid_details(accept_external_id:,
end
end
+ def process_comment
+ text = @source.read_until("-->")
+ unless text.chomp!("-->")
+ raise REXML::ParseException.new("Unclosed comment: Missing end '-->'", @source)
+ end
+
+ if text.include? "--" or text.end_with?("-")
+ raise REXML::ParseException.new("Malformed comment", @source)
+ end
+ text
+ end
+
def process_instruction
name = parse_name("Malformed XML: Invalid processing instruction node")
if @source.match?(/\s+/um, true)
diff --git a/test/parse/test_comment.rb b/test/parse/test_comment.rb
index 4475dca7..c573e711 100644
--- a/test/parse/test_comment.rb
+++ b/test/parse/test_comment.rb
@@ -17,7 +17,7 @@ def test_toplevel_unclosed_comment
parse("'
Line: 1
Position: 4
Last 80 unconsumed characters:
@@ -48,6 +48,18 @@ def test_toplevel_malformed_comment_end
DETAIL
end
+ def test_doctype_unclosed_comment
+ exception = assert_raise(REXML::ParseException) do
+ parse("'
+ Line: 1
+ Position: 19
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
def test_doctype_malformed_comment_inner
exception = assert_raise(REXML::ParseException) do
parse("")
@@ -72,16 +84,15 @@ def test_doctype_malformed_comment_end
DETAIL
end
- def test_after_doctype_malformed_comment_short
+ def test_after_doctype_unclosed_comment
exception = assert_raise(REXML::ParseException) do
parse("")
end
- assert_equal(<<~DETAIL.chomp, exception.to_s)
- Malformed comment
+ assert_equal(<<~DETAIL, exception.to_s)
+ Unclosed comment: Missing end '-->'
Line: 1
Position: 8
Last 80 unconsumed characters:
- -->
DETAIL
end