diff --git a/benchmark/parse_cdata.yaml b/benchmark/parse_cdata.yaml
new file mode 100644
index 00000000..cde04306
--- /dev/null
+++ b/benchmark/parse_cdata.yaml
@@ -0,0 +1,50 @@
+loop_count: 100 # benchmark-driver setting; presumably iterations per entry - confirm in its docs
+contexts: # one entry per rexml build under comparison: 3.2.6 gem vs. working tree, with and without YJIT
+  - gems:
+      rexml: 3.2.6
+    require: false # the prelude on the next line performs the require itself
+    prelude: require 'rexml'
+  - name: master
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib")) # put the checkout's lib/ ahead of any installed rexml
+      require 'rexml'
+  - name: 3.2.6(YJIT)
+    gems:
+      rexml: 3.2.6
+    require: false
+    prelude: |
+      require 'rexml'
+      RubyVM::YJIT.enable
+  - name: master(YJIT)
+    prelude: |
+      $LOAD_PATH.unshift(File.expand_path("lib")) # put the checkout's lib/ ahead of any installed rexml
+      require 'rexml'
+      RubyVM::YJIT.enable
+
+prelude: |
+  require 'rexml/document'
+  require 'rexml/parsers/sax2parser'
+  require 'rexml/parsers/pullparser'
+  require 'rexml/parsers/streamparser'
+  require 'rexml/streamlistener'
+
+  def build_xml(size) # returns an XML document whose root holds a CDATA section of `size` characters
+    "<?xml version=\"1.0\"?>\n" +
+      "<root>Test\n" +
+      "<![CDATA[" + "a" * size + "]]></root>\n"
+  end
+  xml = build_xml(100000) # the single input string read by every entry under `benchmark:`
+
+  class Listener # defines no callbacks of its own; it only mixes in REXML::StreamListener
+    include REXML::StreamListener
+  end
+
+benchmark: # each entry parses the prelude's `xml` through a different REXML parser front end
+  'dom'    : REXML::Document.new(xml)
+  'sax'    : REXML::Parsers::SAX2Parser.new(xml).parse
+  'pull'   : |
+    parser = REXML::Parsers::PullParser.new(xml)
+    while parser.has_next? # pull events until the parser reports none are left
+      parser.pull
+    end
+  'stream' : REXML::Parsers::StreamParser.new(xml, Listener.new).parse
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 44aacfa2..e666c2af 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -471,9 +471,13 @@ def pull_event
end
return [ :comment, md[1] ]
- else
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
- return [ :cdata, md[1] ] if md
+ elsif @source.match?("[CDATA[", true)
+ text = @source.read_until("]]>")
+ if text.chomp!("]]>")
+ return [ :cdata, text ]
+ else
+ raise REXML::ParseException.new("Malformed CDATA: Missing end ']]>'", @source)
+ end
end
raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source)
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 5ba5ab12..3ec1141e 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -67,7 +67,7 @@ class Source
module Private
SCANNER_RESET_SIZE = 100000
PRE_DEFINED_TERM_PATTERNS = {}
- pre_defined_terms = ["'", '"', "<"]
+ pre_defined_terms = ["'", '"', "<", "]]>"]
if StringScanner::Version < "3.1.1"
pre_defined_terms.each do |term|
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
diff --git a/test/parse/test_cdata.rb b/test/parse/test_cdata.rb
index b5f1a3bc..c742d6a1 100644
--- a/test/parse/test_cdata.rb
+++ b/test/parse/test_cdata.rb
@@ -7,10 +7,28 @@ module REXMLTests
class TestParseCData < Test::Unit::TestCase
include Test::Unit::CoreAssertions
+ def parse(xml)
+ REXML::Document.new(xml)
+ end
+
def test_linear_performance_gt
seq = [10000, 50000, 100000, 150000, 200000]
assert_linear_performance(seq, rehearsal: 10) do |n|
- REXML::Document.new('" * n + ' ]]>')
+ parse('" * n + ' ]]>')
+ end
+ end
+
+ class TestInvalid < self
+ def test_unclosed_cdata
+ exception = assert_raise(REXML::ParseException) do
+ parse("")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed CDATA: Missing end ']]>'
+ Line: 1
+ Position: 25
+ Last 80 unconsumed characters:
+ DETAIL
end
end
end