From fb274497c154efe65c8b988140c9c20f8b19cdc0 Mon Sep 17 00:00:00 2001 From: Dylan Thacker-Smith Date: Thu, 7 Jan 2021 11:14:16 -0500 Subject: [PATCH] Make Liquid::C::Tokenizer#shift private so it is only used for testing Since it won't be supported for liquid template (de)serialization. --- ext/liquid_c/tokenizer.c | 2 +- lib/liquid/c.rb | 52 +++++++++++++++++++------------------ test/unit/tokenizer_test.rb | 29 +++++++++++---------- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/ext/liquid_c/tokenizer.c b/ext/liquid_c/tokenizer.c index bbd9e260..ad03efc4 100644 --- a/ext/liquid_c/tokenizer.c +++ b/ext/liquid_c/tokenizer.c @@ -287,12 +287,12 @@ void liquid_define_tokenizer() rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate); rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 3); - rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0); rb_define_method(cLiquidTokenizer, "line_number", tokenizer_line_number_method, 0); rb_define_method(cLiquidTokenizer, "for_liquid_tag", tokenizer_for_liquid_tag_method, 0); rb_define_method(cLiquidTokenizer, "bug_compatible_whitespace_trimming!", tokenizer_bug_compatible_whitespace_trimming, 0); // For testing the internal token representation. + rb_define_private_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0); rb_define_private_method(cLiquidTokenizer, "shift_trimmed", tokenizer_shift_trimmed_method, 0); } diff --git a/lib/liquid/c.rb b/lib/liquid/c.rb index d315612e..96913209 100644 --- a/lib/liquid/c.rb +++ b/lib/liquid/c.rb @@ -41,22 +41,18 @@ class Tokenizer end end -Liquid::Tokenizer.class_eval do - def self.new(source, line_numbers = false, line_number: nil, for_liquid_tag: false) - source = source.to_s - if Liquid::C.enabled && source.bytesize <= Liquid::C::Tokenizer::MAX_SOURCE_BYTE_SIZE - source = source.encode(Encoding::UTF_8) - Liquid::C::Tokenizer.new(source, line_number || (line_numbers ? 1 : 0), for_liquid_tag) +Liquid::Raw.class_eval do + alias_method :ruby_parse, :parse + + def parse(tokenizer) + if parse_context.liquid_c_nodes_disabled? + ruby_parse(tokenizer) else - super + c_parse(tokenizer) end end end -Liquid::Raw.class_eval do - alias_method :ruby_parse, :parse -end - Liquid::ParseContext.class_eval do class << self attr_accessor :liquid_c_nodes_disabled @@ -73,6 +69,22 @@ def new_block_body end end + alias_method :ruby_new_tokenizer, :new_tokenizer + + def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false) + unless liquid_c_nodes_disabled? + source = source.to_s + if source.bytesize <= Liquid::C::Tokenizer::MAX_SOURCE_BYTE_SIZE + source = source.encode(Encoding::UTF_8) + return Liquid::C::Tokenizer.new(source, start_line_number || 0, for_liquid_tag) + else + @liquid_c_nodes_disabled = true + end + end + + ruby_new_tokenizer(source, start_line_number: start_line_number, for_liquid_tag: for_liquid_tag) + end + # @api private def liquid_c_nodes_disabled? # Liquid::Profiler exposes the internal parse tree that we don't want to build when @@ -80,27 +92,19 @@ def liquid_c_nodes_disabled? # Also, some templates are parsed before the profiler is running, on which case we # provide the `disable_liquid_c_nodes` option to enable the Ruby AST to be produced # so the profiler can use it on future runs. - @liquid_c_nodes_disabled ||= !Liquid::C.enabled || @template_options[:profile] || + return @liquid_c_nodes_disabled if defined?(@liquid_c_nodes_disabled) + @liquid_c_nodes_disabled = !Liquid::C.enabled || @template_options[:profile] || @template_options[:disable_liquid_c_nodes] || self.class.liquid_c_nodes_disabled end - - # @api private - attr_writer :liquid_c_nodes_disabled end module Liquid module C module DocumentClassPatch def parse(tokenizer, parse_context) - if tokenizer.is_a?(Liquid::C::Tokenizer) + if tokenizer.is_a?(Liquid::C::Tokenizer) && parse_context[:bug_compatible_whitespace_trimming] # Temporary to test rollout of the fix for this bug - if parse_context[:bug_compatible_whitespace_trimming] - tokenizer.bug_compatible_whitespace_trimming! - end - else - # Liquid::Tokenizer.new may return a Liquid::Tokenizer if the source is too large - # to be supported, so indicate in the parse context that the liquid VM won't be used - parse_context.liquid_c_nodes_disabled = true + tokenizer.bug_compatible_whitespace_trimming! end super end @@ -237,12 +241,10 @@ def enabled=(value) Liquid::Context.send(:alias_method, :evaluate, :c_evaluate) Liquid::Context.send(:alias_method, :find_variable, :c_find_variable_kwarg) Liquid::Context.send(:alias_method, :strict_variables=, :c_strict_variables=) - Liquid::Raw.send(:alias_method, :parse, :c_parse) else Liquid::Context.send(:alias_method, :evaluate, :ruby_evaluate) Liquid::Context.send(:alias_method, :find_variable, :ruby_find_variable) Liquid::Context.send(:alias_method, :strict_variables=, :ruby_strict_variables=) - Liquid::Raw.send(:alias_method, :parse, :ruby_parse) end end end diff --git a/test/unit/tokenizer_test.rb b/test/unit/tokenizer_test.rb index 84e5496e..3f8525a4 100644 --- a/test/unit/tokenizer_test.rb +++ b/test/unit/tokenizer_test.rb @@ -4,8 +4,8 @@ class TokenizerTest < Minitest::Test def test_tokenizer_nil - tokenizer = Liquid::Tokenizer.new(nil) - assert_nil(tokenizer.shift) + tokenizer = new_tokenizer(nil) + assert_nil(tokenizer.send(:shift)) end def test_tokenize_strings @@ -58,11 +58,11 @@ def test_utf8_encoded_source def test_utf8_compatible_source source = String.new('ascii', encoding: Encoding::ASCII) - tokenizer = Liquid::Tokenizer.new(source) - output = tokenizer.shift + tokenizer = new_tokenizer(source) + output = tokenizer.send(:shift) assert_equal(Encoding::UTF_8, output.encoding) assert_equal(source, output) - assert_nil(tokenizer.shift) + assert_nil(tokenizer.send(:shift)) end def test_non_utf8_compatible_source @@ -84,26 +84,27 @@ def test_source_too_large assert_match(/Source too large, max \d+ bytes/, err.message) # ruby patch fallback - liquid_c_tokenizer = Liquid::Tokenizer.new(max_length_source) + parse_context = Liquid::ParseContext.new + liquid_c_tokenizer = parse_context.new_tokenizer(max_length_source) assert_instance_of(Liquid::C::Tokenizer, liquid_c_tokenizer) - fallback_tokenizer = Liquid::Tokenizer.new(too_large_source) - assert_instance_of(Liquid::Tokenizer, fallback_tokenizer) + refute(parse_context.liquid_c_nodes_disabled?) - # Document.parse patch parse context update parse_context = Liquid::ParseContext.new - refute(parse_context.liquid_c_nodes_disabled?) - Liquid::Document.parse(liquid_c_tokenizer, parse_context) - refute(parse_context.liquid_c_nodes_disabled?) - Liquid::Document.parse(fallback_tokenizer, parse_context) + fallback_tokenizer = parse_context.new_tokenizer(too_large_source) + assert_instance_of(Liquid::Tokenizer, fallback_tokenizer) assert_equal(true, parse_context.liquid_c_nodes_disabled?) end private + def new_tokenizer(source, parse_context: Liquid::ParseContext.new) + parse_context.new_tokenizer(source) + end + def tokenize(source, for_liquid_tag: false, trimmed: false) tokenizer = Liquid::C::Tokenizer.new(source, 1, for_liquid_tag) tokens = [] - while (t = trimmed ? tokenizer.send(:shift_trimmed) : tokenizer.shift) + while (t = trimmed ? tokenizer.send(:shift_trimmed) : tokenizer.send(:shift)) tokens << t end tokens