Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ext/liquid_c/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,12 @@ void liquid_define_tokenizer()

rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 3);
rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
rb_define_method(cLiquidTokenizer, "line_number", tokenizer_line_number_method, 0);
rb_define_method(cLiquidTokenizer, "for_liquid_tag", tokenizer_for_liquid_tag_method, 0);
rb_define_method(cLiquidTokenizer, "bug_compatible_whitespace_trimming!", tokenizer_bug_compatible_whitespace_trimming, 0);

// For testing the internal token representation.
rb_define_private_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
rb_define_private_method(cLiquidTokenizer, "shift_trimmed", tokenizer_shift_trimmed_method, 0);
}

52 changes: 27 additions & 25 deletions lib/liquid/c.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,22 +41,18 @@ class Tokenizer
end
end

Liquid::Tokenizer.class_eval do
def self.new(source, line_numbers = false, line_number: nil, for_liquid_tag: false)
source = source.to_s
if Liquid::C.enabled && source.bytesize <= Liquid::C::Tokenizer::MAX_SOURCE_BYTE_SIZE
source = source.encode(Encoding::UTF_8)
Liquid::C::Tokenizer.new(source, line_number || (line_numbers ? 1 : 0), for_liquid_tag)
Liquid::Raw.class_eval do
alias_method :ruby_parse, :parse

def parse(tokenizer)
if parse_context.liquid_c_nodes_disabled?
ruby_parse(tokenizer)
else
super
c_parse(tokenizer)
end
end
end

Liquid::Raw.class_eval do
alias_method :ruby_parse, :parse
end

Liquid::ParseContext.class_eval do
class << self
attr_accessor :liquid_c_nodes_disabled
Expand All @@ -73,34 +69,42 @@ def new_block_body
end
end

alias_method :ruby_new_tokenizer, :new_tokenizer

def new_tokenizer(source, start_line_number: nil, for_liquid_tag: false)
unless liquid_c_nodes_disabled?
source = source.to_s
if source.bytesize <= Liquid::C::Tokenizer::MAX_SOURCE_BYTE_SIZE
source = source.encode(Encoding::UTF_8)
return Liquid::C::Tokenizer.new(source, start_line_number || 0, for_liquid_tag)
else
@liquid_c_nodes_disabled = true
end
end

ruby_new_tokenizer(source, start_line_number: start_line_number, for_liquid_tag: for_liquid_tag)
end

# @api private
def liquid_c_nodes_disabled?
# Liquid::Profiler exposes the internal parse tree that we don't want to build when
# parsing with liquid-c, so consider liquid-c to be disabled when using it.
# Also, some templates are parsed before the profiler is running, on which case we
# provide the `disable_liquid_c_nodes` option to enable the Ruby AST to be produced
# so the profiler can use it on future runs.
@liquid_c_nodes_disabled ||= !Liquid::C.enabled || @template_options[:profile] ||
return @liquid_c_nodes_disabled if defined?(@liquid_c_nodes_disabled)
@liquid_c_nodes_disabled = !Liquid::C.enabled || @template_options[:profile] ||
@template_options[:disable_liquid_c_nodes] || self.class.liquid_c_nodes_disabled
end

# @api private
attr_writer :liquid_c_nodes_disabled
end

module Liquid
module C
module DocumentClassPatch
def parse(tokenizer, parse_context)
if tokenizer.is_a?(Liquid::C::Tokenizer)
if tokenizer.is_a?(Liquid::C::Tokenizer) && parse_context[:bug_compatible_whitespace_trimming]
# Temporary to test rollout of the fix for this bug
if parse_context[:bug_compatible_whitespace_trimming]
tokenizer.bug_compatible_whitespace_trimming!
end
else
# Liquid::Tokenizer.new may return a Liquid::Tokenizer if the source is too large
# to be supported, so indicate in the parse context that the liquid VM won't be used
parse_context.liquid_c_nodes_disabled = true
tokenizer.bug_compatible_whitespace_trimming!
end
super
end
Expand Down Expand Up @@ -237,12 +241,10 @@ def enabled=(value)
Liquid::Context.send(:alias_method, :evaluate, :c_evaluate)
Liquid::Context.send(:alias_method, :find_variable, :c_find_variable_kwarg)
Liquid::Context.send(:alias_method, :strict_variables=, :c_strict_variables=)
Liquid::Raw.send(:alias_method, :parse, :c_parse)
else
Liquid::Context.send(:alias_method, :evaluate, :ruby_evaluate)
Liquid::Context.send(:alias_method, :find_variable, :ruby_find_variable)
Liquid::Context.send(:alias_method, :strict_variables=, :ruby_strict_variables=)
Liquid::Raw.send(:alias_method, :parse, :ruby_parse)
end
end
end
Expand Down
29 changes: 15 additions & 14 deletions test/unit/tokenizer_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

class TokenizerTest < Minitest::Test
def test_tokenizer_nil
tokenizer = Liquid::Tokenizer.new(nil)
assert_nil(tokenizer.shift)
tokenizer = new_tokenizer(nil)
assert_nil(tokenizer.send(:shift))
end

def test_tokenize_strings
Expand Down Expand Up @@ -58,11 +58,11 @@ def test_utf8_encoded_source

def test_utf8_compatible_source
source = String.new('ascii', encoding: Encoding::ASCII)
tokenizer = Liquid::Tokenizer.new(source)
output = tokenizer.shift
tokenizer = new_tokenizer(source)
output = tokenizer.send(:shift)
assert_equal(Encoding::UTF_8, output.encoding)
assert_equal(source, output)
assert_nil(tokenizer.shift)
assert_nil(tokenizer.send(:shift))
end

def test_non_utf8_compatible_source
Expand All @@ -84,26 +84,27 @@ def test_source_too_large
assert_match(/Source too large, max \d+ bytes/, err.message)

# ruby patch fallback
liquid_c_tokenizer = Liquid::Tokenizer.new(max_length_source)
parse_context = Liquid::ParseContext.new
liquid_c_tokenizer = parse_context.new_tokenizer(max_length_source)
assert_instance_of(Liquid::C::Tokenizer, liquid_c_tokenizer)
fallback_tokenizer = Liquid::Tokenizer.new(too_large_source)
assert_instance_of(Liquid::Tokenizer, fallback_tokenizer)
refute(parse_context.liquid_c_nodes_disabled?)

# Document.parse patch parse context update
parse_context = Liquid::ParseContext.new
refute(parse_context.liquid_c_nodes_disabled?)
Liquid::Document.parse(liquid_c_tokenizer, parse_context)
refute(parse_context.liquid_c_nodes_disabled?)
Liquid::Document.parse(fallback_tokenizer, parse_context)
fallback_tokenizer = parse_context.new_tokenizer(too_large_source)
assert_instance_of(Liquid::Tokenizer, fallback_tokenizer)
assert_equal(true, parse_context.liquid_c_nodes_disabled?)
end

private

def new_tokenizer(source, parse_context: Liquid::ParseContext.new)
parse_context.new_tokenizer(source)
end

def tokenize(source, for_liquid_tag: false, trimmed: false)
tokenizer = Liquid::C::Tokenizer.new(source, 1, for_liquid_tag)
tokens = []
while (t = trimmed ? tokenizer.send(:shift_trimmed) : tokenizer.shift)
while (t = trimmed ? tokenizer.send(:shift_trimmed) : tokenizer.send(:shift))
tokens << t
end
tokens
Expand Down