From 11d38237d9f491588a58c83dc3d364a7d0d1d55b Mon Sep 17 00:00:00 2001
From: Dylan Thacker-Smith
Date: Mon, 13 Jul 2015 22:00:25 -0400
Subject: [PATCH] Allow variable and tag end characters to be quoted.

---
 ext/liquid_c/tokenizer.c    | 49 +++++++++++++++++++++++++------------
 test/unit/tokenizer_test.rb | 13 ++++++++++
 2 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/ext/liquid_c/tokenizer.c b/ext/liquid_c/tokenizer.c
index f46135fd..55e58d7b 100644
--- a/ext/liquid_c/tokenizer.c
+++ b/ext/liquid_c/tokenizer.c
@@ -82,30 +82,47 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
         token->type = TOKEN_INVALID;
         if (c == '%') {
             while (cursor < last) {
-                if (*cursor++ != '%')
-                    continue;
                 c = *cursor++;
-                while (c == '%' && cursor <= last)
-                    c = *cursor++;
-                if (c != '}')
-                    continue;
-                token->type = TOKEN_TAG;
-                goto found;
+                switch (c) {
+                    case '"': case '\'': {
+                        const char *end_quote = memchr(cursor, c, last + 1 - cursor);
+                        if (end_quote) {
+                            cursor = end_quote + 1;
+                        }
+                        break;
+                    }
+                    case '%': {
+                        if (*cursor == '}') {
+                            cursor++;
+                            token->type = TOKEN_TAG;
+                            goto found;
+                        }
+                        break;
+                    }
+                }
             }
             // unterminated tag
             cursor = tokenizer->cursor + 2;
             goto found;
         } else {
             while (cursor < last) {
-                if (*cursor++ != '}')
-                    continue;
-                if (*cursor++ != '}') {
-                    // variable incomplete end, used to end raw tags
-                    cursor--;
-                    goto found;
+                c = *cursor++;
+                switch (c) {
+                    case '"': case '\'': {
+                        const char *end_quote = memchr(cursor, c, last + 1 - cursor);
+                        if (end_quote) {
+                            cursor = end_quote + 1;
+                        }
+                        break;
+                    }
+                    case '}': {
+                        if (*cursor == '}') {
+                            cursor++;
+                            token->type = TOKEN_VARIABLE;
+                        } // else variable incomplete end, used to end raw tags
+                        goto found;
+                    }
                 }
-                token->type = TOKEN_VARIABLE;
-                goto found;
             }
             // unterminated variable
             cursor = tokenizer->cursor + 2;
diff --git a/test/unit/tokenizer_test.rb b/test/unit/tokenizer_test.rb
index 7d8c4356..e24c2ecd 100644
--- a/test/unit/tokenizer_test.rb
+++ b/test/unit/tokenizer_test.rb
@@ -30,6 +30,19 @@ def test_utf8_encoded_template
     assert_equal [source], output
   end
 
+  def test_quoted_strings
+    assert_equal ['{% assign foo = "%}" %}'], tokenize('{% assign foo = "%}" %}')
+    assert_equal ["{%assign foo = '%}'%}"], tokenize("{%assign foo = '%}'%}")
+    assert_equal ['{{ "}}" }}'], tokenize('{{ "}}" }}')
+    assert_equal ["{{'}}'}}"], tokenize("{{'}}'}}")
+  end
+
+  def test_unterminated_quotes
+    assert_equal ['{% assign foo = " %}'], tokenize('{% assign foo = " %}')
+    assert_equal ['{{ " }}'], tokenize('{{ " }}')
+    assert_equal ["{{'}}"], tokenize("{{'}}")
+  end
+
   private
 
   def tokenize(source)