From 3f30c0fe4b41b88d75c1820f3a4dca946b20d13a Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 4 Feb 2025 16:10:46 -0500 Subject: [PATCH 1/2] simplify whitespace matching in regexes --- include/minja/minja.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index c304b5c..bf21fe7 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -1792,7 +1792,7 @@ class Parser { auto left = parseStringConcat(); if (!left) throw std::runtime_error("Expected left side of 'logical compare' expression"); - static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not[\r\n\s]+in\b)"); + static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not\s+in\b)"); static std::regex not_tok(R"(not\b)"); std::string op_str; while (!(op_str = consumeToken(compare_tok)).empty()) { @@ -2171,7 +2171,7 @@ class Parser { using TemplateTokenIterator = TemplateTokenVector::const_iterator; std::vector parseVarNames() { - static std::regex varnames_regex(R"(((?:\w+)(?:[\r\n\s]*,[\r\n\s]*(?:\w+))*)[\r\n\s]*)"); + static std::regex varnames_regex(R"(((?:\w+)(?:\s*,\s*(?:\w+))*)\s*)"); std::vector group; if ((group = consumeTokenGroups(varnames_regex)).empty()) throw std::runtime_error("Expected variable names"); @@ -2194,13 +2194,13 @@ class Parser { } TemplateTokenVector tokenize() { - static std::regex comment_tok(R"(\{#([-~]?)([\s\S\r\n]*?)([-~]?)#\})"); + static std::regex comment_tok(R"(\{#([-~]?)([\s\S]*?)([-~]?)#\})"); static std::regex expr_open_regex(R"(\{\{([-~])?)"); - static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)"); + static std::regex block_open_regex(R"(^\{%([-~])?\s*)"); static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter|break|continue)\b)"); static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)"); - static std::regex expr_close_regex(R"([\s\n\r]*([-~])?\}\})"); - static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})"); + static std::regex expr_close_regex(R"(\s*([-~])?\}\})"); + static std::regex block_close_regex(R"(\s*([-~])?%\})"); TemplateTokenVector tokens; std::vector group; @@ -2284,7 +2284,7 @@ class Parser { auto post_space = parseBlockClose(); tokens.push_back(std::make_unique(location, pre_space, post_space)); } else if (keyword == "set") { - static std::regex namespaced_var_regex(R"((\w+)[\s\n\r]*\.[\s\n\r]*(\w+))"); + static std::regex namespaced_var_regex(R"((\w+)\s*\.\s*(\w+))"); std::string ns; std::vector var_names; @@ -2400,7 +2400,7 @@ class Parser { auto text = text_token->text; if (post_space == SpaceHandling::Strip) { - static std::regex trailing_space_regex(R"((\s|\r|\n)+$)"); + static std::regex trailing_space_regex(R"(\s+$)"); text = std::regex_replace(text, trailing_space_regex, ""); } else if (options.lstrip_blocks && it != end) { auto i = text.size(); @@ -2410,7 +2410,7 @@ class Parser { } } if (pre_space == SpaceHandling::Strip) { - static std::regex leading_space_regex(R"(^(\s|\r|\n)+)"); + static std::regex leading_space_regex(R"(^\s+)"); text = std::regex_replace(text, leading_space_regex, ""); } else if (options.trim_blocks && (it - 1) != begin && !dynamic_cast((*(it - 2)).get())) { if (text.length() > 0 && text[0] == '\n') { From 96ebff4f6659ee35e879638e88b64ab1b31dbbe0 Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Tue, 4 Feb 2025 17:02:21 -0500 Subject: [PATCH 2/2] tokenize: disallow zero-length matches --- include/minja/minja.hpp | 5 +++++ tests/test-syntax.cpp | 1 + 2 files changed, 6 insertions(+) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index bf21fe7..c726592 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -2336,6 +2336,11 @@ class Parser { throw std::runtime_error("Unexpected block: " + keyword); } } else if (std::regex_search(it, end, match, non_text_open_regex)) { + if (!match.position()) { + if (match[0] != "{#") + throw std::runtime_error("Internal error: Expected a comment"); + throw std::runtime_error("Missing end of comment tag"); + } auto text_end = it + match.position(); text = std::string(it, text_end); it = text_end; diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp index b6f6d57..85bf222 100644 --- a/tests/test-syntax.cpp +++ b/tests/test-syntax.cpp @@ -522,6 +522,7 @@ TEST(SyntaxTest, SimpleCases) { EXPECT_THAT([]() { render("{% if 1 %}{% else %}", {}, {}); }, ThrowsWithSubstr("Unterminated if")); EXPECT_THAT([]() { render("{% if 1 %}{% else %}{% elif 1 %}{% endif %}", {}, {}); }, ThrowsWithSubstr("Unterminated if")); EXPECT_THAT([]() { render("{% filter trim %}", {}, {}); }, ThrowsWithSubstr("Unterminated filter")); + EXPECT_THAT([]() { render("{# ", {}, {}); }, ThrowsWithSubstr("Missing end of comment tag")); } EXPECT_EQ(