From a14e3eaa007f591b4a7d129fc15efe5fedaf894e Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Wed, 22 Jan 2025 03:13:15 +0000
Subject: [PATCH 1/2] Support `{% generation %}` blocks (no-op for inference)
 (#28)

---
 include/minja/minja.hpp | 28 ++++++++++++++++++++++++++--
 tests/CMakeLists.txt    |  1 +
 tests/test-syntax.cpp   |  4 ++++
 3 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp
index f0ee7a4..80bdd4b 100644
--- a/include/minja/minja.hpp
+++ b/include/minja/minja.hpp
@@ -693,7 +693,7 @@ enum SpaceHandling { Keep, Strip, StripSpaces, StripNewline };
 
 class TemplateToken {
 public:
-    enum class Type { Text, Expression, If, Else, Elif, EndIf, For, EndFor, Set, EndSet, Comment, Macro, EndMacro, Filter, EndFilter };
+    enum class Type { Text, Expression, If, Else, Elif, EndIf, For, EndFor, Generation, EndGeneration, Set, EndSet, Comment, Macro, EndMacro, Filter, EndFilter };
 
     static std::string typeToString(Type t) {
         switch (t) {
@@ -712,6 +712,8 @@ class TemplateToken {
             case Type::EndMacro: return "endmacro";
             case Type::Filter: return "filter";
             case Type::EndFilter: return "endfilter";
+            case Type::Generation: return "generation";
+            case Type::EndGeneration: return "endgeneration";
         }
         return "Unknown";
     }
@@ -788,6 +790,14 @@ struct EndForTemplateToken : public TemplateToken {
     EndForTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndFor, location, pre, post) {}
 };
 
+struct GenerationTemplateToken : public TemplateToken {  // Token emitted for an opening `{% generation %}` tag.
+    GenerationTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::Generation, location, pre, post) {}
+};
+
+struct EndGenerationTemplateToken : public TemplateToken {  // Token emitted for a closing `{% endgeneration %}` tag.
+    EndGenerationTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post) : TemplateToken(Type::EndGeneration, location, pre, post) {}
+};
+
 struct SetTemplateToken : public TemplateToken {
     std::string ns;
     std::vector<std::string> var_names;
@@ -2149,7 +2159,7 @@ class Parser {
       static std::regex comment_tok(R"(\{#([-~]?)(.*?)([-~]?)#\})");
       static std::regex expr_open_regex(R"(\{\{([-~])?)");
       static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)");
-      static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|set|endset|block|endblock|macro|endmacro|filter|endfilter)\b)");
+      static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter)\b)");
       static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)");
       static std::regex expr_close_regex(R"([\s\n\r]*([-~])?\}\})");
       static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})");
@@ -2229,6 +2239,12 @@ class Parser {
             } else if (keyword == "endfor") {
               auto post_space = parseBlockClose();
               tokens.push_back(std::make_unique<EndForTemplateToken>(location, pre_space, post_space));
+            } else if (keyword == "generation") {
+              auto post_space = parseBlockClose();
+              tokens.push_back(std::make_unique<GenerationTemplateToken>(location, pre_space, post_space));
+            } else if (keyword == "endgeneration") {
+              auto post_space = parseBlockClose();
+              tokens.push_back(std::make_unique<EndGenerationTemplateToken>(location, pre_space, post_space));
             } else if (keyword == "set") {
               static std::regex namespaced_var_regex(R"((\w+)[\s\n\r]*\.[\s\n\r]*(\w+))");
 
@@ -2330,6 +2346,13 @@ class Parser {
                   throw unterminated(**start);
               }
               children.emplace_back(std::make_shared<ForNode>(token->location, std::move(for_token->var_names), std::move(for_token->iterable), std::move(for_token->condition), std::move(body), for_token->recursive, std::move(else_body)));
+          } else if (dynamic_cast<GenerationTemplateToken*>(token.get())) {
+              auto body = parseTemplate(begin, it, end);
+              if (it == end || (*(it++))->type != TemplateToken::Type::EndGeneration) {
+                  throw unterminated(**start);
+              }
+              // Treat as a no-op, as our scope is templates for inference, not training (`{% generation %}` wraps generated tokens for masking).
+              children.emplace_back(std::move(body));
           } else if (auto text_token = dynamic_cast<TextTemplateToken*>(token.get())) {
               SpaceHandling pre_space = (it - 1) != begin ? (*(it - 2))->post_space : SpaceHandling::Keep;
               SpaceHandling post_space = it != end ? (*it)->pre_space : SpaceHandling::Keep;
@@ -2397,6 +2420,7 @@ class Parser {
                   || dynamic_cast<EndFilterTemplateToken*>(token.get())
                   || dynamic_cast<EndIfTemplateToken*>(token.get())
                   || dynamic_cast<ElseTemplateToken*>(token.get())
+                  || dynamic_cast<EndGenerationTemplateToken*>(token.get())
                   || dynamic_cast<ElifTemplateToken*>(token.get())) {
               it--;  // unconsume the token
               break;  // exit the loop
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index ad54b0a..8791f17 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -47,6 +47,7 @@ set(MODEL_IDS
     deepseek-ai/DeepSeek-V2.5
     google/gemma-2-2b-it # Gated
     google/gemma-7b-it # Gated
+    MiniMaxAI/MiniMax-Text-01
     indischepartij/MiniCPM-3B-OpenHermes-2.5-v2
     mattshumer/Reflection-Llama-3.1-70B
     meetkai/functionary-medium-v3.2
diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp
index 41f38ee..21e5ea4 100644
--- a/tests/test-syntax.cpp
+++ b/tests/test-syntax.cpp
@@ -374,6 +374,9 @@ TEST(SyntaxTest, SimpleCases) {
         EXPECT_EQ(
             "[]",
             render(R"({{ None | items | list | tojson }})", {}, {}));
+        EXPECT_EQ(
+            "Foo",
+            render(R"({% generation %}Foo{% endgeneration %})", {}, {}));
     }
     EXPECT_EQ(
         "[[1, 2]]",
@@ -493,6 +496,7 @@ TEST(SyntaxTest, SimpleCases) {
 
         EXPECT_THAT([]() { render("{% if 1 %}", {}, {}); }, ThrowsWithSubstr("Unterminated if"));
         EXPECT_THAT([]() { render("{% for x in 1 %}", {}, {}); }, ThrowsWithSubstr("Unterminated for"));
+        EXPECT_THAT([]() { render("{% generation %}", {}, {}); }, ThrowsWithSubstr("Unterminated generation"));
         EXPECT_THAT([]() { render("{% if 1 %}{% else %}", {}, {}); }, ThrowsWithSubstr("Unterminated if"));
         EXPECT_THAT([]() { render("{% if 1 %}{% else %}{% elif 1 %}{% endif %}", {}, {}); }, ThrowsWithSubstr("Unterminated if"));
         EXPECT_THAT([]() { render("{% filter trim %}", {}, {}); }, ThrowsWithSubstr("Unterminated filter"));

From 10735263459eed595fa330f9a46f9236234d50cc Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Wed, 22 Jan 2025 14:20:45 +0000
Subject: [PATCH 2/2] Detect + adjust when template requires typed messages

---
 include/minja/chat-template.hpp        | 37 +++++++++++++++++++-------
 scripts/fetch_templates_and_goldens.py | 14 ++++++++++
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/include/minja/chat-template.hpp b/include/minja/chat-template.hpp
index 8ffca8f..a89eb55 100644
--- a/include/minja/chat-template.hpp
+++ b/include/minja/chat-template.hpp
@@ -25,6 +25,7 @@ class chat_template {
     // Meta-Llama-3.1-8B-Instruct's template expects arguments to be an object.
     // Most other templates (and OpenAI's API) expect the arguments object to be stringified.
     bool requires_object_arguments_ = false;
+    bool requires_typed_content_ = false;
     bool supports_system_role_ = true;
     bool supports_parallel_tool_calls_ = false;
     std::string source_;
@@ -32,14 +33,14 @@ class chat_template {
     std::string eos_token_;
     std::shared_ptr<minja::TemplateNode> template_root_;
 
-    std::string try_render(
+    std::string try_raw_render(
         const nlohmann::ordered_json & messages,
         const nlohmann::ordered_json & tools,
         bool add_generation_prompt,
         const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
     {
         try {
-            auto prompt = apply(messages, tools, add_generation_prompt, extra_context);
+            auto prompt = apply(messages, tools, add_generation_prompt, extra_context, /* adjust_inputs= */ false);
             // fprintf(stderr, "Prompt: %s\n", prompt.c_str());
             return prompt;
         } catch (const std::exception & e) {
@@ -60,7 +61,7 @@ class chat_template {
         supports_tools_ = source.find("tools") != std::string::npos;
         
         auto renders_string_arguments =
-            try_render({
+            try_raw_render({
                 {
                     {"role", "user"},
                     {"content", "Hey"}
@@ -81,7 +82,7 @@ class chat_template {
             }, {}, false).find("{\"code\": \"print") != std::string::npos;
         if (!renders_string_arguments) {
             auto renders_object_arguments =
-                try_render({
+                try_raw_render({
                     {
                         {"role", "user"},
                         {"content", "Hey"}
@@ -106,10 +107,13 @@ class chat_template {
         }
         supports_parallel_tool_calls_ = source.find("tool_call_id") != std::string::npos;
 
-        supports_system_role_ = try_render({
+        supports_system_role_ = try_raw_render({
             {{"role", "system"}, {"content", "<System Needle>"}},
             {{"role", "user"},   {"content", "Hey"}}
         }, {}, false).find("<System Needle>") != std::string::npos;
+
+        requires_typed_content_ = try_raw_render({{{"role", "user"},   {"content", "Hey"}}}, {}, false).find("Hey") == std::string::npos
+            && try_raw_render({{{"role", "user"},   {"content", {{{"type", "text"}, {"text", "Hey"}}}}}}, {}, false).find("Hey") != std::string::npos;
     }
 
     const std::string & source() const { return source_; }
@@ -122,19 +126,34 @@ class chat_template {
         const nlohmann::ordered_json & messages,
         const nlohmann::ordered_json & tools,
         bool add_generation_prompt,
-        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
+        const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(),
+        bool adjust_inputs = true) const
     {
         json actual_messages;
 
         // First, "fix" messages so they have a chance to be rendered correctly by the template
 
-        if (requires_object_arguments_ || !supports_system_role_ || !supports_tools_) {
+        if (adjust_inputs && (requires_object_arguments_ || !supports_system_role_ || !supports_tools_ || requires_typed_content_)) {
             actual_messages = json::array();
 
+            // Appends `msg` to `actual_messages`. When the template requires typed content, a plain
+            // string `content` is rewritten as a one-element list: [{"type": "text", "text": <content>}].
+            auto add_message = [&](const json & msg) {
+                // Copy the whole message first so every other field (role, tool_calls, tool_call_id,
+                // name, ...) is preserved; only `content` is replaced below.
+                actual_messages.push_back(msg);
+                if (requires_typed_content_ && msg.contains("content") && msg.at("content").is_string()) {
+                    // is_string() already excludes null, so no separate null check is needed.
+                    actual_messages.back()["content"] = json::array({
+                        {{"type", "text"}, {"text", msg.at("content")}},
+                    });
+                }
+            };
+
             std::string pending_system;
             auto flush_sys = [&]() {
                 if (!pending_system.empty()) {
-                    actual_messages.push_back({
+                    add_message({
                         {"role", "user"},
                         {"content", pending_system},
                     });
@@ -217,7 +236,7 @@ class chat_template {
                         }
                     }
                 }
-                actual_messages.push_back(message);
+                add_message(message);
             }
             flush_sys();
         } else {
diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py
index e8beaa6..5a8348c 100644
--- a/scripts/fetch_templates_and_goldens.py
+++ b/scripts/fetch_templates_and_goldens.py
@@ -56,6 +56,11 @@ def join_cmake_path(parent, child):
     return '/'.join(x.replace(r'\\', '/') for x in (parent, child))
 
 def handle_chat_template(output_folder, model_id, variant, template_src, context_files):
+
+    if '{% generation %}' in template_src:
+        print('Removing {% generation %} blocks from template', file=sys.stderr)
+        template_src = template_src.replace('{% generation %}', '').replace('{% endgeneration %}', '')
+
     model_name = model_id.replace("/", "-")
     base_name = f'{model_name}-{variant}' if variant else model_name
     template_file = join_cmake_path(output_folder, f'{base_name}.jinja')
@@ -126,6 +131,10 @@ def renders(messages, *, tools=[], add_generation_prompt=False, extra_context={}
         {"role": "system", "content": "System Needle"},
         {"role": "user", "content": "Hey"}
     ], extra_context=basic_extra_context, expect_strings=["System Needle"])
+
+    requires_typed_content = \
+        not renders([{"role": "user", "content": "Hey"}], extra_context=basic_extra_context, expect_strings=["Hey"]) \
+        and renders([{"role": "user", "content": [{"type": "text", "text": "Hey"}]}], extra_context=basic_extra_context, expect_strings=["Hey"])
     
     for context_file in context_files:
         context_name = os.path.basename(context_file).replace(".json", "")
@@ -148,6 +157,11 @@ def renders(messages, *, tools=[], add_generation_prompt=False, extra_context={}
                             arguments = tool_call['function']['arguments']
                             tool_call['function']['arguments'] = json.loads(arguments)
 
+        if requires_typed_content:
+            for message in context['messages']:
+                if 'content' in message and isinstance(message['content'], str):
+                    message['content'] = [{"type": "text", "text": message['content']}]
+
         try:
             output = template.render(**context)
         except Exception as e1: