From 0fd12b8e8e7ed2efbc4b09ce40f8d8f30d7ecd56 Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Fri, 6 Dec 2024 11:13:05 +0000
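Subject: [PATCH 1/3] build: loudly skip tests whose templates cannot be
 fetched (drop MINJA_TEST_GATED_MODELS option)

When fetching a model's chat template fails (e.g. for a gated model
without `huggingface-cli login`), the fetch script now prints a
test-case line whose context field is "n/a". test-chat-template exits
with code 127 for such cases, and the tests are registered with
SKIP_RETURN_CODE 127 so CTest reports them as skipped, instead of gated
models being left out unless MINJA_TEST_GATED_MODELS was enabled.
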
---
 CMakeLists.txt                         |  1 -
 scripts/fetch_templates_and_goldens.py |  5 ++
 tests/CMakeLists.txt                   | 83 +++++++++++++-------------
 tests/test-chat-template.cpp           |  9 ++-
 4 files changed, 55 insertions(+), 43 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9b19d4b..64d7355 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,7 +13,6 @@ project(minja VERSION 1.0.0 LANGUAGES CXX)
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
-option(MINJA_TEST_GATED_MODELS    "minja: test gated models"    OFF)
 if (MSVC)
     set(MINJA_FUZZTEST_ENABLED_DEFAULT OFF)
 else()
diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py
index 01447a5..c2fc0cf 100644
--- a/scripts/fetch_templates_and_goldens.py
+++ b/scripts/fetch_templates_and_goldens.py
@@ -57,6 +57,10 @@ def handle_chat_template(output_folder, model_id, variant, template_src, context
     with open(template_file, 'w') as f:
         f.write(template_src)
 
+    if not context_files:
+        print(f"{template_file} n/a {template_file}")
+        return
+
     env = jinja2.Environment(
         trim_blocks=True,
         lstrip_blocks=True,
@@ -155,6 +159,7 @@ def main():
                     handle_chat_template(output_folder, model_id, ct['name'], ct['template'], context_files)
         except Exception as e:
             logger.error(f"Error processing model {model_id}: {e}")
+            handle_chat_template(output_folder, model_id, None, str(e), [])
 
 
 if __name__ == '__main__':
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index eaae929..eb5311a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -25,48 +25,48 @@ set(MODEL_IDS
     # will be used to render each of the (relevant) test contexts into a golden file with
     # the official Python jinja2 library. Then a test case will be created to run the C++
     # minja implementation on the same template and context, and compare the output with the golden.
-    "abacusai/Fewshot-Metamath-OrcaVicuna-Mistral"
-    "bofenghuang/vigogne-2-70b-chat"
-    "deepseek-ai/deepseek-coder-33b-instruct"
-    "deepseek-ai/DeepSeek-Coder-V2-Instruct"
-    "deepseek-ai/DeepSeek-V2.5"
-    "indischepartij/MiniCPM-3B-OpenHermes-2.5-v2"
-    "meetkai/functionary-medium-v3.1"
-    "meetkai/functionary-medium-v3.2"
-    "microsoft/Phi-3-medium-4k-instruct"
-    "microsoft/Phi-3-mini-4k-instruct"
-    "microsoft/Phi-3-small-8k-instruct"
-    "microsoft/Phi-3.5-mini-instruct"
-    "microsoft/Phi-3.5-vision-instruct"
-    "mlabonne/AlphaMonarch-7B"
-    "NousResearch/Hermes-2-Pro-Llama-3-8B"
-    "NousResearch/Hermes-2-Pro-Mistral-7B"
-    "NousResearch/Hermes-3-Llama-3.1-70B"
-    "openchat/openchat-3.5-0106"
-    "OrionStarAI/Orion-14B-Chat"
-    "Qwen/Qwen2-7B-Instruct"
-    "Qwen/Qwen2-VL-7B-Instruct"
-    "Qwen/Qwen2.5-7B-Instruct"
-    "Qwen/Qwen2.5-Math-7B-Instruct"
-    "teknium/OpenHermes-2.5-Mistral-7B"
-    "TheBloke/FusionNet_34Bx2_MoE-AWQ"
-)
+    abacusai/Fewshot-Metamath-OrcaVicuna-Mistral
+    bofenghuang/vigogne-2-70b-chat
+    deepseek-ai/deepseek-coder-33b-instruct
+    deepseek-ai/DeepSeek-Coder-V2-Instruct
+    deepseek-ai/DeepSeek-V2.5
+    indischepartij/MiniCPM-3B-OpenHermes-2.5-v2
+    meetkai/functionary-medium-v3.1
+    meetkai/functionary-medium-v3.2
+    microsoft/Phi-3-medium-4k-instruct
+    microsoft/Phi-3-mini-4k-instruct
+    microsoft/Phi-3-small-8k-instruct
+    microsoft/Phi-3.5-mini-instruct
+    microsoft/Phi-3.5-vision-instruct
+    mlabonne/AlphaMonarch-7B
+    NousResearch/Hermes-2-Pro-Llama-3-8B
+    NousResearch/Hermes-2-Pro-Mistral-7B
+    NousResearch/Hermes-3-Llama-3.1-70B
+    openchat/openchat-3.5-0106
+    OrionStarAI/Orion-14B-Chat
+    Qwen/Qwen2-7B-Instruct
+    Qwen/Qwen2-VL-7B-Instruct
+    Qwen/Qwen2.5-7B-Instruct
+    Qwen/Qwen2.5-Math-7B-Instruct
+    teknium/OpenHermes-2.5-Mistral-7B
+    TheBloke/FusionNet_34Bx2_MoE-AWQ
 
-# Gated models: you will need to run `huggingface-cli login` (and be granted access) to download these
-if (MINJA_TEST_GATED_MODELS)
-    list(APPEND MODEL_IDS
-        "meta-llama/Llama-3.2-3B-Instruct"
-        "meta-llama/Meta-Llama-3.1-8B-Instruct"
-        "google/gemma-7b-it"
-        "google/gemma-2-2b-it"
-        "mistralai/Mistral-7B-Instruct-v0.2"
-        "mistralai/Mixtral-8x7B-Instruct-v0.1"
-        "mistralai/Mistral-Large-Instruct-2407"
-        "mistralai/Mistral-Large-Instruct-2411"
-        "mistralai/Mistral-Nemo-Instruct-2407"
-        "CohereForAI/c4ai-command-r-plus"
-    )
-endif()
+    # Gated models: you will need to run `huggingface-cli login` (and be granted access) to download these
+    CohereForAI/c4ai-command-r-plus
+    NexaAIDev/Octopus-v2
+    databricks/dbrx-instruct
+    meta-llama/Llama-3.2-3B-Instruct
+    meta-llama/Meta-Llama-3.1-8B-Instruct
+    google/gemma-7b-it
+    google/gemma-2-2b-it
+    mattshumer/Reflection-Llama-3.1-70B
+    mistralai/Mistral-7B-Instruct-v0.2
+    mistralai/Mixtral-8x7B-Instruct-v0.1
+    mistralai/Mistral-Large-Instruct-2407
+    mistralai/Mistral-Large-Instruct-2411
+    mistralai/Mistral-Nemo-Instruct-2407
+    nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
+)
 
 # Create one test case for each {template, context} combination
 file(GLOB CONTEXT_FILES "${CMAKE_SOURCE_DIR}/tests/contexts/*.json")
@@ -85,6 +85,7 @@ foreach(test_case ${CHAT_TEMPLATE_TEST_CASES})
     list(GET test_args -1 last_arg)
     string(REGEX REPLACE "^[^ ]+/([^ /]+)\\.[^.]+$" "\\1" test_name "${last_arg}")
     add_test(NAME ${test_name} COMMAND $<TARGET_FILE:test-chat-template> ${test_args})
+    set_tests_properties(${test_name} PROPERTIES SKIP_RETURN_CODE 127)
 endforeach()
 
 if (MINJA_FUZZTEST_ENABLED)
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 3788a64..9e4eed7 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -58,13 +58,20 @@ int main(int argc, char *argv[]) {
     std::string tmpl_file = argv[1];
     std::string ctx_file = argv[2];
     std::string golden_file = argv[3];
+    
+    auto tmpl_str = read_file(tmpl_file);
+    
+    if (ctx_file == "n/a")
+    {
+        std::cout << "# Skipping template: " << tmpl_file << "\n" << tmpl_str << std::endl;
+        return 127;
+    }
 
     std::cout << "# Testing template: " << tmpl_file << std::endl
               << "# With context: " << ctx_file << std::endl
               << "# Against golden file: " << golden_file << std::endl
               << std::flush;
 
-    auto tmpl_str = read_file(tmpl_file);
     auto ctx = json::parse(read_file(ctx_file));
 
     minja::chat_template tmpl(

From 3af6f851336c9d15dcab54a9d87dc75ef0a8e0fd Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Fri, 6 Dec 2024 11:25:14 +0000
Subject: [PATCH 2/3] models: test more models, mark gated ones with a
 trailing "# Gated" comment

---
 tests/CMakeLists.txt | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index eb5311a..5472f1e 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -25,23 +25,44 @@ set(MODEL_IDS
     # will be used to render each of the (relevant) test contexts into a golden file with
     # the official Python jinja2 library. Then a test case will be created to run the C++
     # minja implementation on the same template and context, and compare the output with the golden.
+    #
+    # For Gated models, you'll need to run `huggingface-cli login` (and be granted access) to download their template.
+    
+
     abacusai/Fewshot-Metamath-OrcaVicuna-Mistral
+    apple/OpenELM-1_1B-Instruct
     bofenghuang/vigogne-2-70b-chat
+    CohereForAI/c4ai-command-r-plus # Gated
+    databricks/dbrx-instruct # Gated
     deepseek-ai/deepseek-coder-33b-instruct
     deepseek-ai/DeepSeek-Coder-V2-Instruct
     deepseek-ai/DeepSeek-V2.5
+    dreamgen/WizardLM-2-7B
+    # fireworks-ai/llama-3-firefunction-v2 # Broken, TODO!
+    google/gemma-2-2b-it # Gated
+    google/gemma-7b-it # Gated
     indischepartij/MiniCPM-3B-OpenHermes-2.5-v2
+    mattshumer/Reflection-Llama-3.1-70B
     meetkai/functionary-medium-v3.1
     meetkai/functionary-medium-v3.2
+    meta-llama/Llama-3.2-3B-Instruct # Gated
+    meta-llama/Meta-Llama-3.1-8B-Instruct # Gated
     microsoft/Phi-3-medium-4k-instruct
     microsoft/Phi-3-mini-4k-instruct
     microsoft/Phi-3-small-8k-instruct
     microsoft/Phi-3.5-mini-instruct
     microsoft/Phi-3.5-vision-instruct
+    mistralai/Mistral-7B-Instruct-v0.2 # Gated
+    mistralai/Mistral-Large-Instruct-2407 # Gated
+    mistralai/Mistral-Large-Instruct-2411 # Gated
+    mistralai/Mistral-Nemo-Instruct-2407 # Gated
+    mistralai/Mixtral-8x7B-Instruct-v0.1 # Gated
     mlabonne/AlphaMonarch-7B
+    NexaAIDev/Octopus-v2
     NousResearch/Hermes-2-Pro-Llama-3-8B
     NousResearch/Hermes-2-Pro-Mistral-7B
     NousResearch/Hermes-3-Llama-3.1-70B
+    nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
     openchat/openchat-3.5-0106
     OrionStarAI/Orion-14B-Chat
     Qwen/Qwen2-7B-Instruct
@@ -50,22 +71,6 @@ set(MODEL_IDS
     Qwen/Qwen2.5-Math-7B-Instruct
     teknium/OpenHermes-2.5-Mistral-7B
     TheBloke/FusionNet_34Bx2_MoE-AWQ
-
-    # Gated models: you will need to run `huggingface-cli login` (and be granted access) to download these
-    CohereForAI/c4ai-command-r-plus
-    NexaAIDev/Octopus-v2
-    databricks/dbrx-instruct
-    meta-llama/Llama-3.2-3B-Instruct
-    meta-llama/Meta-Llama-3.1-8B-Instruct
-    google/gemma-7b-it
-    google/gemma-2-2b-it
-    mattshumer/Reflection-Llama-3.1-70B
-    mistralai/Mistral-7B-Instruct-v0.2
-    mistralai/Mixtral-8x7B-Instruct-v0.1
-    mistralai/Mistral-Large-Instruct-2407
-    mistralai/Mistral-Large-Instruct-2411
-    mistralai/Mistral-Nemo-Instruct-2407
-    nvidia/Llama-3.1-Nemotron-70B-Instruct-HF
 )
 
 # Create one test case for each {template, context} combination

From 89c484700391446dad32a009ab410bf051a7b152 Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Fri, 6 Dec 2024 11:34:19 +0000
Subject: [PATCH 3/3] models: restructure list of models (can't find templates
 of a few popular ones)

---
 scripts/fetch_templates_and_goldens.py |  1 +
 tests/CMakeLists.txt                   | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py
index c2fc0cf..3879e2f 100644
--- a/scripts/fetch_templates_and_goldens.py
+++ b/scripts/fetch_templates_and_goldens.py
@@ -151,6 +151,7 @@ def main():
             except json.JSONDecodeError:
                 config = json.loads(re.sub(r'\}([\n\s]*\}[\n\s]*\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str))
 
+            assert 'chat_template' in config, 'No "chat_template" entry in tokenizer_config.json!'
             chat_template = config['chat_template']
             if isinstance(chat_template, str):
                 handle_chat_template(output_folder, model_id, None, chat_template, context_files)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 5472f1e..1ace569 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -28,9 +28,7 @@ set(MODEL_IDS
     #
     # For Gated models, you'll need to run `huggingface-cli login` (and be granted access) to download their template.
     
-
     abacusai/Fewshot-Metamath-OrcaVicuna-Mistral
-    apple/OpenELM-1_1B-Instruct
     bofenghuang/vigogne-2-70b-chat
     CohereForAI/c4ai-command-r-plus # Gated
     databricks/dbrx-instruct # Gated
@@ -38,7 +36,6 @@ set(MODEL_IDS
     deepseek-ai/DeepSeek-Coder-V2-Instruct
     deepseek-ai/DeepSeek-V2.5
     dreamgen/WizardLM-2-7B
-    # fireworks-ai/llama-3-firefunction-v2 # Broken, TODO!
     google/gemma-2-2b-it # Gated
     google/gemma-7b-it # Gated
     indischepartij/MiniCPM-3B-OpenHermes-2.5-v2
@@ -71,6 +68,14 @@ set(MODEL_IDS
     Qwen/Qwen2.5-Math-7B-Instruct
     teknium/OpenHermes-2.5-Mistral-7B
     TheBloke/FusionNet_34Bx2_MoE-AWQ
+
+    # Broken, TODO:
+    # fireworks-ai/llama-3-firefunction-v2
+
+    # Can't find template(s), TODO:
+    # ai21labs/Jamba-v0.1
+    # apple/OpenELM-1_1B-Instruct
+    # xai-org/grok-1
 )
 
 # Create one test case for each {template, context} combination