From 0fd12b8e8e7ed2efbc4b09ce40f8d8f30d7ecd56 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 11:13:05 +0000 Subject: [PATCH 1/3] build: loudly skip tests we fail to get templates of (drop test gated flag) --- CMakeLists.txt | 1 - scripts/fetch_templates_and_goldens.py | 5 ++ tests/CMakeLists.txt | 83 +++++++++++++------------- tests/test-chat-template.cpp | 9 ++- 4 files changed, 55 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b19d4b..64d7355 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,6 @@ project(minja VERSION 1.0.0 LANGUAGES CXX) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -option(MINJA_TEST_GATED_MODELS "minja: test gated models" OFF) if (MSVC) set(MINJA_FUZZTEST_ENABLED_DEFAULT OFF) else() diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py index 01447a5..c2fc0cf 100644 --- a/scripts/fetch_templates_and_goldens.py +++ b/scripts/fetch_templates_and_goldens.py @@ -57,6 +57,10 @@ def handle_chat_template(output_folder, model_id, variant, template_src, context with open(template_file, 'w') as f: f.write(template_src) + if not context_files: + print(f"{template_file} n/a {template_file}") + return + env = jinja2.Environment( trim_blocks=True, lstrip_blocks=True, @@ -155,6 +159,7 @@ def main(): handle_chat_template(output_folder, model_id, ct['name'], ct['template'], context_files) except Exception as e: logger.error(f"Error processing model {model_id}: {e}") + handle_chat_template(output_folder, model_id, None, str(e), []) if __name__ == '__main__': diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index eaae929..eb5311a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -25,48 +25,48 @@ set(MODEL_IDS # will be used to render each of the (relevant) test contexts into a golden file with # the official Python jinja2 library. 
Then a test case will be created to run the C++ # minja implementation on the same template and context, and compare the output with the golden. - "abacusai/Fewshot-Metamath-OrcaVicuna-Mistral" - "bofenghuang/vigogne-2-70b-chat" - "deepseek-ai/deepseek-coder-33b-instruct" - "deepseek-ai/DeepSeek-Coder-V2-Instruct" - "deepseek-ai/DeepSeek-V2.5" - "indischepartij/MiniCPM-3B-OpenHermes-2.5-v2" - "meetkai/functionary-medium-v3.1" - "meetkai/functionary-medium-v3.2" - "microsoft/Phi-3-medium-4k-instruct" - "microsoft/Phi-3-mini-4k-instruct" - "microsoft/Phi-3-small-8k-instruct" - "microsoft/Phi-3.5-mini-instruct" - "microsoft/Phi-3.5-vision-instruct" - "mlabonne/AlphaMonarch-7B" - "NousResearch/Hermes-2-Pro-Llama-3-8B" - "NousResearch/Hermes-2-Pro-Mistral-7B" - "NousResearch/Hermes-3-Llama-3.1-70B" - "openchat/openchat-3.5-0106" - "OrionStarAI/Orion-14B-Chat" - "Qwen/Qwen2-7B-Instruct" - "Qwen/Qwen2-VL-7B-Instruct" - "Qwen/Qwen2.5-7B-Instruct" - "Qwen/Qwen2.5-Math-7B-Instruct" - "teknium/OpenHermes-2.5-Mistral-7B" - "TheBloke/FusionNet_34Bx2_MoE-AWQ" -) + abacusai/Fewshot-Metamath-OrcaVicuna-Mistral + bofenghuang/vigogne-2-70b-chat + deepseek-ai/deepseek-coder-33b-instruct + deepseek-ai/DeepSeek-Coder-V2-Instruct + deepseek-ai/DeepSeek-V2.5 + indischepartij/MiniCPM-3B-OpenHermes-2.5-v2 + meetkai/functionary-medium-v3.1 + meetkai/functionary-medium-v3.2 + microsoft/Phi-3-medium-4k-instruct + microsoft/Phi-3-mini-4k-instruct + microsoft/Phi-3-small-8k-instruct + microsoft/Phi-3.5-mini-instruct + microsoft/Phi-3.5-vision-instruct + mlabonne/AlphaMonarch-7B + NousResearch/Hermes-2-Pro-Llama-3-8B + NousResearch/Hermes-2-Pro-Mistral-7B + NousResearch/Hermes-3-Llama-3.1-70B + openchat/openchat-3.5-0106 + OrionStarAI/Orion-14B-Chat + Qwen/Qwen2-7B-Instruct + Qwen/Qwen2-VL-7B-Instruct + Qwen/Qwen2.5-7B-Instruct + Qwen/Qwen2.5-Math-7B-Instruct + teknium/OpenHermes-2.5-Mistral-7B + TheBloke/FusionNet_34Bx2_MoE-AWQ -# Gated models: you will need to run `huggingface-cli login` (and 
be granted access) to download these -if (MINJA_TEST_GATED_MODELS) - list(APPEND MODEL_IDS - "meta-llama/Llama-3.2-3B-Instruct" - "meta-llama/Meta-Llama-3.1-8B-Instruct" - "google/gemma-7b-it" - "google/gemma-2-2b-it" - "mistralai/Mistral-7B-Instruct-v0.2" - "mistralai/Mixtral-8x7B-Instruct-v0.1" - "mistralai/Mistral-Large-Instruct-2407" - "mistralai/Mistral-Large-Instruct-2411" - "mistralai/Mistral-Nemo-Instruct-2407" - "CohereForAI/c4ai-command-r-plus" - ) -endif() + # Gated models: you will need to run `huggingface-cli login` (and be granted access) to download these + CohereForAI/c4ai-command-r-plus + NexaAIDev/Octopus-v2 + databricks/dbrx-instruct + meta-llama/Llama-3.2-3B-Instruct + meta-llama/Meta-Llama-3.1-8B-Instruct + google/gemma-7b-it + google/gemma-2-2b-it + mattshumer/Reflection-Llama-3.1-70B + mistralai/Mistral-7B-Instruct-v0.2 + mistralai/Mixtral-8x7B-Instruct-v0.1 + mistralai/Mistral-Large-Instruct-2407 + mistralai/Mistral-Large-Instruct-2411 + mistralai/Mistral-Nemo-Instruct-2407 + nvidia/Llama-3.1-Nemotron-70B-Instruct-HF +) # Create one test case for each {template, context} combination file(GLOB CONTEXT_FILES "${CMAKE_SOURCE_DIR}/tests/contexts/*.json") @@ -85,6 +85,7 @@ foreach(test_case ${CHAT_TEMPLATE_TEST_CASES}) list(GET test_args -1 last_arg) string(REGEX REPLACE "^[^ ]+/([^ /]+)\\.[^.]+$" "\\1" test_name "${last_arg}") add_test(NAME ${test_name} COMMAND $<TARGET_FILE:test-chat-template> ${test_args}) + set_tests_properties(${test_name} PROPERTIES SKIP_RETURN_CODE 127) endforeach() if (MINJA_FUZZTEST_ENABLED) diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 3788a64..9e4eed7 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -58,13 +58,20 @@ int main(int argc, char *argv[]) { std::string tmpl_file = argv[1]; std::string ctx_file = argv[2]; std::string golden_file = argv[3]; + + auto tmpl_str = read_file(tmpl_file); + + if (ctx_file == "n/a") + { + std::cout << "# Skipping template: " << tmpl_file << "\n" << 
tmpl_str << std::endl; + return 127; + } std::cout << "# Testing template: " << tmpl_file << std::endl << "# With context: " << ctx_file << std::endl << "# Against golden file: " << golden_file << std::endl << std::flush; - auto tmpl_str = read_file(tmpl_file); auto ctx = json::parse(read_file(ctx_file)); minja::chat_template tmpl( From 3af6f851336c9d15dcab54a9d87dc75ef0a8e0fd Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 11:25:14 +0000 Subject: [PATCH 2/3] models: test more models, loosely tag gated models as a comment --- tests/CMakeLists.txt | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index eb5311a..5472f1e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -25,23 +25,44 @@ set(MODEL_IDS # will be used to render each of the (relevant) test contexts into a golden file with # the official Python jinja2 library. Then a test case will be created to run the C++ # minja implementation on the same template and context, and compare the output with the golden. + # + # For Gated models, you'll need to run `huggingface-cli login` (and be granted access) to download their template. + + abacusai/Fewshot-Metamath-OrcaVicuna-Mistral + apple/OpenELM-1_1B-Instruct bofenghuang/vigogne-2-70b-chat + CohereForAI/c4ai-command-r-plus # Gated + databricks/dbrx-instruct # Gated deepseek-ai/deepseek-coder-33b-instruct deepseek-ai/DeepSeek-Coder-V2-Instruct deepseek-ai/DeepSeek-V2.5 + dreamgen/WizardLM-2-7B + # fireworks-ai/llama-3-firefunction-v2 # Broken, TODO! 
+ google/gemma-2-2b-it # Gated + google/gemma-7b-it # Gated indischepartij/MiniCPM-3B-OpenHermes-2.5-v2 + mattshumer/Reflection-Llama-3.1-70B meetkai/functionary-medium-v3.1 meetkai/functionary-medium-v3.2 + meta-llama/Llama-3.2-3B-Instruct # Gated + meta-llama/Meta-Llama-3.1-8B-Instruct # Gated microsoft/Phi-3-medium-4k-instruct microsoft/Phi-3-mini-4k-instruct microsoft/Phi-3-small-8k-instruct microsoft/Phi-3.5-mini-instruct microsoft/Phi-3.5-vision-instruct + mistralai/Mistral-7B-Instruct-v0.2 # Gated + mistralai/Mistral-Large-Instruct-2407 # Gated + mistralai/Mistral-Large-Instruct-2411 # Gated + mistralai/Mistral-Nemo-Instruct-2407 # Gated + mistralai/Mixtral-8x7B-Instruct-v0.1 # Gated mlabonne/AlphaMonarch-7B + NexaAIDev/Octopus-v2 NousResearch/Hermes-2-Pro-Llama-3-8B NousResearch/Hermes-2-Pro-Mistral-7B NousResearch/Hermes-3-Llama-3.1-70B + nvidia/Llama-3.1-Nemotron-70B-Instruct-HF openchat/openchat-3.5-0106 OrionStarAI/Orion-14B-Chat Qwen/Qwen2-7B-Instruct @@ -50,22 +71,6 @@ set(MODEL_IDS Qwen/Qwen2.5-Math-7B-Instruct teknium/OpenHermes-2.5-Mistral-7B TheBloke/FusionNet_34Bx2_MoE-AWQ - - # Gated models: you will need to run `huggingface-cli login` (and be granted access) to download these - CohereForAI/c4ai-command-r-plus - NexaAIDev/Octopus-v2 - databricks/dbrx-instruct - meta-llama/Llama-3.2-3B-Instruct - meta-llama/Meta-Llama-3.1-8B-Instruct - google/gemma-7b-it - google/gemma-2-2b-it - mattshumer/Reflection-Llama-3.1-70B - mistralai/Mistral-7B-Instruct-v0.2 - mistralai/Mixtral-8x7B-Instruct-v0.1 - mistralai/Mistral-Large-Instruct-2407 - mistralai/Mistral-Large-Instruct-2411 - mistralai/Mistral-Nemo-Instruct-2407 - nvidia/Llama-3.1-Nemotron-70B-Instruct-HF ) # Create one test case for each {template, context} combination From 89c484700391446dad32a009ab410bf051a7b152 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 11:34:19 +0000 Subject: [PATCH 3/3] models: restructure list of models (can't find templates of a few popular ones) --- 
scripts/fetch_templates_and_goldens.py | 1 + tests/CMakeLists.txt | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py index c2fc0cf..3879e2f 100644 --- a/scripts/fetch_templates_and_goldens.py +++ b/scripts/fetch_templates_and_goldens.py @@ -151,6 +151,7 @@ def main(): except json.JSONDecodeError: config = json.loads(re.sub(r'\}([\n\s]*\}[\n\s]*\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str)) + assert 'chat_template' in config, 'No "chat_template" entry in tokenizer_config.json!' chat_template = config['chat_template'] if isinstance(chat_template, str): handle_chat_template(output_folder, model_id, None, chat_template, context_files) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5472f1e..1ace569 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -28,9 +28,7 @@ set(MODEL_IDS # # For Gated models, you'll need to run `huggingface-cli login` (and be granted access) to download their template. - abacusai/Fewshot-Metamath-OrcaVicuna-Mistral - apple/OpenELM-1_1B-Instruct bofenghuang/vigogne-2-70b-chat CohereForAI/c4ai-command-r-plus # Gated databricks/dbrx-instruct # Gated @@ -38,7 +36,6 @@ set(MODEL_IDS deepseek-ai/DeepSeek-Coder-V2-Instruct deepseek-ai/DeepSeek-V2.5 dreamgen/WizardLM-2-7B - # fireworks-ai/llama-3-firefunction-v2 # Broken, TODO! google/gemma-2-2b-it # Gated google/gemma-7b-it # Gated indischepartij/MiniCPM-3B-OpenHermes-2.5-v2 @@ -71,6 +68,14 @@ set(MODEL_IDS Qwen/Qwen2.5-Math-7B-Instruct teknium/OpenHermes-2.5-Mistral-7B TheBloke/FusionNet_34Bx2_MoE-AWQ + + # Broken, TODO: + # fireworks-ai/llama-3-firefunction-v2 + + # Can't find template(s), TODO: + # ai21labs/Jamba-v0.1 + # apple/OpenELM-1_1B-Instruct + # xai-org/grok-1 ) # Create one test case for each {template, context} combination