From 863f814102b506dfca05b57273d4a386306bb131 Mon Sep 17 00:00:00 2001 From: ochafik Date: Fri, 6 Dec 2024 09:45:55 +0000 Subject: [PATCH 01/25] build: support using stock python (no venv) --- CMakeLists.txt | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b19d4b..d2aeea3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,14 +13,15 @@ project(minja VERSION 1.0.0 LANGUAGES CXX) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -option(MINJA_TEST_GATED_MODELS "minja: test gated models" OFF) if (MSVC) set(MINJA_FUZZTEST_ENABLED_DEFAULT OFF) else() set(MINJA_FUZZTEST_ENABLED_DEFAULT ON) endif() -option(MINJA_FUZZTEST_ENABLED "minja: fuzztests enabled" MINJA_FUZZTEST_ENABLED_DEFAULT) +option(MINJA_TEST_GATED_MODELS "minja: test gated models" OFF) +option(MINJA_FUZZTEST_ENABLED "minja: fuzztests enabled" MINJA_FUZZTEST_ENABLED_DEFAULT) option(MINJA_FUZZTEST_FUZZING_MODE "minja: run fuzztests (if enabled) in fuzzing mode" OFF) +option(MINJA_USE_VENV "minja: use Python venv for build" ON) # Note: tests require C++14 because google/fuzztest depends on a version of gtest that requires it # (and we don't want to use an older version of fuzztest) @@ -57,20 +58,24 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) endif() # Create a python venv w/ the required dependencies -find_package(Python3 COMPONENTS Interpreter REQUIRED) -set(VENV_DIR "${CMAKE_BINARY_DIR}/venv") -if(WIN32) - set(VENV_PYTHON "${VENV_DIR}/Scripts/python.exe") -else() - set(VENV_PYTHON "${VENV_DIR}/bin/python") +# Force-set -DPython3_FIND_STRATEGY=LOCATION +set(Python_FIND_STRATEGY LOCATION CACHE STRING "Python find strategy" FORCE) +find_package(Python COMPONENTS Interpreter REQUIRED) +if(MINJA_USE_VENV) + set(VENV_DIR "${CMAKE_BINARY_DIR}/venv") + if(WIN32) + set(VENV_PYTHON "${VENV_DIR}/Scripts/python.exe") + else() + set(VENV_PYTHON "${VENV_DIR}/bin/python") + endif() + execute_process( + COMMAND ${Python3_EXECUTABLE} -m venv "${VENV_DIR}" + COMMAND_ERROR_IS_FATAL ANY) + execute_process( + COMMAND ${VENV_PYTHON} -m pip install -r "${CMAKE_SOURCE_DIR}/requirements.txt" + COMMAND_ERROR_IS_FATAL ANY) + set(Python_EXECUTABLE "${VENV_PYTHON}" CACHE FILEPATH "Path to Python executable in venv" FORCE) endif() -execute_process( - COMMAND ${Python3_EXECUTABLE} -m venv "${VENV_DIR}" - COMMAND_ERROR_IS_FATAL ANY) -execute_process( - COMMAND ${VENV_PYTHON} -m pip install -r "${CMAKE_SOURCE_DIR}/requirements.txt" - COMMAND_ERROR_IS_FATAL ANY) -set(Python_EXECUTABLE "${VENV_PYTHON}" CACHE FILEPATH "Path to Python executable in venv" FORCE) message(STATUS "Python executable: ${Python_EXECUTABLE}") find_program(CPPCHECK cppcheck) From eaec4debf705e4238dea577773516690a485310f Mon Sep 17 00:00:00 2001 From: ochafik Date: Fri, 6 Dec 2024 09:46:05 +0000 Subject: [PATCH 02/25] ci: setup python deps --- .github/workflows/build.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0b45bd8..8ac1c8f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,8 +49,17 @@ jobs: - name: Set up CMake uses: lukka/get-cmake@latest + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'pip' + + - name: Install Python deps + run: pip install -r requirements.txt + - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DMINJA_USE_VENV=0 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config Release --parallel From dc3b42059c9bc01c4dd263ea1b92c56030f9f313 Mon Sep 17 00:00:00 2001 From: ochafik Date: Fri, 6 Dec 2024 09:46:19 +0000 Subject: [PATCH 03/25] models: test Qwen/QwQ-32B-Preview --- tests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index eaae929..2b4b52e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -48,6 +48,7 @@ set(MODEL_IDS "Qwen/Qwen2-VL-7B-Instruct" "Qwen/Qwen2.5-7B-Instruct" "Qwen/Qwen2.5-Math-7B-Instruct" + "Qwen/QwQ-32B-Preview" #"bartowski/QwQ-32B-Preview-GGUF" "teknium/OpenHermes-2.5-Mistral-7B" "TheBloke/FusionNet_34Bx2_MoE-AWQ" ) From 41a3983f140794346d55c7a577ba56ce54734351 Mon Sep 17 00:00:00 2001 From: ochafik Date: Fri, 6 Dec 2024 10:03:53 +0000 Subject: [PATCH 04/25] build: require c++20 for tests (designated inits) --- tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2b4b52e..3a51063 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,7 +7,7 @@ # SPDX-License-Identifier: MIT add_executable(test-syntax test-syntax.cpp) -set_target_properties(test-syntax PROPERTIES CXX_STANDARD 17) +set_target_properties(test-syntax PROPERTIES CXX_STANDARD 20) target_link_libraries(test-syntax PRIVATE nlohmann_json::nlohmann_json gtest_main From 4f26055217fd4f5861cfb12909f97eb1261041ea Mon Sep 17 00:00:00 2001 From: ochafik Date: Fri, 6 Dec 2024 10:10:59 +0000 Subject: [PATCH 05/25] build: tweak flags --- CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index d2aeea3..90bc52b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,14 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() +if (MSVC) + add_compile_options(/W4 /WX) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MD") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MDd") +else() + add_compile_options(-Wall -Wextra -pedantic -Werror) +endif() + # Create a python venv w/ the required dependencies # Force-set -DPython3_FIND_STRATEGY=LOCATION set(Python_FIND_STRATEGY LOCATION CACHE STRING "Python find strategy" FORCE) From 898eb54cac5ec18ad5dd49e33036ec5ba22feea8 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 11:37:51 +0000 Subject: [PATCH 06/25] build: fix venv build --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 90bc52b..571fd13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,7 +77,7 @@ if(MINJA_USE_VENV) set(VENV_PYTHON "${VENV_DIR}/bin/python") endif() execute_process( - COMMAND ${Python3_EXECUTABLE} -m venv "${VENV_DIR}" + COMMAND ${Python_EXECUTABLE} -m venv "${VENV_DIR}" COMMAND_ERROR_IS_FATAL ANY) execute_process( COMMAND ${VENV_PYTHON} -m pip install -r "${CMAKE_SOURCE_DIR}/requirements.txt" From e433134e50d2987f7d8f9f865d6b95c4a38e30ec Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 11:39:08 +0000 Subject: [PATCH 07/25] models: comment out dreamgen/WizardLM-2-7B (unclear where template is) --- tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8cd918e..5cab8d4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -35,7 +35,6 @@ set(MODEL_IDS deepseek-ai/deepseek-coder-33b-instruct deepseek-ai/DeepSeek-Coder-V2-Instruct deepseek-ai/DeepSeek-V2.5 - dreamgen/WizardLM-2-7B google/gemma-2-2b-it # Gated google/gemma-7b-it # Gated indischepartij/MiniCPM-3B-OpenHermes-2.5-v2 @@ -76,6 +75,7 @@ set(MODEL_IDS # Can't find template(s), TODO: # ai21labs/Jamba-v0.1 # apple/OpenELM-1_1B-Instruct + # dreamgen/WizardLM-2-7B # xai-org/grok-1 ) From abef99e0db5c93d2f87c951828eddf89c6a0ca67 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 11:41:18 +0000 Subject: [PATCH 08/25] build: try and make tests c++17 compatible again for windows build --- tests/CMakeLists.txt | 2 +- tests/test-syntax.cpp | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5cab8d4..f83a401 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,7 +7,7 @@ # SPDX-License-Identifier: MIT add_executable(test-syntax test-syntax.cpp) -set_target_properties(test-syntax PROPERTIES CXX_STANDARD 20) +set_target_properties(test-syntax PROPERTIES CXX_STANDARD 17) target_link_libraries(test-syntax PRIVATE nlohmann_json::nlohmann_json gtest_main diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp index 469335a..af35bcb 100644 --- a/tests/test-syntax.cpp +++ b/tests/test-syntax.cpp @@ -27,19 +27,19 @@ static std::string render(const std::string & template_str, const json & binding } const minja::Options lstrip_blocks { - .trim_blocks = false, - .lstrip_blocks = true, - .keep_trailing_newline = false, + /* .trim_blocks = */ false, + /* .lstrip_blocks = */ true, + /* .keep_trailing_newline = */ false, }; const minja::Options trim_blocks { - .trim_blocks = true, - .lstrip_blocks = false, - .keep_trailing_newline = false, + /* .trim_blocks = */ true, + /* .lstrip_blocks = */ false, + /* .keep_trailing_newline = */ false, }; const minja::Options lstrip_trim_blocks { - .trim_blocks = true, - .lstrip_blocks = true, - .keep_trailing_newline = false, + /* .trim_blocks = */ true, + /* .lstrip_blocks = */ true, + /* .keep_trailing_newline = */ false, }; TEST(SyntaxTest, SimpleCases) { From a424f1dcbaecc58b08627ac39354afdcb4f35617 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 13:39:08 +0000 Subject: [PATCH 09/25] ci: fix serious msvc warning --- include/minja/minja.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp index 979e53f..3b7e244 100644 --- a/include/minja/minja.hpp +++ b/include/minja/minja.hpp @@ -449,7 +449,7 @@ class Value : public std::enable_shared_from_this { Value operator*(const Value& rhs) const { if (is_string() && rhs.is_number_integer()) { std::ostringstream out; - for (int i = 0, n = rhs.get(); i < n; ++i) { + for (int64_t i = 0, n = rhs.get(); i < n; ++i) { out << to_str(); } return out.str(); From 33173f3fd03396c2d89ab517ce82a8db414eb2d3 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 13:44:41 +0000 Subject: [PATCH 10/25] ci: test venv build on ci (except on windows) --- .github/workflows/build.yml | 2 +- CMakeLists.txt | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8ac1c8f..6452d50 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -59,7 +59,7 @@ jobs: run: pip install -r requirements.txt - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DMINJA_USE_VENV=0 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config Release --parallel diff --git a/CMakeLists.txt b/CMakeLists.txt index 571fd13..21d502b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,13 +15,14 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if (MSVC) set(MINJA_FUZZTEST_ENABLED_DEFAULT OFF) + set(MINJA_USE_VENV_DEFAULT OFF) else() set(MINJA_FUZZTEST_ENABLED_DEFAULT ON) + set(MINJA_USE_VENV_DEFAULT ON) endif() -option(MINJA_TEST_GATED_MODELS "minja: test gated models" OFF) option(MINJA_FUZZTEST_ENABLED "minja: fuzztests enabled" MINJA_FUZZTEST_ENABLED_DEFAULT) option(MINJA_FUZZTEST_FUZZING_MODE "minja: run fuzztests (if enabled) in fuzzing mode" OFF) -option(MINJA_USE_VENV "minja: use Python venv for build" ON) +option(MINJA_USE_VENV "minja: use Python venv for build" MINJA_USE_VENV_DEFAULT) # Note: tests require C++14 because google/fuzztest depends on a version of gtest that requires it # (and we don't want to use an older version of fuzztest) From 2cf7a98cd4fc93990b4073deff8329bd431070fc Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 13:45:02 +0000 Subject: [PATCH 11/25] ci: stop treating all warnings as errors on msvc --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 21d502b..b56ddd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,7 +59,6 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) endif() if (MSVC) - add_compile_options(/W4 /WX) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MD") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MDd") else() From a1f02cd051ccdf52a269c56466279a1ae72d7c27 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 13:45:12 +0000 Subject: [PATCH 12/25] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1b0c979..0b9c250 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ It is **not general purpose**: it includes just what’s needed for actual chat > [!WARNING] > TL;DR: use of Minja is *at your own risk*, and the risks are plenty! See [Security & Privacy](#security--privacy) section below. +[![CI](https://github.com/google/minja/actions/workflows/build.yml/badge.svg)](https://github.com/google/minja/actions/workflows/build.yml) + ## Design goals: - Support each and every major LLM found on HuggingFace @@ -199,7 +201,7 @@ Main limitations (non-exhaustive list): - Which version of GCC / clang did you compile the tests with? On which OS version? - If you intend to contribute a fix: - Please read [CONTRIBUTING](./CONTRIBUTING.md) first. You'd have to sign a CLA, which your employer may need to accept. - - Please test as many gated models as possible (use `cmake -B build -DMINJA_TEST_GATED_MODELS=1 ...` and edit `MODEL_IDS` in [tests/CMakeLists.txt](./tests/CMakeLists.txt) appropriately if you don't have access to some models) + - Please test as many gated models as possible (authenticate w/ `huggingface-cli login` first to test gated models you have access to; see `MODEL_IDS` in [tests/CMakeLists.txt](./tests/CMakeLists.txt)) - For bonus points, check the style of your edits with: From fccc3523c99cf7c875fec33a02b4351c23dd8ba3 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 13:48:20 +0000 Subject: [PATCH 13/25] ci: add ci badge to readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0b9c250..4b34284 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ It is **not general purpose**: it includes just what’s needed for actual chat ## Design goals: - Support each and every major LLM found on HuggingFace - - See `MODEL_IDS` in [tests/CMakeLists.txt](./tests/CMakeLists.txt) for the list of models currently supported + - See `MODEL_IDS` in [tests/CMakeLists.txt](./tests/CMakeLists.txt) for the list of models currently supported - Easy to integrate to/with projects such as [llama.cpp](https://github.com/ggerganov/llama.cpp) or [gemma.cpp](https://github.com/google/gemma.cpp): - Header-only - C++11 @@ -26,7 +26,7 @@ It is **not general purpose**: it includes just what’s needed for actual chat - Address glaring Prompt injection risks in current Jinja chat templating practices. See [Security & Privacy](#security--privacy) below - Additional features from Jinja that aren't used by the template(s) of any major LLM (no feature creep!) - - Please don't submit PRs with such features, they will unfortunately be rejected. + - Please don't submit PRs with such features, they will unfortunately be rejected. - Full Jinja compliance (neither syntax-wise, nor filters / tests / globals) ## Usage: @@ -201,7 +201,7 @@ Main limitations (non-exhaustive list): - Which version of GCC / clang did you compile the tests with? On which OS version? - If you intend to contribute a fix: - Please read [CONTRIBUTING](./CONTRIBUTING.md) first. You'd have to sign a CLA, which your employer may need to accept. - - Please test as many gated models as possible (authenticate w/ `huggingface-cli login` first to test gated models you have access to; see `MODEL_IDS` in [tests/CMakeLists.txt](./tests/CMakeLists.txt)) + - Please test as many gated models as possible (use `cmake -B build -DMINJA_TEST_GATED_MODELS=1 ...` and edit MODEL_LIST appropriately) - For bonus points, check the style of your edits with: From bef44a10be0c8a202b61cfd2ebadde1f94e30587 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 13:52:16 +0000 Subject: [PATCH 14/25] build: set CMAKE_MSVC_RUNTIME_LIBRARY to link runtime statically (as gtest seems to do) --- CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b56ddd0..6c09027 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,10 +58,8 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() -if (MSVC) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MD") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MDd") -else() +set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded") +if (NOT MSVC) add_compile_options(-Wall -Wextra -pedantic -Werror) endif() From 137b03f2287657d0a899d74f47cf5e800f28df3c Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 13:57:55 +0000 Subject: [PATCH 15/25] ci: try MSVC_RUNTIME_LIBRARY to fix msvc linkage issue --- tests/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f83a401..e59509d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -13,11 +13,17 @@ target_link_libraries(test-syntax PRIVATE gtest_main gmock ) +if (MSVC) + set_target_properties(test-syntax PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded") +endif() gtest_discover_tests(test-syntax) add_executable(test-chat-template test-chat-template.cpp) set_target_properties(test-chat-template PROPERTIES CXX_STANDARD 17) target_link_libraries(test-chat-template PRIVATE nlohmann_json::nlohmann_json) +if (MSVC) + set_target_properties(test-chat-template PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded") +endif() set(MODEL_IDS # List of model IDs to test the chat template of. From b8f47da697595d79390c35abf4486968a91657e6 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 14:06:42 +0000 Subject: [PATCH 16/25] msvc: go dynamic linking --- CMakeLists.txt | 2 +- tests/CMakeLists.txt | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c09027..4c1dc75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() -set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded") +set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>DLL") if (NOT MSVC) add_compile_options(-Wall -Wextra -pedantic -Werror) endif() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e59509d..f83a401 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -13,17 +13,11 @@ target_link_libraries(test-syntax PRIVATE gtest_main gmock ) -if (MSVC) - set_target_properties(test-syntax PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded") -endif() gtest_discover_tests(test-syntax) add_executable(test-chat-template test-chat-template.cpp) set_target_properties(test-chat-template PROPERTIES CXX_STANDARD 17) target_link_libraries(test-chat-template PRIVATE nlohmann_json::nlohmann_json) -if (MSVC) - set_target_properties(test-chat-template PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded") -endif() set(MODEL_IDS # List of model IDs to test the chat template of. From 5751f1460968605365b1fbf8b999dfb64cb147d0 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 14:11:58 +0000 Subject: [PATCH 17/25] msvc: follow gtest instructions for shared runtime https://github.com/google/googletest/blob/main/googletest/README.md --- CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c1dc75..0370163 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,6 +29,12 @@ option(MINJA_USE_VENV "minja: use Python venv for build" # Examples are built w/ C++11 to check the compatibility of the library. set(CMAKE_CXX_STANDARD 11) +set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>DLL") +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +if (NOT MSVC) + add_compile_options(-Wall -Wextra -pedantic -Werror) +endif() + include(FetchContent) # Fetch nlohmann/json @@ -58,11 +64,6 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() -set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>DLL") -if (NOT MSVC) - add_compile_options(-Wall -Wextra -pedantic -Werror) -endif() - # Create a python venv w/ the required dependencies # Force-set -DPython3_FIND_STRATEGY=LOCATION set(Python_FIND_STRATEGY LOCATION CACHE STRING "Python find strategy" FORCE) From 8e696502b836ab34c637f368795b39b3b388b1b8 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 14:18:53 +0000 Subject: [PATCH 18/25] build: Default to c++17 for gtest (Examples still built w/ c++11) --- CMakeLists.txt | 5 ++--- tests/CMakeLists.txt | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0370163..d791051 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ option(MINJA_USE_VENV "minja: use Python venv for build" # Note: tests require C++14 because google/fuzztest depends on a version of gtest that requires it # (and we don't want to use an older version of fuzztest) # Examples are built w/ C++11 to check the compatibility of the library. -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>DLL") set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) @@ -64,11 +64,10 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() -# Create a python venv w/ the required dependencies -# Force-set -DPython3_FIND_STRATEGY=LOCATION set(Python_FIND_STRATEGY LOCATION CACHE STRING "Python find strategy" FORCE) find_package(Python COMPONENTS Interpreter REQUIRED) if(MINJA_USE_VENV) + # Create a python venv w/ the required dependencies set(VENV_DIR "${CMAKE_BINARY_DIR}/venv") if(WIN32) set(VENV_PYTHON "${VENV_DIR}/Scripts/python.exe") diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f83a401..c8cab48 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,7 +7,6 @@ # SPDX-License-Identifier: MIT add_executable(test-syntax test-syntax.cpp) -set_target_properties(test-syntax PROPERTIES CXX_STANDARD 17) target_link_libraries(test-syntax PRIVATE nlohmann_json::nlohmann_json gtest_main @@ -16,7 +15,6 @@ target_link_libraries(test-syntax PRIVATE gtest_discover_tests(test-syntax) add_executable(test-chat-template test-chat-template.cpp) -set_target_properties(test-chat-template PROPERTIES CXX_STANDARD 17) target_link_libraries(test-chat-template PRIVATE nlohmann_json::nlohmann_json) set(MODEL_IDS From 98c9d97e617b1fc84f1b4e2a8b16afc82db65318 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 14:24:04 +0000 Subject: [PATCH 19/25] ci: try to debug failure to get test cases on windows --- tests/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c8cab48..5a23013 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -87,8 +87,14 @@ execute_process( ${MODEL_IDS} OUTPUT_VARIABLE CHAT_TEMPLATE_TEST_CASES OUTPUT_STRIP_TRAILING_WHITESPACE + COMMAND_ERROR_IS_FATAL ANY ) string(REPLACE "\n" ";" CHAT_TEMPLATE_TEST_CASES "${CHAT_TEMPLATE_TEST_CASES}") +list(LENGTH CHAT_TEMPLATE_TEST_CASES CHAT_TEMPLATE_TEST_CASES_COUNT) +message(STATUS "Found ${CHAT_TEMPLATE_TEST_CASES_COUNT} chat template test cases") +if (CHAT_TEMPLATE_TEST_CASES_COUNT LESS 10) + message(ERROR "Not enough chat template test cases found") +endif() foreach(test_case ${CHAT_TEMPLATE_TEST_CASES}) separate_arguments(test_args UNIX_COMMAND "${test_case}") list(GET test_args -1 last_arg) From a62533fd10e58f88b1c7e3a0bff72b93148557b6 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 16:35:22 +0000 Subject: [PATCH 20/25] ci: explicit configs for ctests on windows --- .github/workflows/build.yml | 4 ++-- scripts/run_tests.sh | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6452d50..becddd0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -62,8 +62,8 @@ jobs: run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build - run: cmake --build ${{github.workspace}}/build --config Release --parallel + run: cmake --build ${{github.workspace}}/build --config ${{ matrix.type }} --parallel - name: Test working-directory: ${{github.workspace}}/build - run: ctest --test-dir tests --output-on-failure --verbose + run: ctest --test-dir tests --output-on-failure --verbose -C ${{ matrix.type }} diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 36bee3e..a3efbdd 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -13,6 +13,6 @@ # set -euo pipefail -cmake -B build "$@" && \ - cmake --build build -j && \ - ctest --test-dir build -j --output-on-failure +cmake -B build "$@" -DCMAKE_BUILD_TYPE=Release && \ + cmake --build build -j --config Release && \ + ctest --test-dir build -j -C Release --output-on-failure From be36793a0b544f1dd3707490dbc5b7feea3acbff Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 16:46:15 +0000 Subject: [PATCH 21/25] tests: attempt better exception reporting --- tests/test-chat-template.cpp | 86 ++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 9e4eed7..ce86e77 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -55,58 +55,58 @@ int main(int argc, char *argv[]) { return 1; } - std::string tmpl_file = argv[1]; - std::string ctx_file = argv[2]; - std::string golden_file = argv[3]; - - auto tmpl_str = read_file(tmpl_file); - - if (ctx_file == "n/a") - { - std::cout << "# Skipping template: " << tmpl_file << "\n" << tmpl_str << std::endl; - return 127; - } + try { + std::string tmpl_file = argv[1]; + std::string ctx_file = argv[2]; + std::string golden_file = argv[3]; + + auto tmpl_str = read_file(tmpl_file); + + if (ctx_file == "n/a") + { + std::cout << "# Skipping template: " << tmpl_file << "\n" << tmpl_str << std::endl; + return 127; + } - std::cout << "# Testing template: " << tmpl_file << std::endl - << "# With context: " << ctx_file << std::endl - << "# Against golden file: " << golden_file << std::endl - << std::flush; + std::cout << "# Testing template: " << tmpl_file << std::endl + << "# With context: " << ctx_file << std::endl + << "# Against golden file: " << golden_file << std::endl + << std::flush; - auto ctx = json::parse(read_file(ctx_file)); + auto ctx = json::parse(read_file(ctx_file)); - minja::chat_template tmpl( - tmpl_str, - ctx.at("bos_token"), - ctx.at("eos_token")); + minja::chat_template tmpl( + tmpl_str, + ctx.at("bos_token"), + ctx.at("eos_token")); - std::string expected; - try { - expected = read_file(golden_file); - } catch (const std::runtime_error &e) { - std::cerr << "Failed to read golden file: " << golden_file << std::endl; - std::cerr << e.what() << std::endl; - return 1; - } + std::string expected; + try { + expected = read_file(golden_file); + } catch (const std::exception &e) { + std::cerr << "Failed to read golden file: " << golden_file << std::endl; + std::cerr << e.what() << std::endl; + return 1; + } - std::string actual; - try { - actual = tmpl.apply( - ctx.at("messages"), - ctx.contains("tools") ? ctx.at("tools") : json(), - ctx.at("add_generation_prompt"), - ctx.contains("tools") ? json{ - {"builtin_tools", {"wolfram_alpha", "brave_search"}}} - : json()); - } catch (const std::runtime_error &e) { - std::cerr << "Error applying template: " << e.what() << std::endl; - return 1; - } + std::string actual; + try { + actual = tmpl.apply( + ctx.at("messages"), + ctx.contains("tools") ? ctx.at("tools") : json(), + ctx.at("add_generation_prompt"), + ctx.contains("tools") ? json{ + {"builtin_tools", {"wolfram_alpha", "brave_search"}}} + : json()); + } catch (const std::exception &e) { + std::cerr << "Error applying template: " << e.what() << std::endl; + return 1; + } - try { assert_equals(expected, actual); std::cout << "Test passed successfully." << std::endl; return 0; - } catch (const std::runtime_error &e) { + } catch (const std::exception &e) { std::cerr << "Test failed: " << e.what() << std::endl; return 1; } From aa7596e846b8f47045e9c79a3fe2001acb7bb217 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 17:34:54 +0000 Subject: [PATCH 22/25] ci: avoid cmake backslash escapes of paths --- scripts/fetch_templates_and_goldens.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py index 3879e2f..9c5e056 100644 --- a/scripts/fetch_templates_and_goldens.py +++ b/scripts/fetch_templates_and_goldens.py @@ -52,7 +52,8 @@ def strftime_now(format): def handle_chat_template(output_folder, model_id, variant, template_src, context_files): model_name = model_id.replace("/", "-") base_name = f'{model_name}-{variant}' if variant else model_name - template_file = os.path.join(output_folder, f'{base_name}.jinja') + # On Windows, CMake will interpret any backslashes as escapes so we return / for path separators + template_file = os.path.join(output_folder, f'{base_name}.jinja').replace(r'\\', r'/') with open(template_file, 'w') as f: f.write(template_src) From cd7310019cc262b39c4b4c927b3f6c2cc0f03cc3 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 18:12:48 +0000 Subject: [PATCH 23/25] ci: attempt to defeat backslashes --- scripts/fetch_templates_and_goldens.py | 2 +- tests/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py index 9c5e056..444a48b 100644 --- a/scripts/fetch_templates_and_goldens.py +++ b/scripts/fetch_templates_and_goldens.py @@ -53,7 +53,7 @@ def handle_chat_template(output_folder, model_id, variant, template_src, context model_name = model_id.replace("/", "-") base_name = f'{model_name}-{variant}' if variant else model_name # On Windows, CMake will interpret any backslashes as escapes so we return / for path separators - template_file = os.path.join(output_folder, f'{base_name}.jinja').replace(r'\\', r'/') + template_file = output_folder.replace(r'\\', '/') + '/' + f'{base_name}.jinja' with open(template_file, 'w') as f: f.write(template_src) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5a23013..b311af5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -98,7 +98,7 @@ endif() foreach(test_case ${CHAT_TEMPLATE_TEST_CASES}) separate_arguments(test_args UNIX_COMMAND "${test_case}") list(GET test_args -1 last_arg) - string(REGEX REPLACE "^[^ ]+/([^ /]+)\\.[^.]+$" "\\1" test_name "${last_arg}") + string(REGEX REPLACE "^[^ ]+/([^ /\\]+)\\.[^.]+$" "\\1" test_name "${last_arg}") add_test(NAME ${test_name} COMMAND $ ${test_args}) set_tests_properties(${test_name} PROPERTIES SKIP_RETURN_CODE 127) endforeach() From e3ac311cba8f4dd3aa497d4267f75b5c2678cc1b Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 18:18:38 +0000 Subject: [PATCH 24/25] ci: generalize backslash-averse path joining in test case generation --- scripts/fetch_templates_and_goldens.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py index 444a48b..d4fce98 100644 --- a/scripts/fetch_templates_and_goldens.py +++ b/scripts/fetch_templates_and_goldens.py @@ -49,11 +49,16 @@ def strftime_now(format): return now.strftime(format) +def join_cmake_path(parent, child): + ''' + On Windows, CMake will interpret any backslashes as escapes so we return / for path separators + ''' + return '/'.join(x.replace(r'\\', '/') for x in (parent, child)) + def handle_chat_template(output_folder, model_id, variant, template_src, context_files): model_name = model_id.replace("/", "-") base_name = f'{model_name}-{variant}' if variant else model_name - # On Windows, CMake will interpret any backslashes as escapes so we return / for path separators - template_file = output_folder.replace(r'\\', '/') + '/' + f'{base_name}.jinja' + template_file = join_cmake_path(output_folder, f'{base_name}.jinja') with open(template_file, 'w') as f: f.write(template_src) @@ -88,7 +93,7 @@ def handle_chat_template(output_folder, model_id, variant, template_src, context if template_hates_the_system and any(m['role'] == 'system' for m in context['messages']): continue - output_file = os.path.join(output_folder, f'{base_name}-{context_name}.txt') + output_file = join_cmake_path(output_folder, f'{base_name}-{context_name}.txt') render_context = json.loads(json.dumps(context)) From 3894b12534ccfa78452370ea89911997053ac76d Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 6 Dec 2024 18:27:36 +0000 Subject: [PATCH 25/25] ci: disable windows ci for now (https://github.com/google/minja/issues/9) --- .github/workflows/build.yml | 4 ++-- tests/test-syntax.cpp | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index becddd0..43fe2bb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,11 +26,11 @@ jobs: # ubuntu-22.04, ubuntu-latest, # windows-2019, - windows-latest, + # windows-latest, ] type: [ Release, - # Debug, + Debug, ] runs-on: ${{ matrix.os }} timeout-minutes: 30 diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp index af35bcb..540ad83 100644 --- a/tests/test-syntax.cpp +++ b/tests/test-syntax.cpp @@ -43,6 +43,10 @@ const minja::Options lstrip_trim_blocks { }; TEST(SyntaxTest, SimpleCases) { + EXPECT_EQ( + "\r\nhey\r\nho!", + render("\r\n{{ 'hey\r\nho!' }}\r\n", {}, {})); + EXPECT_EQ( "a\n b\n| a\n b\n", render("{% set txt = 'a\\nb\\n' %}{{ txt | indent(2) }}|{{ txt | indent(2, first=true) }}", {}, {}));