google · ochafik · Dec 6, 2024 · Dec 6, 2024 · Dec 6, 2024 · Dec 6, 2024
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -26,11 +26,11 @@ jobs:
           # ubuntu-22.04,
           ubuntu-latest,
           # windows-2019,
-          windows-latest,
+          # windows-latest,
         ]
         type: [
           Release,
-          # Debug,
+          Debug,
         ]
     runs-on:  ${{ matrix.os }}
     timeout-minutes: 30
@@ -49,12 +49,21 @@ jobs:
       - name: Set up CMake
         uses: lukka/get-cmake@latest
 
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+
+      - name: Install Python deps
+        run: pip install -r requirements.txt
+
       - name: Configure CMake
         run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{ matrix.type }} -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
 
       - name: Build
-        run: cmake --build ${{github.workspace}}/build --config Release --parallel
+        run: cmake --build ${{github.workspace}}/build --config ${{ matrix.type }} --parallel
 
       - name: Test
         working-directory: ${{github.workspace}}/build
-        run: ctest --test-dir tests --output-on-failure --verbose
+        run: ctest --test-dir tests --output-on-failure --verbose -C ${{ matrix.type }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -15,16 +15,25 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 if (MSVC)
     set(MINJA_FUZZTEST_ENABLED_DEFAULT OFF)
+    set(MINJA_USE_VENV_DEFAULT OFF)
 else()
     set(MINJA_FUZZTEST_ENABLED_DEFAULT ON)
+    set(MINJA_USE_VENV_DEFAULT ON)
 endif()
-option(MINJA_FUZZTEST_ENABLED       "minja: fuzztests enabled"  MINJA_FUZZTEST_ENABLED_DEFAULT)
+option(MINJA_FUZZTEST_ENABLED       "minja: fuzztests enabled"                              MINJA_FUZZTEST_ENABLED_DEFAULT)
 option(MINJA_FUZZTEST_FUZZING_MODE  "minja: run fuzztests (if enabled) in fuzzing mode"     OFF)
+option(MINJA_USE_VENV               "minja: use Python venv for build"                      MINJA_USE_VENV_DEFAULT)
 
 # Note: tests require C++14 because google/fuzztest depends on a version of gtest that requires it
 # (and we don't want to use an older version of fuzztest)
 # Examples are built w/ C++11 to check the compatibility of the library.
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
+
+set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL")
+set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+if (NOT MSVC)
+    add_compile_options(-Wall -Wextra -pedantic -Werror)
+endif()
 
 include(FetchContent)
 
@@ -55,21 +64,24 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
 endif()
 
-# Create a python venv w/ the required dependencies
-find_package(Python3 COMPONENTS Interpreter REQUIRED)
-set(VENV_DIR "${CMAKE_BINARY_DIR}/venv")
-if(WIN32)
-    set(VENV_PYTHON "${VENV_DIR}/Scripts/python.exe")
-else()
-    set(VENV_PYTHON "${VENV_DIR}/bin/python")
+set(Python_FIND_STRATEGY LOCATION CACHE STRING "Python find strategy" FORCE)
+find_package(Python COMPONENTS Interpreter REQUIRED)
+if(MINJA_USE_VENV)
+    # Create a python venv w/ the required dependencies
+    set(VENV_DIR "${CMAKE_BINARY_DIR}/venv")
+    if(WIN32)
+        set(VENV_PYTHON "${VENV_DIR}/Scripts/python.exe")
+    else()
+        set(VENV_PYTHON "${VENV_DIR}/bin/python")
+    endif()
+    execute_process(
+        COMMAND ${Python_EXECUTABLE} -m venv "${VENV_DIR}"
+        COMMAND_ERROR_IS_FATAL ANY)
+    execute_process(
+        COMMAND ${VENV_PYTHON} -m pip install -r "${CMAKE_SOURCE_DIR}/requirements.txt"
+        COMMAND_ERROR_IS_FATAL ANY)
+    set(Python_EXECUTABLE "${VENV_PYTHON}" CACHE FILEPATH "Path to Python executable in venv" FORCE)
 endif()
-execute_process(
-    COMMAND ${Python3_EXECUTABLE} -m venv "${VENV_DIR}"
-    COMMAND_ERROR_IS_FATAL ANY)
-execute_process(
-    COMMAND ${VENV_PYTHON} -m pip install -r "${CMAKE_SOURCE_DIR}/requirements.txt"
-    COMMAND_ERROR_IS_FATAL ANY)
-set(Python_EXECUTABLE "${VENV_PYTHON}" CACHE FILEPATH "Path to Python executable in venv" FORCE)
 message(STATUS "Python executable: ${Python_EXECUTABLE}")
 
 find_program(CPPCHECK cppcheck)

diff --git a/README.md b/README.md
@@ -9,10 +9,12 @@ It is **not general purpose**: it includes just what’s needed for actual chat
 > [!WARNING]  
 > TL;DR: use of Minja is *at your own risk*, and the risks are plenty! See [Security & Privacy](#security--privacy) section below.
 
+[![CI](https://github.com/google/minja/actions/workflows/build.yml/badge.svg)](https://github.com/google/minja/actions/workflows/build.yml)
+
 ## Design goals:
 
 - Support each and every major LLM found on HuggingFace
-    - See `MODEL_IDS` in [tests/CMakeLists.txt](./tests/CMakeLists.txt) for the list of models currently supported
+  - See `MODEL_IDS` in [tests/CMakeLists.txt](./tests/CMakeLists.txt) for the list of models currently supported
 - Easy to integrate to/with projects such as [llama.cpp](https://github.com/ggerganov/llama.cpp) or [gemma.cpp](https://github.com/google/gemma.cpp):
   - Header-only
   - C++11
@@ -24,7 +26,7 @@ It is **not general purpose**: it includes just what’s needed for actual chat
 
 - Address glaring Prompt injection risks in current Jinja chat templating practices. See [Security & Privacy](#security--privacy) below
 - Additional features from Jinja that aren't used by the template(s) of any major LLM (no feature creep!)
-    - Please don't submit PRs with such features, they will unfortunately be rejected.
+  - Please don't submit PRs with such features, they will unfortunately be rejected.
 - Full Jinja compliance (neither syntax-wise, nor filters / tests / globals)
 
 ## Usage:
@@ -199,7 +201,7 @@ Main limitations (non-exhaustive list):
     - Which version of GCC / clang did you compile the tests with? On which OS version?
     - If you intend to contribute a fix:
         - Please read [CONTRIBUTING](./CONTRIBUTING.md) first. You'd have to sign a CLA, which your employer may need to accept.
-        - Please test as many gated models as possible (use `cmake -B build -DMINJA_TEST_GATED_MODELS=1 ...` and edit `MODEL_IDS` in [tests/CMakeLists.txt](./tests/CMakeLists.txt) appropriately if you don't have access to some models)
+        - Please test as many gated models as possible (use `cmake -B build -DMINJA_TEST_GATED_MODELS=1 ...` and edit MODEL_LIST appropriately)
 
 - For bonus points, check the style of your edits with:
 

diff --git a/include/minja/minja.hpp b/include/minja/minja.hpp
@@ -449,7 +449,7 @@ class Value : public std::enable_shared_from_this<Value> {
   Value operator*(const Value& rhs) const {
       if (is_string() && rhs.is_number_integer()) {
         std::ostringstream out;
-        for (int i = 0, n = rhs.get<int64_t>(); i < n; ++i) {
+        for (int64_t i = 0, n = rhs.get<int64_t>(); i < n; ++i) {
           out << to_str();
         }
         return out.str();

diff --git a/scripts/fetch_templates_and_goldens.py b/scripts/fetch_templates_and_goldens.py
@@ -49,10 +49,16 @@ def strftime_now(format):
     return now.strftime(format)
 
 
+def join_cmake_path(parent, child):
+    '''
+        On Windows, CMake will interpret any backslashes as escapes so we return / for path separators
+    '''
+    return '/'.join(x.replace(r'\\', '/') for x in (parent, child))
+
 def handle_chat_template(output_folder, model_id, variant, template_src, context_files):
     model_name = model_id.replace("/", "-")
     base_name = f'{model_name}-{variant}' if variant else model_name
-    template_file = os.path.join(output_folder, f'{base_name}.jinja')
+    template_file = join_cmake_path(output_folder, f'{base_name}.jinja')
 
     with open(template_file, 'w') as f:
         f.write(template_src)
@@ -87,7 +93,7 @@ def handle_chat_template(output_folder, model_id, variant, template_src, context
         if template_hates_the_system and any(m['role'] == 'system' for m in context['messages']):
             continue
 
-        output_file = os.path.join(output_folder, f'{base_name}-{context_name}.txt')
+        output_file = join_cmake_path(output_folder, f'{base_name}-{context_name}.txt')
 
         render_context = json.loads(json.dumps(context))
 

diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh
@@ -13,6 +13,6 @@
 #
 set -euo pipefail
 
-cmake -B build "$@" && \
-    cmake --build build -j && \
-    ctest --test-dir build -j --output-on-failure
+cmake -B build "$@" -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j --config Release && \
+    ctest --test-dir build -j -C Release --output-on-failure
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -7,7 +7,6 @@
 # SPDX-License-Identifier: MIT
 
 add_executable(test-syntax test-syntax.cpp)
-set_target_properties(test-syntax PROPERTIES CXX_STANDARD 17)
 target_link_libraries(test-syntax PRIVATE
     nlohmann_json::nlohmann_json
     gtest_main
@@ -16,7 +15,6 @@ target_link_libraries(test-syntax PRIVATE
 gtest_discover_tests(test-syntax)
 
 add_executable(test-chat-template test-chat-template.cpp)
-set_target_properties(test-chat-template PROPERTIES CXX_STANDARD 17)
 target_link_libraries(test-chat-template PRIVATE nlohmann_json::nlohmann_json)
 
 set(MODEL_IDS
@@ -35,7 +33,6 @@ set(MODEL_IDS
     deepseek-ai/deepseek-coder-33b-instruct
     deepseek-ai/DeepSeek-Coder-V2-Instruct
     deepseek-ai/DeepSeek-V2.5
-    dreamgen/WizardLM-2-7B
     google/gemma-2-2b-it # Gated
     google/gemma-7b-it # Gated
     indischepartij/MiniCPM-3B-OpenHermes-2.5-v2
@@ -66,6 +63,7 @@ set(MODEL_IDS
     Qwen/Qwen2-VL-7B-Instruct
     Qwen/Qwen2.5-7B-Instruct
     Qwen/Qwen2.5-Math-7B-Instruct
+    Qwen/QwQ-32B-Preview
     teknium/OpenHermes-2.5-Mistral-7B
     TheBloke/FusionNet_34Bx2_MoE-AWQ
 
@@ -75,6 +73,7 @@ set(MODEL_IDS
     # Can't find template(s), TODO:
     # ai21labs/Jamba-v0.1
     # apple/OpenELM-1_1B-Instruct 
+    # dreamgen/WizardLM-2-7B
     # xai-org/grok-1
 )
 
@@ -88,12 +87,18 @@ execute_process(
         ${MODEL_IDS}
     OUTPUT_VARIABLE CHAT_TEMPLATE_TEST_CASES
     OUTPUT_STRIP_TRAILING_WHITESPACE
+    COMMAND_ERROR_IS_FATAL ANY
 )
 string(REPLACE "\n" ";" CHAT_TEMPLATE_TEST_CASES "${CHAT_TEMPLATE_TEST_CASES}")
+list(LENGTH CHAT_TEMPLATE_TEST_CASES CHAT_TEMPLATE_TEST_CASES_COUNT)
+message(STATUS "Found ${CHAT_TEMPLATE_TEST_CASES_COUNT} chat template test cases")
+if (CHAT_TEMPLATE_TEST_CASES_COUNT LESS 10)
+    message(ERROR "Not enough chat template test cases found")
+endif()
 foreach(test_case ${CHAT_TEMPLATE_TEST_CASES})
     separate_arguments(test_args UNIX_COMMAND "${test_case}")
     list(GET test_args -1 last_arg)
-    string(REGEX REPLACE "^[^ ]+/([^ /]+)\\.[^.]+$" "\\1" test_name "${last_arg}")
+    string(REGEX REPLACE "^[^ ]+/([^ /\\]+)\\.[^.]+$" "\\1" test_name "${last_arg}")
     add_test(NAME ${test_name} COMMAND $<TARGET_FILE:test-chat-template> ${test_args})
     set_tests_properties(${test_name} PROPERTIES SKIP_RETURN_CODE 127)
 endforeach()

diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
@@ -55,58 +55,58 @@ int main(int argc, char *argv[]) {
         return 1;
     }
 
-    std::string tmpl_file = argv[1];
-    std::string ctx_file = argv[2];
-    std::string golden_file = argv[3];
-
-    auto tmpl_str = read_file(tmpl_file);
-
-    if (ctx_file == "n/a")
-    {
-        std::cout << "# Skipping template: " << tmpl_file << "\n" << tmpl_str << std::endl;
-        return 127;
-    }
+    try {
+        std::string tmpl_file = argv[1];
+        std::string ctx_file = argv[2];
+        std::string golden_file = argv[3];
+
+        auto tmpl_str = read_file(tmpl_file);
+
+        if (ctx_file == "n/a")
+        {
+            std::cout << "# Skipping template: " << tmpl_file << "\n" << tmpl_str << std::endl;
+            return 127;
+        }
 
-    std::cout << "# Testing template: " << tmpl_file << std::endl
-              << "# With context: " << ctx_file << std::endl
-              << "# Against golden file: " << golden_file << std::endl
-              << std::flush;
+        std::cout << "# Testing template: " << tmpl_file << std::endl
+                << "# With context: " << ctx_file << std::endl
+                << "# Against golden file: " << golden_file << std::endl
+                << std::flush;
 
-    auto ctx = json::parse(read_file(ctx_file));
+        auto ctx = json::parse(read_file(ctx_file));
 
-    minja::chat_template tmpl(
-        tmpl_str,
-        ctx.at("bos_token"),
-        ctx.at("eos_token"));
+        minja::chat_template tmpl(
+            tmpl_str,
+            ctx.at("bos_token"),
+            ctx.at("eos_token"));
 
-    std::string expected;
-    try {
-        expected = read_file(golden_file);
-    } catch (const std::runtime_error &e) {
-        std::cerr << "Failed to read golden file: " << golden_file << std::endl;
-        std::cerr << e.what() << std::endl;
-        return 1;
-    }
+        std::string expected;
+        try {
+            expected = read_file(golden_file);
+        } catch (const std::exception &e) {
+            std::cerr << "Failed to read golden file: " << golden_file << std::endl;
+            std::cerr << e.what() << std::endl;
+            return 1;
+        }
 
-    std::string actual;
-    try {
-        actual = tmpl.apply(
-            ctx.at("messages"),
-            ctx.contains("tools") ? ctx.at("tools") : json(),
-            ctx.at("add_generation_prompt"),
-            ctx.contains("tools") ? json{
-                                        {"builtin_tools", {"wolfram_alpha", "brave_search"}}}
-                                  : json());
-    } catch (const std::runtime_error &e) {
-        std::cerr << "Error applying template: " << e.what() << std::endl;
-        return 1;
-    }
+        std::string actual;
+        try {
+            actual = tmpl.apply(
+                ctx.at("messages"),
+                ctx.contains("tools") ? ctx.at("tools") : json(),
+                ctx.at("add_generation_prompt"),
+                ctx.contains("tools") ? json{
+                                            {"builtin_tools", {"wolfram_alpha", "brave_search"}}}
+                                    : json());
+        } catch (const std::exception &e) {
+            std::cerr << "Error applying template: " << e.what() << std::endl;
+            return 1;
+        }
 
-    try {
         assert_equals(expected, actual);
         std::cout << "Test passed successfully." << std::endl;
         return 0;
-    } catch (const std::runtime_error &e) {
+    } catch (const std::exception &e) {
         std::cerr << "Test failed: " << e.what() << std::endl;
         return 1;
     }

diff --git a/tests/test-syntax.cpp b/tests/test-syntax.cpp
@@ -27,22 +27,26 @@ static std::string render(const std::string & template_str, const json & binding
 }
 
 const minja::Options lstrip_blocks {
-    .trim_blocks = false,
-    .lstrip_blocks = true,
-    .keep_trailing_newline = false,
+    /* .trim_blocks = */ false,
+    /* .lstrip_blocks = */ true,
+    /* .keep_trailing_newline = */ false,
 };
 const minja::Options trim_blocks {
-    .trim_blocks = true,
-    .lstrip_blocks = false,
-    .keep_trailing_newline = false,
+    /* .trim_blocks = */ true,
+    /* .lstrip_blocks = */ false,
+    /* .keep_trailing_newline = */ false,
 };
 const minja::Options lstrip_trim_blocks {
-    .trim_blocks = true,
-    .lstrip_blocks = true,
-    .keep_trailing_newline = false,
+    /* .trim_blocks = */ true,
+    /* .lstrip_blocks = */ true,
+    /* .keep_trailing_newline = */ false,
 };
 
 TEST(SyntaxTest, SimpleCases) {
+    EXPECT_EQ(
+        "\r\nhey\r\nho!",
+        render("\r\n{{ 'hey\r\nho!' }}\r\n", {}, {}));
+
     EXPECT_EQ(
         "a\n  b\n|  a\n  b\n",
         render("{% set txt = 'a\\nb\\n' %}{{ txt | indent(2) }}|{{ txt | indent(2, first=true) }}", {}, {}));