From 26e88ce2a024f981f169db99737566b240e8e995 Mon Sep 17 00:00:00 2001 From: Mohamed Firas <62911996+Fir121@users.noreply.github.com> Date: Wed, 18 Mar 2026 12:32:46 +0400 Subject: [PATCH] fix bug with braces in string during format --- pyproject.toml | 2 +- src/llm_classifier/classifier.py | 8 ++++++-- src/llm_classifier/cluster.py | 7 +++++-- src/llm_classifier/prompt_utils.py | 17 +++++++++++++++++ tests/test_classifier.py | 12 ++++++++++++ tests/test_cluster.py | 21 +++++++++++++++++++++ 6 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 src/llm_classifier/prompt_utils.py diff --git a/pyproject.toml b/pyproject.toml index 6339278..2cb8fb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "llm-classifier" -version = "0.1.2" +version = "0.1.3" description = "Structured LLM based classification, clustering and extraction framework that works with all major API providers" readme = "README.md" license = { file = "LICENSE" } diff --git a/src/llm_classifier/classifier.py b/src/llm_classifier/classifier.py index ad54865..2651020 100644 --- a/src/llm_classifier/classifier.py +++ b/src/llm_classifier/classifier.py @@ -14,6 +14,8 @@ import instructor from pydantic import BaseModel, Field, create_model +from .prompt_utils import safe_prompt_format + T = TypeVar("T", bound=BaseModel) @@ -299,7 +301,8 @@ def _build_prompt( if not system_prompt: system = None else: - system = system_prompt.format( + system = safe_prompt_format( + system_prompt, examples=examples_str, format=format_schema, input=input_text, @@ -309,7 +312,8 @@ def _build_prompt( if not user_prompt: user = None else: - user = user_prompt.format( + user = safe_prompt_format( + user_prompt, examples=examples_str, format=format_schema, input=input_text, diff --git a/src/llm_classifier/cluster.py b/src/llm_classifier/cluster.py index 9954494..80b594b 100644 --- a/src/llm_classifier/cluster.py +++ b/src/llm_classifier/cluster.py @@ 
"""Prompt templating helpers."""

from __future__ import annotations

import re
from typing import Any


def safe_prompt_format(template: str, **values: Any) -> str:
    """Replace known ``{name}`` placeholders without parsing other braces.

    Unlike ``str.format``, only exact ``{name}`` tokens whose ``name`` was
    passed as a keyword argument are replaced; every other brace in the
    template is left untouched.

    The template is scanned exactly once, so text inserted for one placeholder
    is never re-examined. A substituted value that itself contains
    ``{other_name}`` is kept verbatim instead of being expanded by a later
    replacement.

    Args:
        template: Prompt text that may contain ``{name}`` placeholders as well
            as unrelated literal braces.
        **values: Placeholder names mapped to their replacement values. Each
            value that is actually used is converted with ``str()``.

    Returns:
        The template with every known placeholder replaced.
    """
    if not values:
        return template

    # Longest names first so that, for unusual keys passed via ``**mapping``,
    # a shorter token can never shadow a longer one in the alternation.
    names = sorted(values, key=len, reverse=True)
    token_pattern = re.compile(
        "|".join(re.escape("{" + name + "}") for name in names)
    )

    def _substitute(match: re.Match[str]) -> str:
        # Strip the surrounding braces to recover the placeholder name.
        return str(values[match.group(0)[1:-1]])

    # A callable replacement is inserted literally (no backslash processing),
    # and ``re.sub`` never rescans text it has already emitted.
    return token_pattern.sub(_substitute, template)
in system assert "Examples" in system + def test_build_prompt_allows_literal_braces(self, mock_classifier): + """Prompt templates with literal braces should not raise formatting errors.""" + system, user = mock_classifier._build_prompt( + "payload {with} braces", + Sentiment, + system_prompt="Schema: {format} | keep literal {json}", + user_prompt="Input: {input} | keep literal {meta}", + ) + assert "keep literal {json}" in system + assert "keep literal {meta}" in user + assert "payload {with} braces" in user + # ============================================================================ # Result Types Tests diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 0b87cd0..e4b7360 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -207,6 +207,27 @@ def test_build_prompt_validation_rules_require_all(self, mock_clusterer): ) assert "Every reference ID must be assigned" in system + def test_build_prompt_allows_literal_braces(self, mock_clusterer): + """Prompt templates with literal braces should not raise formatting errors.""" + inputs = [(1, "Item {A}"), (2, "Item B")] + response_schema = mock_clusterer._build_cluster_schema(SimpleCluster, n_items=2) + system, user = mock_clusterer._build_prompt( + inputs, + response_schema, + n_clusters=2, + allow_overlap=False, + require_all=True, + system_prompt=( + "Schema: {format}\n" + "Clustering: {n_clusters_instruction}\n" + "Rules: {validation_rules} | literal {rules}" + ), + user_prompt="Items:\n{items}\nliteral {notes}", + ) + assert "literal {rules}" in system + assert "literal {notes}" in user + assert "Item {A}" in user + # ============================================================================ # Validation Tests