-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathllm_string_parser.py
More file actions
103 lines (87 loc) · 3.58 KB
/
llm_string_parser.py
File metadata and controls
103 lines (87 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import re
import json
from typing import Any, Dict, List, Optional
# Schema describing the shape of a Qwen3-style assistant message.
#
# Besides ordinary JSON-Schema keywords it carries "x-*" extension keys that
# hold the regexes / parser hints used to pull structure out of raw text.
# NOTE(review): within this file only
# properties.tool_calls["x-regex-iterator"] is read (by parse_with_schema);
# the remaining "x-*" keys are presumably consumed by an external
# schema-driven parser -- confirm before changing them.
qwen3_schema = {
    # Whole-message pattern with named groups: reasoning_content (optional
    # <think>...</think> section), content, tool_calls (one or more
    # <tool_call>...</tool_call> blocks) and trailing.
    "x-regex": r"^(?:<think>\n?(?P<reasoning_content>.+?)\n?</think>\s*)?(?P<content>.*?)(?=(?:<tool_call>|<\|im_end\|>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\s*)+)?(?P<trailing>.*)$",
    "type": "object",
    "properties": {
        "role": {"const": "assistant"},
        "content": {"type": "string"},
        "reasoning_content": {"type": "string"},
        "tool_calls": {
            "type": "array",
            # Group 1 captures the payload between one pair of tool_call tags.
            "x-regex-iterator": r"<tool_call>\s*(.+?)\s*</tool_call>",
            "items": {
                # Each payload is JSON, wrapped as {type: 'function', function: <payload>}.
                "x-parser": "json",
                "x-parser-args": {"transform": "{type: 'function', function: @}"},
                "type": "object",
                "properties": {
                    "type": {"const": "function"},
                    "function": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string"},
                            "arguments": {
                                "type": "object",
                                # Empty schema: argument values of any type are allowed.
                                "additionalProperties": {},
                            },
                        },
                    },
                },
            },
        },
    },
}
def parse_with_schema(text: str, schema: Dict[str, Any] = qwen3_schema) -> Dict[str, Any]:
    """Extract tool calls, and the tool response following each, from *text*.

    Every match of the schema's tool-call iterator regex is decoded as JSON
    and wrapped as ``{"type": "function", "function": <payload>}``. For each
    decoded call, the text between the end of that call and the start of the
    next call (or the end of *text*) is searched for the first
    ``<tool_response>...</tool_response>`` block, which is attached to it.

    Args:
        text: Raw transcript text to scan.
        schema: Schema dict; only
            ``schema["properties"]["tool_calls"]["x-regex-iterator"]`` is
            read. The pattern must capture the call payload in group 1.

    Returns:
        ``{"tool_calls": [...]}`` with one entry per regex match, in order.
        A successfully decoded entry has the keys ``type``, ``function``
        (with ``arguments`` normalised to a dict), ``tool_response`` (raw
        string or ``None``) and ``tool_response_json`` (decoded value, or
        ``None`` when absent or not valid JSON). A payload that is not valid
        JSON yields a placeholder entry carrying ``raw`` and ``parse_error``
        with both response fields set to ``None``.

    Raises:
        ValueError: If a decoded payload is not a JSON object, or its
            ``name`` is missing or not a string.
    """
    call_pattern = re.compile(
        schema["properties"]["tool_calls"]["x-regex-iterator"], flags=re.DOTALL
    )
    response_pattern = re.compile(
        r"<tool_response>\s*(.+?)\s*</tool_response>", flags=re.DOTALL
    )

    tool_calls: List[Dict[str, Any]] = []
    # Materialised so each call can look ahead to where the next one starts.
    matches = list(call_pattern.finditer(text))
    for index, match in enumerate(matches):
        raw = match.group(1).strip()

        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError as e:
            # Best effort: keep a placeholder so positions still line up with
            # the calls found in the text. It carries the same response keys
            # as a successful entry so consumers see a uniform shape.
            tool_calls.append({
                "type": "function",
                "function": {"name": None, "arguments": {}},
                "raw": raw,
                "parse_error": f"json_decode_error: {e}",
                "tool_response": None,
                "tool_response_json": None,
            })
            continue

        # Valid JSON need not be an object (e.g. a list or a bare string);
        # reject it explicitly instead of failing on ``.get`` below.
        if not isinstance(parsed, dict):
            raise ValueError(
                "Invalid tool_call.function: expected a JSON object, "
                f"got {type(parsed).__name__}"
            )

        name = parsed.get("name")
        if not isinstance(name, str):
            raise ValueError(f"Invalid tool_call.function.name: {name!r}")

        # Normalise arguments to a dict: missing/None -> {}, any other
        # non-dict value is preserved under "_raw".
        args = parsed.get("arguments", {})
        if not isinstance(args, dict):
            args = {} if args is None else {"_raw": args}
        parsed["arguments"] = args

        # Only a response located before the next tool call belongs to this
        # call; arbitrary text (e.g. "user\n" or "<|im_start|>user") may sit
        # between the call and its response.
        window_end = matches[index + 1].start() if index + 1 < len(matches) else len(text)
        response_match = response_pattern.search(text[match.end():window_end])

        tool_response: Optional[str] = None
        tool_response_json: Any = None
        if response_match:
            tool_response = response_match.group(1).strip()
            try:
                tool_response_json = json.loads(tool_response)
            except (ValueError, RecursionError):
                # The response is free-form text (or too deeply nested to
                # decode); keep only the raw string.
                tool_response_json = None

        tool_calls.append({
            "type": "function",
            "function": parsed,
            "tool_response": tool_response,
            "tool_response_json": tool_response_json,
        })

    return {"tool_calls": tool_calls}