diff --git a/dynamiq/nodes/agents/agent.py b/dynamiq/nodes/agents/agent.py index 8c646bbaa..922612fde 100644 --- a/dynamiq/nodes/agents/agent.py +++ b/dynamiq/nodes/agents/agent.py @@ -596,7 +596,7 @@ def _handle_function_calling_mode( if isinstance(action_input, str): try: - action_input = json.loads(action_input) + action_input = json.loads(action_input, strict=False) except json.JSONDecodeError as e: raise ActionParsingException(f"Error parsing action_input string. {e}", recoverable=True) @@ -618,11 +618,11 @@ def _handle_structured_output_mode( try: if isinstance(llm_generated_output, str): try: - llm_generated_output_json = json.loads(llm_generated_output) + llm_generated_output_json = json.loads(llm_generated_output, strict=False) except json.JSONDecodeError: # Handle known LLM bug where multiple JSON objects are returned. # Use raw_decode to parse only the first valid JSON object. - decoder = json.JSONDecoder() + decoder = json.JSONDecoder(strict=False) llm_generated_output_json, _ = decoder.raw_decode(llm_generated_output.strip()) else: llm_generated_output_json = llm_generated_output @@ -645,7 +645,7 @@ def _handle_structured_output_mode( try: if isinstance(action_input, str): - action_input = json.loads(action_input) + action_input = json.loads(action_input, strict=False) except json.JSONDecodeError as e: raise ActionParsingException(f"Error parsing action_input string. {e}", recoverable=True) diff --git a/dynamiq/nodes/agents/prompts/react/instructions.py b/dynamiq/nodes/agents/prompts/react/instructions.py index 905f6d340..6438ca079 100644 --- a/dynamiq/nodes/agents/prompts/react/instructions.py +++ b/dynamiq/nodes/agents/prompts/react/instructions.py @@ -170,10 +170,11 @@ IMPORTANT RULES: - You MUST ALWAYS include "thought" as the FIRST field in your JSON -- ALWAYS populate the "thought" field FIRST before "action_input" in your response. +- ALWAYS populate the "thought" field FIRST before any other field (particularly "action_input") in your response. 
- Each tool has a specific input format you must strictly follow - In action_input field, provide properly formatted JSON with double quotes -- Avoid using extra backslashes +- When action_input contains multi-line content (e.g. shell commands, code), you MUST escape newlines as \\n within the JSON string — do NOT use literal line breaks inside JSON string values. +- The JSON must be parsable with json.loads() in Python. - Do not use markdown code blocks around your JSON - Never leave action_input empty - Ensure proper JSON syntax with quoted keys and values @@ -198,7 +199,7 @@ you call `provide_final_answer` to deliver the final response. FUNCTION CALLING GUIDELINES: -- ALWAYS populate the "thought" field FIRST before "action_input" in your function calls +- ALWAYS populate the "thought" field FIRST before any other field (particularly "action_input") in your function calls - Analyze the request carefully to determine if tools are needed - Call functions with properly formatted arguments - Handle tool responses appropriately before providing final answer diff --git a/tests/unit/nodes/agents/test_agent_parsing.py b/tests/unit/nodes/agents/test_agent_parsing.py index 8504c97aa..3cba1e8eb 100644 --- a/tests/unit/nodes/agents/test_agent_parsing.py +++ b/tests/unit/nodes/agents/test_agent_parsing.py @@ -306,6 +306,104 @@ def test_structured_output_multiple_jsons_takes_first(mocker): assert action_input == {"command": "ls"} +def test_structured_output_action_input_with_literal_newlines(): + """strict=False allows action_input containing literal newlines (common LLM mistake).""" + import uuid + + from dynamiq import connections, prompts + from dynamiq.nodes.agents import Agent + from dynamiq.nodes.llms import OpenAI + from dynamiq.nodes.types import InferenceMode + + conn = connections.OpenAI(id=str(uuid.uuid4()), api_key="fake-key") + llm = OpenAI( + name="TestLLM", + model="gpt-4o-mini", + connection=conn, + prompt=prompts.Prompt(messages=[prompts.Message(role="user", 
content="{{input}}")]), + ) + agent = Agent(name="test-agent", llm=llm, tools=[], inference_mode=InferenceMode.STRUCTURED_OUTPUT) + + # action_input is a JSON string with a literal newline inside (not escaped as \\n). + # This is what LLMs produce for multi-line shell commands / code. + output = ( + '{"thought": "run script", "action": "SandboxShellTool", ' + '"action_input": "{\\"command\\": \\"echo hello\\necho world\\"}", "output_files": ""}' + ) + thought, action, action_input = agent._handle_structured_output_mode(output, loop_num=1) + assert thought == "run script" + assert action == "SandboxShellTool" + assert action_input == {"command": "echo hello\necho world"} + + +def test_structured_output_fallback_decoder_with_literal_newlines(): + """Fallback JSONDecoder(strict=False) handles multiple concatenated JSONs with literal newlines.""" + import uuid + + from dynamiq import connections, prompts + from dynamiq.nodes.agents import Agent + from dynamiq.nodes.llms import OpenAI + from dynamiq.nodes.types import InferenceMode + + conn = connections.OpenAI(id=str(uuid.uuid4()), api_key="fake-key") + llm = OpenAI( + name="TestLLM", + model="gpt-4o-mini", + connection=conn, + prompt=prompts.Prompt(messages=[prompts.Message(role="user", content="{{input}}")]), + ) + agent = Agent(name="test-agent", llm=llm, tools=[], inference_mode=InferenceMode.STRUCTURED_OUTPUT) + + # Two concatenated JSON objects where the first has a literal newline in action_input. + # json.loads fails (extra data), fallback raw_decode must also use strict=False. 
+ output = ( + '{"thought": "write file", "action": "SandboxShellTool", ' + '"action_input": "{\\"command\\": \\"cat > f.py\\nprint(1)\\"}", "output_files": ""}' + '\n{"thought": "done", "action": "finish", "action_input": "ok", "output_files": ""}' + ) + thought, action, action_input = agent._handle_structured_output_mode(output, loop_num=1) + assert thought == "write file" + assert action == "SandboxShellTool" + assert action_input == {"command": "cat > f.py\nprint(1)"} + + +def test_function_calling_action_input_with_literal_newlines(mocker): + """FC mode: strict=False allows action_input with literal newlines.""" + import uuid + + from dynamiq import connections, prompts + from dynamiq.nodes.agents import Agent + from dynamiq.nodes.llms import OpenAI + from dynamiq.nodes.types import InferenceMode + + conn = connections.OpenAI(id=str(uuid.uuid4()), api_key="fake-key") + llm = OpenAI( + name="TestLLM", + model="gpt-4o-mini", + connection=conn, + prompt=prompts.Prompt(messages=[prompts.Message(role="user", content="{{input}}")]), + ) + agent = Agent(name="test-agent", llm=llm, tools=[], inference_mode=InferenceMode.FUNCTION_CALLING) + + # Simulate an LLM result with a tool_call whose action_input has a literal newline + mock_result = mocker.MagicMock() + mock_result.output = { + "tool_calls": [ + { + "function": { + "name": "SandboxShellTool", + "arguments": {"thought": "run it", "action_input": '{"cmd": "ls\nls -la"}'}, + } + } + ] + } + + thought, action, action_input = agent._handle_function_calling_mode(mock_result, loop_num=1) + assert thought == "run it" + assert action == "SandboxShellTool" + assert action_input == {"cmd": "ls\nls -la"} + + def _mock_llm_response(text: str): from litellm import ModelResponse