Skip to content

Commit 8f0486b

Browse files
committed
Handle planner follow-ups for empty tool results
1 parent a93fe02 commit 8f0486b

5 files changed

Lines changed: 302 additions & 7 deletions

File tree

data/model_settings.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
22
"model": "openai/gpt-4o-mini-2024-07-18",
3-
"system_prompt": "You are a helpful but funny assistant. You have access to tools. User is Jack and live in Olrlando, Fl",
4-
"updated_at": "2025-10-28T21:00:33.372068+00:00"
3+
"system_prompt": "You are a focused assistant that collaborates with planner-provided MCP tools.\n\nDecision rules:\n1. Confirm the current date and timezone by calling the `calendar_current_context` tool before referencing \"today\" or upcoming events.\n2. Select tools that match the planner's context tags and ask the user before touching tools outside those tags.\n3. When a tool returns no data, a guardrail message, or an error, confirm with the user whether to widen the time span or include the additional planner contexts before retrying.\n4. Never guess or fabricate tool output—collect whatever details you still need from the user.\n5. Ask for consent before launching broad or cross-context searches.",
4+
"updated_at": "2025-01-01T00:00:00+00:00"
55
}

src/backend/chat/streaming.py

Lines changed: 80 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -805,11 +805,15 @@ async def stream_conversation(
805805

806806
arguments_raw = function.get("arguments")
807807
status = "finished"
808+
result_text = ""
809+
result_obj: Any | None = None
810+
missing_arguments = False
808811
if not arguments_raw or arguments_raw.strip() == "":
809812
result_text = (
810813
f"Tool {tool_name} requires arguments but none were provided."
811814
)
812815
status = "error"
816+
missing_arguments = True
813817
logger.warning("Missing tool arguments for %s", tool_name)
814818
else:
815819
try:
@@ -839,13 +843,17 @@ async def stream_conversation(
839843
if session_id and _tool_requires_session_id(tool_name):
840844
working_arguments.setdefault("session_id", session_id)
841845
try:
842-
result = await self._tool_client.call_tool(
846+
result_obj = await self._tool_client.call_tool(
843847
tool_name, working_arguments
844848
)
845849
result_text = self._tool_client.format_tool_result(
846-
result
850+
result_obj
851+
)
852+
status = (
853+
"error"
854+
if getattr(result_obj, "isError", False)
855+
else "finished"
847856
)
848-
status = "error" if result.isError else "finished"
849857
except Exception as exc: # pragma: no cover - MCP errors
850858
logger.exception(
851859
"Tool '%s' raised an exception", tool_name
@@ -892,8 +900,41 @@ async def stream_conversation(
892900
),
893901
}
894902

895-
if status != "error" and _looks_like_no_result(result_text):
896-
expand_contexts = True
903+
notice_reason = _classify_tool_followup(
904+
status,
905+
result_text,
906+
tool_error_flag=bool(
907+
getattr(result_obj, "isError", False)
908+
),
909+
missing_arguments=missing_arguments,
910+
)
911+
if notice_reason is not None:
912+
next_contexts: list[str] = []
913+
will_use_all_tools = False
914+
if tool_context_plan is not None:
915+
if notice_reason in {"no_results", "empty_result", "tool_error"}:
916+
next_contexts = (
917+
tool_context_plan.additional_contexts_for_attempt(hop_count)
918+
)
919+
will_use_all_tools = (
920+
tool_context_plan.use_all_tools_for_attempt(hop_count + 1)
921+
)
922+
if notice_reason in {"no_results", "empty_result", "tool_error"}:
923+
expand_contexts = True
924+
notice_payload = {
925+
"type": "tool_followup_required",
926+
"tool": tool_name or "unknown",
927+
"reason": notice_reason,
928+
"message": result_text,
929+
"attempt": hop_count,
930+
"next_contexts": next_contexts,
931+
"will_use_all_tools": will_use_all_tools,
932+
"confirmation_required": True,
933+
}
934+
yield {
935+
"event": "notice",
936+
"data": json.dumps(notice_payload),
937+
}
897938

898939
hop_count += 1
899940

@@ -942,10 +983,44 @@ def _looks_like_no_result(result_text: str) -> bool:
942983
"wasn't found",
943984
"nothing found",
944985
"no matching",
986+
"no events found",
945987
)
946988
return any(phrase in lowered for phrase in phrases)
947989

948990

991+
def _classify_tool_followup(
992+
status: str,
993+
result_text: str | None,
994+
*,
995+
tool_error_flag: bool,
996+
missing_arguments: bool,
997+
) -> str | None:
998+
"""Classify tool results that require follow-up guidance for the assistant."""
999+
1000+
text = result_text if isinstance(result_text, str) else ""
1001+
normalized = text.strip().lower()
1002+
1003+
if missing_arguments:
1004+
return "missing_arguments"
1005+
1006+
if status == "error":
1007+
if _looks_like_no_result(text):
1008+
return "no_results"
1009+
if tool_error_flag or not normalized:
1010+
return "tool_error"
1011+
if "invalid" in normalized and "argument" in normalized:
1012+
return "tool_error"
1013+
return "tool_error"
1014+
1015+
if not normalized:
1016+
return "empty_result"
1017+
1018+
if _looks_like_no_result(text):
1019+
return "no_results"
1020+
1021+
return None
1022+
1023+
9491024
def _is_tool_support_error(error: OpenRouterError) -> bool:
9501025
detail = error.detail
9511026
message = ""

src/backend/chat/tool_context_planner.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,33 @@ def contexts_for_attempt(self, attempt: int) -> list[str]:
3333
merged.append(normalized)
3434
return merged
3535

36+
def additional_contexts_for_attempt(self, attempt: int) -> list[str]:
37+
"""Return new contexts that would be introduced on the next attempt."""
38+
39+
if not self.stages:
40+
return []
41+
42+
if attempt < 0:
43+
attempt = 0
44+
45+
if attempt + 1 >= len(self.stages):
46+
return []
47+
48+
seen: set[str] = set()
49+
for stage in self.stages[: attempt + 1]:
50+
for context in stage:
51+
normalized = context.strip().lower()
52+
if normalized:
53+
seen.add(normalized)
54+
55+
additions: list[str] = []
56+
for context in self.stages[attempt + 1]:
57+
normalized = context.strip().lower()
58+
if not normalized or normalized in seen or normalized in additions:
59+
continue
60+
additions.append(normalized)
61+
return additions
62+
3663
def use_all_tools_for_attempt(self, attempt: int) -> bool:
3764
"""Return True when the attempt should fall back to every tool."""
3865

tests/test_streaming.py

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,61 @@ async def stream_chat_raw(self, payload: dict[str, Any]):
311311
yield {"data": "[DONE]"}
312312

313313

314+
class MultiToolOpenRouterClient(DummyOpenRouterClient):
    """Test double that replays scripted tool calls, then one final message.

    Each call to ``stream_chat_raw`` consumes the next scripted tool call and
    emits it as a ``tool_calls`` delta. Once the script is exhausted, every
    further call emits ``final_message`` as assistant content.
    """

    def __init__(self, tool_calls: list[dict[str, Any]], final_message: str) -> None:
        super().__init__()
        self.tool_calls = tool_calls
        self.final_message = final_message
        self.call_index = 0

    async def stream_chat_raw(self, payload: dict[str, Any]):
        """Record ``payload`` and stream one scripted chunk followed by ``[DONE]``."""
        self.payloads.append(payload)

        scripted = (
            self.tool_calls[self.call_index]
            if self.call_index < len(self.tool_calls)
            else None
        )
        # The counter advances on every request; chunk ids use the new value.
        self.call_index += 1

        if scripted is None:
            chunk = {
                "id": f"gen-final-{self.call_index}",
                "choices": [
                    {
                        "delta": {"content": self.final_message},
                        "finish_reason": "stop",
                    }
                ],
            }
        else:
            raw_arguments = scripted.get("arguments", {})
            # Dict arguments are serialised as JSON; anything else is passed
            # through as its string form (e.g. "" to simulate missing args).
            encoded_arguments = (
                json.dumps(raw_arguments)
                if isinstance(raw_arguments, dict)
                else str(raw_arguments)
            )
            function_spec = {
                "name": scripted.get("name", "calendar_lookup"),
                "arguments": encoded_arguments,
            }
            tool_call = {
                "id": f"call_{self.call_index}",
                "type": "function",
                "function": function_spec,
            }
            chunk = {
                "id": f"gen-tool-{self.call_index}",
                "choices": [
                    {
                        "delta": {"tool_calls": [tool_call]},
                        "finish_reason": "tool_calls",
                    }
                ],
            }

        yield {"data": json.dumps(chunk)}
        yield {"data": "[DONE]"}
367+
368+
314369
class ExpandingToolClient:
315370
def __init__(self) -> None:
316371
self.context_history: list[list[str]] = []
@@ -585,6 +640,18 @@ async def test_streaming_expands_contexts_after_no_result() -> None:
585640
assert len(client.payloads) == 2
586641
assert events[-1]["data"] == "[DONE]"
587642

643+
notice_events = [
644+
json.loads(event["data"])
645+
for event in events
646+
if event.get("event") == "notice"
647+
]
648+
assert notice_events, "Expected notice event after empty tool result"
649+
notice = notice_events[0]
650+
assert notice["reason"] == "no_results"
651+
assert notice["tool"] == "calendar_lookup"
652+
assert notice["next_contexts"] == ["tasks"]
653+
assert notice["confirmation_required"] is True
654+
588655

589656
@pytest.mark.anyio("asyncio")
590657
async def test_structured_tool_choice_does_not_retry_without_tools() -> None:
@@ -625,3 +692,118 @@ async def test_structured_tool_choice_does_not_retry_without_tools() -> None:
625692
pass
626693

627694
assert client.calls == 1
695+
696+
697+
@pytest.mark.anyio("asyncio")
698+
async def test_streaming_emits_notice_for_missing_arguments() -> None:
699+
client = MultiToolOpenRouterClient(
700+
[{"name": "calendar_lookup", "arguments": ""}],
701+
final_message="Please share more details.",
702+
)
703+
tool_client = ExpandingToolClient()
704+
handler = StreamingHandler(
705+
client, # type: ignore[arg-type]
706+
DummyRepository(), # type: ignore[arg-type]
707+
tool_client, # type: ignore[arg-type]
708+
default_model="openrouter/auto",
709+
)
710+
711+
request = ChatCompletionRequest(
712+
messages=[ChatMessage(role="user", content="Check my calendar today")],
713+
)
714+
conversation = [{"role": "user", "content": "Check my calendar today"}]
715+
plan = ToolContextPlan(stages=[["calendar"], ["tasks"]], broad_search=True)
716+
initial_tools = tool_client.get_openai_tools_for_contexts(
717+
plan.contexts_for_attempt(0)
718+
)
719+
720+
events: list[dict[str, Any]] = []
721+
async for event in handler.stream_conversation(
722+
"session-missing-args",
723+
request,
724+
conversation,
725+
initial_tools,
726+
None,
727+
plan,
728+
):
729+
events.append(event)
730+
731+
notice_events = [
732+
json.loads(event["data"])
733+
for event in events
734+
if event.get("event") == "notice"
735+
]
736+
737+
assert notice_events, "Expected notice event when tool arguments are missing"
738+
notice = notice_events[0]
739+
assert notice["reason"] == "missing_arguments"
740+
assert notice["tool"] == "calendar_lookup"
741+
assert notice["next_contexts"] == []
742+
assert notice["confirmation_required"] is True
743+
assert tool_client.calls == 0
744+
assert len(client.payloads) == 2
745+
assert events[-1]["data"] == "[DONE]"
746+
747+
748+
@pytest.mark.anyio("asyncio")
749+
async def test_streaming_handles_multi_stage_notices() -> None:
750+
client = MultiToolOpenRouterClient(
751+
[
752+
{"name": "calendar_lookup", "arguments": {"query": "habit review"}},
753+
{"name": "tasks_lookup", "arguments": {"query": "habit review"}},
754+
],
755+
final_message="Let's confirm the plan.",
756+
)
757+
tool_client = ExpandingToolClient()
758+
tool_client.results = [
759+
"No events found in that window.",
760+
"No matching tasks were located.",
761+
]
762+
handler = StreamingHandler(
763+
client, # type: ignore[arg-type]
764+
DummyRepository(), # type: ignore[arg-type]
765+
tool_client, # type: ignore[arg-type]
766+
default_model="openrouter/auto",
767+
)
768+
769+
request = ChatCompletionRequest(
770+
messages=[ChatMessage(role="user", content="Help me build better habits")],
771+
)
772+
conversation = [{"role": "user", "content": "Help me build better habits"}]
773+
plan = ToolContextPlan(
774+
stages=[["calendar"], ["tasks"], ["notes"]],
775+
broad_search=True,
776+
)
777+
initial_tools = tool_client.get_openai_tools_for_contexts(
778+
plan.contexts_for_attempt(0)
779+
)
780+
781+
events: list[dict[str, Any]] = []
782+
async for event in handler.stream_conversation(
783+
"session-multi-stage",
784+
request,
785+
conversation,
786+
initial_tools,
787+
None,
788+
plan,
789+
):
790+
events.append(event)
791+
792+
notice_events = [
793+
json.loads(event["data"])
794+
for event in events
795+
if event.get("event") == "notice"
796+
]
797+
798+
assert len(notice_events) == 2
799+
first_notice, second_notice = notice_events
800+
assert first_notice["reason"] == "no_results"
801+
assert first_notice["next_contexts"] == ["tasks"]
802+
assert second_notice["reason"] == "no_results"
803+
assert second_notice["next_contexts"] == ["notes"]
804+
assert tool_client.context_history[0] == ["calendar"]
805+
assert ["calendar", "tasks"] in tool_client.context_history
806+
assert ["calendar", "tasks", "notes"] in tool_client.context_history
807+
assert tool_client.calls == 2
808+
assert len(client.payloads) == 3
809+
assert events[-1]["data"] == "[DONE]"

tests/test_tool_context_planner.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,17 @@ def test_planner_expands_habit_contexts() -> None:
2929
assert plan.contexts_for_attempt(2) == ["calendar", "tasks", "notes"]
3030

3131

32+
def test_plan_reports_additional_contexts() -> None:
    """The habit plan reports the contexts each following attempt would add."""
    habit_plan = ToolContextPlanner().plan(
        _make_request("Help me build better habits this month"),
        [],
    )

    # The last attempt has no following stage, so nothing is added.
    expected_additions = {0: ["tasks"], 1: ["notes"], 2: []}
    for attempt, additions in expected_additions.items():
        assert habit_plan.additional_contexts_for_attempt(attempt) == additions
41+
42+
3243
def test_planner_routes_documents_to_gdrive() -> None:
3344
planner = ToolContextPlanner()
3445
request = _make_request("search docs for last year's budget")

0 commit comments

Comments
 (0)