diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index f7a36137..571861cb 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -656,6 +656,11 @@ class ReflectRequest(BaseModel): "Each group is a leaf {tags, match} or compound {and: [...]}, {or: [...]}, {not: ...}.", ) + include_reasoning_chain: bool = Field( + default=False, + description="If true and budget is 'mid' or 'high', the response may include a reasoning_chain " + "showing how the query was decomposed into sub-questions and what was found for each.", + ) @model_validator(mode="after") def validate_tags_exclusive(self) -> "ReflectRequest": if self.tags is not None and self.tag_groups is not None: @@ -792,6 +797,11 @@ class ReflectResponse(BaseModel): default=None, description="Execution trace of tool and LLM calls. Only present when include.tool_calls is set.", ) + reasoning_chain: dict | None = Field( + default=None, + description="Reasoning chain showing query decomposition and intermediate conclusions. " + "Only present when include_reasoning_chain=true and budget >= mid.", + ) class DispositionTraits(BaseModel): @@ -2546,12 +2556,31 @@ async def api_reflect( llm_calls=llm_calls, ) + # Serialize reasoning chain if requested and present + reasoning_chain_data: dict | None = None + if request.include_reasoning_chain and core_result.reasoning_chain is not None: + rc = core_result.reasoning_chain + reasoning_chain_data = { + "original_query": rc["original_query"], + "decomposition_rationale": rc.get("decomposition_rationale", ""), + "steps": [ + { + "step_number": step["step_number"], + "sub_question": step["sub_question"], + "conclusion": step["conclusion"], + "sources_used": step.get("sources_used", []), + } + for step in rc.get("steps", []) + ], + } + return ReflectResponse( text=core_result.text, based_on=based_on_result, structured_output=core_result.structured_output, usage=core_result.usage, trace=trace_result, + reasoning_chain=reasoning_chain_data, ) except OperationValidationError as e: diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index d8ae44f7..26159775 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -5388,6 +5388,13 @@ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]: total_tokens=agent_result.usage.total_tokens, ) + # Serialize reasoning chain if present + reasoning_chain_dict = None + if agent_result.reasoning_chain is not None: + import dataclasses as _dc + + reasoning_chain_dict = _dc.asdict(agent_result.reasoning_chain) + # Return response (compatible with existing API) result = ReflectResult( text=agent_result.text, @@ -5397,6 +5404,7 @@ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]: tool_trace=tool_trace_result, llm_trace=llm_trace_result, directives_applied=directives_applied_result, + reasoning_chain=reasoning_chain_dict, ) # Call post-operation hook if validator is configured diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py index 5d700b5e..dacfe5a1 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py @@ -16,8 +16,9 @@ import tiktoken -from .models import DirectiveInfo, LLMCall, ReflectAgentResult, TokenUsageSummary, ToolCall +from .models import DirectiveInfo, LLMCall, ReasoningChain, ReasoningStep, ReflectAgentResult, TokenUsageSummary, ToolCall from .prompts import FINAL_SYSTEM_PROMPT, _extract_directive_rules, build_final_prompt, build_system_prompt_for_tools +from .tools import tool_decompose from .tools_schema import get_reflect_tools @@ -355,7 +356,7 @@ async def run_reflect_agent( directive_rules = _extract_directive_rules(directives) if directives else None # Get tools for this agent (with directive compliance field if directives exist) - tools = get_reflect_tools(directive_rules=directive_rules) + tools = get_reflect_tools(directive_rules=directive_rules, budget=budget) # Build initial messages (directives are injected into system prompt at START and END) system_prompt = build_system_prompt_for_tools( @@ -764,6 +765,7 @@ def _log_completion(answer: str, iterations: int, forced: bool = False): directives_applied=directives_applied, llm_config=llm_config, response_schema=response_schema, + query=query, ) # Execute other tools in parallel (exclude done tool in all its format variants) @@ -923,6 +925,7 @@ async def _process_done_tool( directives_applied: list[DirectiveInfo], llm_config: "LLMProvider | None" = None, response_schema: dict | None = None, + query: str = "", ) -> ReflectAgentResult: """Process the done tool call and return the result.""" args = done_call.arguments @@ -938,6 +941,27 @@ async def _process_done_tool( used_mental_model_ids = [mid for mid in (args.get("mental_model_ids") or []) if mid in available_mental_model_ids] used_observation_ids = [oid for oid in (args.get("observation_ids") or []) if oid in available_observation_ids] + # Build reasoning chain from done() arguments if the agent provided reasoning_steps + reasoning_steps_raw = args.get("reasoning_steps") + reasoning_chain: ReasoningChain | None = None + if reasoning_steps_raw and isinstance(reasoning_steps_raw, list): + steps = [] + for i, step in enumerate(reasoning_steps_raw): + steps.append( + ReasoningStep( + step_number=i + 1, + sub_question=step.get("sub_question", ""), + evidence_summary="", # Agent doesn't provide this separately + conclusion=step.get("conclusion", ""), + sources_used=step.get("source_ids", []), + ) + ) + reasoning_chain = ReasoningChain( + original_query=query, + steps=steps, + decomposition_rationale="", + ) + # Generate structured output if schema provided structured_output = None final_usage = usage @@ -965,6 +989,7 @@ async def _process_done_tool( used_mental_model_ids=used_mental_model_ids, used_observation_ids=used_observation_ids, directives_applied=directives_applied, + reasoning_chain=reasoning_chain, ) @@ -1080,6 +1105,12 @@ async def _execute_tool( depth = args.get("depth", "chunk") return await expand_fn(memory_ids, depth) + elif tool_name == "decompose": + return await tool_decompose( + sub_questions=args.get("sub_questions", []), + rationale=args.get("rationale", ""), + ) + else: return {"error": f"Unknown tool: {tool_name}"} @@ -1106,6 +1137,11 @@ def _summarize_input(tool_name: str, args: dict[str, Any]) -> str: memory_ids = args.get("memory_ids", []) depth = args.get("depth", "chunk") return f"(memory_ids=[{len(memory_ids)} ids], depth={depth})" + elif tool_name == "decompose": + sub_questions = args.get("sub_questions", []) + rationale = args.get("rationale", "") + rationale_preview = f"'{rationale[:30]}...'" if len(rationale) > 30 else f"'{rationale}'" + return f"(sub_questions={len(sub_questions)}, rationale={rationale_preview})" elif tool_name == "done": answer = args.get("answer", "") answer_preview = f"'{answer[:30]}...'" if len(answer) > 30 else f"'{answer}'" diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/models.py b/hindsight-api-slim/hindsight_api/engine/reflect/models.py index 26c3f150..343b6ff5 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/models.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/models.py @@ -2,11 +2,34 @@ Pydantic models for the reflect agent. """ +from __future__ import annotations + +from dataclasses import dataclass, field from typing import Any, Literal from pydantic import BaseModel, Field +@dataclass +class ReasoningStep: + """A single reasoning step in a multi-step reflection.""" + + step_number: int + sub_question: str + evidence_summary: str + conclusion: str + sources_used: list[str] # memory IDs referenced + + +@dataclass +class ReasoningChain: + """Complete reasoning chain for multi-step reflection.""" + + original_query: str + steps: list[ReasoningStep] = field(default_factory=list) + decomposition_rationale: str = "" + + class ObservationSection(BaseModel): """A section within an observation with its supporting memories.""" @@ -107,3 +130,6 @@ class ReflectAgentResult(BaseModel): directives_applied: list[DirectiveInfo] = Field( default_factory=list, description="Directive mental models that affected this reflection" ) + reasoning_chain: ReasoningChain | None = Field( + default=None, description="Multi-step reasoning chain when decompose() was used" + ) diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py b/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py index 43faffe9..ca4f06aa 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py @@ -219,6 +219,10 @@ def build_system_prompt_for_tools( "", "Think: What ENTITIES and CONCEPTS does this question involve? Search for each separately.", "", + "For multi-faceted queries at MID or HIGH budget, consider using decompose() instead of", + "manually splitting searches. decompose() generates focused sub-questions and lets you", + "investigate each with the full retrieval hierarchy before synthesizing a final answer.", + "", ] ) @@ -245,6 +249,11 @@ def build_system_prompt_for_tools( "- Check multiple sources when the question warrants it", "- Verify stale data if it's central to the answer", "- Don't over-explore, but ensure reasonable coverage", + "- For complex queries that span multiple topics or require comparing different knowledge", + " areas, use the decompose() tool to break the query into 2 focused sub-questions.", + " Investigate each sub-question separately using recall and search tools, then synthesize", + " your findings in the final answer. Include reasoning_steps in your done() call to", + " document your chain of reasoning.", "", ] ) @@ -258,6 +267,11 @@ def build_system_prompt_for_tools( "- Verify information across different retrieval levels", "- Use expand() to get full context on important memories", "- Take time to synthesize a complete, well-researched answer", + "- For complex queries, use the decompose() tool to break the query into up to 4", + " sub-questions. Investigate each sub-question thoroughly using multiple search", + " strategies (mental models, observations, and raw facts). Use expand() to get full", + " context for critical evidence. Include detailed reasoning_steps in your done() call", + " showing how each sub-question's findings contribute to the final answer.", "", ] ) @@ -271,7 +285,10 @@ def build_system_prompt_for_tools( "2. If no mental model or it's stale, try search_observations() for consolidated knowledge", "3. If observations are stale OR you need specific details, use recall() for raw facts", "4. Use expand() if you need more context on specific memories", - "5. When ready, call done() with your answer and supporting IDs", + "5. (Optional, MID/HIGH budget) If the query is complex or multi-faceted, use decompose()", + " to break it into sub-questions, then investigate each sub-question systematically", + " using the retrieval hierarchy above.", + "6. When ready, call done() with your answer and supporting IDs", ] ) else: @@ -280,7 +297,10 @@ def build_system_prompt_for_tools( "1. First, try search_observations() - check for consolidated knowledge", "2. If search_observations returns 0 results OR observations are stale, you MUST call recall() for raw facts", "3. Use expand() if you need more context on specific memories", - "4. When ready, call done() with your answer and supporting IDs", + "4. (Optional, MID/HIGH budget) If the query is complex or multi-faceted, use decompose()", + " to break it into sub-questions, then investigate each sub-question systematically", + " using the retrieval hierarchy above.", + "5. When ready, call done() with your answer and supporting IDs", ] ) diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/tools.py b/hindsight-api-slim/hindsight_api/engine/reflect/tools.py index a55d9833..1da1fe08 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/tools.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/tools.py @@ -386,3 +386,37 @@ async def tool_expand( results.append(item) return {"results": results, "count": len(results)} + + +async def tool_decompose( + sub_questions: list[str], + rationale: str, +) -> dict[str, Any]: + """ + Process a query decomposition. + + Unlike other tools that call the database, this one simply validates + and returns the sub-questions in a structured format for the agent + to work through sequentially. + + Args: + sub_questions: List of 2-4 focused sub-questions to investigate + rationale: Brief explanation of why this decomposition makes sense + + Returns: + Dict with structured sub-questions and investigation instructions + """ + return { + "status": "decomposed", + "sub_questions": [ + {"index": i + 1, "question": q, "status": "pending"} + for i, q in enumerate(sub_questions) + ], + "rationale": rationale, + "instructions": ( + "Now investigate each sub-question using recall, search_mental_models, " + "and search_observations. After gathering evidence for all sub-questions, " + "call done() with your synthesized answer and include reasoning_steps " + "to document your chain of reasoning." + ), + } diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/tools_schema.py b/hindsight-api-slim/hindsight_api/engine/reflect/tools_schema.py index 15c14fe1..81646b54 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/tools_schema.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/tools_schema.py @@ -134,6 +134,57 @@ }, } +TOOL_DECOMPOSE = { + "type": "function", + "function": { + "name": "decompose", + "description": ( + "Break a complex query into 2-4 focused sub-questions that can each be answered " + "independently. Use this when the query spans multiple topics, requires comparing " + "different domains, or needs step-by-step reasoning.\n\n" + "After decomposing, use recall/search tools to gather evidence for EACH sub-question " + "before calling done with your synthesized answer.\n\n" + "ONLY available for MID and HIGH budget queries. Do NOT use for simple factual lookups." + ), + "parameters": { + "type": "object", + "properties": { + "sub_questions": { + "type": "array", + "items": {"type": "string"}, + "description": "2-4 sub-questions to investigate. Each should be self-contained and answerable via recall/search tools.", + }, + "rationale": { + "type": "string", + "description": "Brief explanation of why this decomposition makes sense and what each sub-question contributes to answering the original query.", + }, + }, + "required": ["sub_questions", "rationale"], + }, + }, +} + +_REASONING_STEPS_PROPERTY = { + "type": "array", + "items": { + "type": "object", + "properties": { + "sub_question": {"type": "string"}, + "conclusion": {"type": "string"}, + "source_ids": { + "type": "array", + "items": {"type": "string"}, + "description": "Memory IDs that support this step's conclusion", + }, + }, + "required": ["sub_question", "conclusion"], + }, + "description": ( + "Optional reasoning chain. Include when you used decompose() to break the query into " + "sub-questions. Each step should summarize what you found for that sub-question." + ), +} + TOOL_DONE_ANSWER = { "type": "function", "function": { @@ -161,6 +212,7 @@ "items": {"type": "string"}, "description": "Array of observation IDs that support your answer", }, + "reasoning_steps": _REASONING_STEPS_PROPERTY, }, "required": ["answer"], }, @@ -216,6 +268,7 @@ def _build_done_tool_with_directives(directive_rules: list[str]) -> dict: "items": {"type": "string"}, "description": "Array of observation IDs that support your answer", }, + "reasoning_steps": _REASONING_STEPS_PROPERTY, "directive_compliance": { "type": "string", "description": f"REQUIRED: Confirm your answer complies with ALL directives. List each directive and how your answer follows it:\n{rules_list}\n\nFormat: 'Directive 1: [how answer complies]. Directive 2: [how answer complies]...'", @@ -227,7 +280,10 @@ def _build_done_tool_with_directives(directive_rules: list[str]) -> dict: } -def get_reflect_tools(directive_rules: list[str] | None = None) -> list[dict]: +def get_reflect_tools( + directive_rules: list[str] | None = None, + budget: str | None = None, +) -> list[dict]: """ Get the list of tools for the reflect agent. @@ -239,17 +295,23 @@ def get_reflect_tools(directive_rules: list[str] | None = None) -> list[dict]: Args: directive_rules: Optional list of directive rule strings. If provided, the done() tool will require directive compliance confirmation. + budget: Optional budget level string ("low", "mid", "high"). When "mid" or "high", + the decompose() tool is included to enable multi-step reasoning. Returns: List of tool definitions in OpenAI format """ - tools = [ + tools: list[dict] = [ TOOL_SEARCH_MENTAL_MODELS, TOOL_SEARCH_OBSERVATIONS, TOOL_RECALL, TOOL_EXPAND, ] + # Include decompose tool for MID and HIGH budgets only + if budget is not None and budget.lower() in ("mid", "high"): + tools.append(TOOL_DECOMPOSE) + # Use directive-aware done tool if directives are present if directive_rules: tools.append(_build_done_tool_with_directives(directive_rules)) diff --git a/hindsight-api-slim/hindsight_api/engine/response_models.py b/hindsight-api-slim/hindsight_api/engine/response_models.py index f31d39c6..79ff16a3 100644 --- a/hindsight-api-slim/hindsight_api/engine/response_models.py +++ b/hindsight-api-slim/hindsight_api/engine/response_models.py @@ -304,6 +304,10 @@ class ReflectResult(BaseModel): default_factory=list, description="Directive mental models that were applied during this reflection.", ) + reasoning_chain: dict[str, Any] | None = Field( + default=None, + description="Serialized ReasoningChain from multi-step reflection. Only present when include_reasoning_chain=true and budget >= mid.", + ) class EntityObservation(BaseModel):