Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions hindsight-api-slim/hindsight_api/api/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,11 @@ class ReflectRequest(BaseModel):
"Each group is a leaf {tags, match} or compound {and: [...]}, {or: [...]}, {not: ...}.",
)

include_reasoning_chain: bool = Field(
default=False,
description="If true and budget is 'mid' or 'high', the response may include a reasoning_chain "
"showing how the query was decomposed into sub-questions and what was found for each.",
)
@model_validator(mode="after")
def validate_tags_exclusive(self) -> "ReflectRequest":
if self.tags is not None and self.tag_groups is not None:
Expand Down Expand Up @@ -792,6 +797,11 @@ class ReflectResponse(BaseModel):
default=None,
description="Execution trace of tool and LLM calls. Only present when include.tool_calls is set.",
)
reasoning_chain: dict | None = Field(
default=None,
description="Reasoning chain showing query decomposition and intermediate conclusions. "
"Only present when include_reasoning_chain=true and budget >= mid.",
)


class DispositionTraits(BaseModel):
Expand Down Expand Up @@ -2546,12 +2556,31 @@ async def api_reflect(
llm_calls=llm_calls,
)

# Serialize reasoning chain if requested and present
reasoning_chain_data: dict | None = None
if request.include_reasoning_chain and core_result.reasoning_chain is not None:
rc = core_result.reasoning_chain
reasoning_chain_data = {
"original_query": rc["original_query"],
"decomposition_rationale": rc.get("decomposition_rationale", ""),
"steps": [
{
"step_number": step["step_number"],
"sub_question": step["sub_question"],
"conclusion": step["conclusion"],
"sources_used": step.get("sources_used", []),
}
for step in rc.get("steps", [])
],
}

return ReflectResponse(
text=core_result.text,
based_on=based_on_result,
structured_output=core_result.structured_output,
usage=core_result.usage,
trace=trace_result,
reasoning_chain=reasoning_chain_data,
)

except OperationValidationError as e:
Expand Down
8 changes: 8 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5388,6 +5388,13 @@ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
total_tokens=agent_result.usage.total_tokens,
)

# Serialize reasoning chain if present
reasoning_chain_dict = None
if agent_result.reasoning_chain is not None:
import dataclasses as _dc

reasoning_chain_dict = _dc.asdict(agent_result.reasoning_chain)

# Return response (compatible with existing API)
result = ReflectResult(
text=agent_result.text,
Expand All @@ -5397,6 +5404,7 @@ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
tool_trace=tool_trace_result,
llm_trace=llm_trace_result,
directives_applied=directives_applied_result,
reasoning_chain=reasoning_chain_dict,
)

# Call post-operation hook if validator is configured
Expand Down
40 changes: 38 additions & 2 deletions hindsight-api-slim/hindsight_api/engine/reflect/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

import tiktoken

from .models import DirectiveInfo, LLMCall, ReflectAgentResult, TokenUsageSummary, ToolCall
from .models import DirectiveInfo, LLMCall, ReasoningChain, ReasoningStep, ReflectAgentResult, TokenUsageSummary, ToolCall
from .prompts import FINAL_SYSTEM_PROMPT, _extract_directive_rules, build_final_prompt, build_system_prompt_for_tools
from .tools import tool_decompose
from .tools_schema import get_reflect_tools


Expand Down Expand Up @@ -355,7 +356,7 @@ async def run_reflect_agent(
directive_rules = _extract_directive_rules(directives) if directives else None

# Get tools for this agent (with directive compliance field if directives exist)
tools = get_reflect_tools(directive_rules=directive_rules)
tools = get_reflect_tools(directive_rules=directive_rules, budget=budget)

# Build initial messages (directives are injected into system prompt at START and END)
system_prompt = build_system_prompt_for_tools(
Expand Down Expand Up @@ -764,6 +765,7 @@ def _log_completion(answer: str, iterations: int, forced: bool = False):
directives_applied=directives_applied,
llm_config=llm_config,
response_schema=response_schema,
query=query,
)

# Execute other tools in parallel (exclude done tool in all its format variants)
Expand Down Expand Up @@ -923,6 +925,7 @@ async def _process_done_tool(
directives_applied: list[DirectiveInfo],
llm_config: "LLMProvider | None" = None,
response_schema: dict | None = None,
query: str = "",
) -> ReflectAgentResult:
"""Process the done tool call and return the result."""
args = done_call.arguments
Expand All @@ -938,6 +941,27 @@ async def _process_done_tool(
used_mental_model_ids = [mid for mid in (args.get("mental_model_ids") or []) if mid in available_mental_model_ids]
used_observation_ids = [oid for oid in (args.get("observation_ids") or []) if oid in available_observation_ids]

# Build reasoning chain from done() arguments if the agent provided reasoning_steps
reasoning_steps_raw = args.get("reasoning_steps")
reasoning_chain: ReasoningChain | None = None
if reasoning_steps_raw and isinstance(reasoning_steps_raw, list):
steps = []
for i, step in enumerate(reasoning_steps_raw):
steps.append(
ReasoningStep(
step_number=i + 1,
sub_question=step.get("sub_question", ""),
evidence_summary="", # Agent doesn't provide this separately
conclusion=step.get("conclusion", ""),
sources_used=step.get("source_ids", []),
)
)
reasoning_chain = ReasoningChain(
original_query=query,
steps=steps,
decomposition_rationale="",
)

# Generate structured output if schema provided
structured_output = None
final_usage = usage
Expand Down Expand Up @@ -965,6 +989,7 @@ async def _process_done_tool(
used_mental_model_ids=used_mental_model_ids,
used_observation_ids=used_observation_ids,
directives_applied=directives_applied,
reasoning_chain=reasoning_chain,
)


Expand Down Expand Up @@ -1080,6 +1105,12 @@ async def _execute_tool(
depth = args.get("depth", "chunk")
return await expand_fn(memory_ids, depth)

elif tool_name == "decompose":
return await tool_decompose(
sub_questions=args.get("sub_questions", []),
rationale=args.get("rationale", ""),
)

else:
return {"error": f"Unknown tool: {tool_name}"}

Expand All @@ -1106,6 +1137,11 @@ def _summarize_input(tool_name: str, args: dict[str, Any]) -> str:
memory_ids = args.get("memory_ids", [])
depth = args.get("depth", "chunk")
return f"(memory_ids=[{len(memory_ids)} ids], depth={depth})"
elif tool_name == "decompose":
sub_questions = args.get("sub_questions", [])
rationale = args.get("rationale", "")
rationale_preview = f"'{rationale[:30]}...'" if len(rationale) > 30 else f"'{rationale}'"
return f"(sub_questions={len(sub_questions)}, rationale={rationale_preview})"
elif tool_name == "done":
answer = args.get("answer", "")
answer_preview = f"'{answer[:30]}...'" if len(answer) > 30 else f"'{answer}'"
Expand Down
26 changes: 26 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/reflect/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,34 @@
Pydantic models for the reflect agent.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Literal

from pydantic import BaseModel, Field


@dataclass
class ReasoningStep:
    """A single reasoning step in a multi-step reflection.

    Built in _process_done_tool from the agent's ``reasoning_steps``
    argument to done(); serialized verbatim into the reflect response.
    """

    step_number: int  # 1-based position of this step in the chain
    sub_question: str  # the focused sub-question this step investigated
    evidence_summary: str  # always "" today — the agent does not supply this separately
    conclusion: str  # what the agent concluded for this sub-question
    sources_used: list[str]  # memory IDs referenced


@dataclass
class ReasoningChain:
    """Complete reasoning chain for multi-step reflection.

    Converted to a plain dict via dataclasses.asdict() before being
    attached to the reflect API response.
    """

    original_query: str  # the user's query before decomposition
    steps: list[ReasoningStep] = field(default_factory=list)  # ordered sub-question investigations
    decomposition_rationale: str = ""  # why the query was split this way; "" when not provided


class ObservationSection(BaseModel):
"""A section within an observation with its supporting memories."""

Expand Down Expand Up @@ -107,3 +130,6 @@ class ReflectAgentResult(BaseModel):
directives_applied: list[DirectiveInfo] = Field(
default_factory=list, description="Directive mental models that affected this reflection"
)
reasoning_chain: ReasoningChain | None = Field(
default=None, description="Multi-step reasoning chain when decompose() was used"
)
24 changes: 22 additions & 2 deletions hindsight-api-slim/hindsight_api/engine/reflect/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ def build_system_prompt_for_tools(
"",
"Think: What ENTITIES and CONCEPTS does this question involve? Search for each separately.",
"",
"For multi-faceted queries at MID or HIGH budget, consider using decompose() instead of",
"manually splitting searches. decompose() generates focused sub-questions and lets you",
"investigate each with the full retrieval hierarchy before synthesizing a final answer.",
"",
]
)

Expand All @@ -245,6 +249,11 @@ def build_system_prompt_for_tools(
"- Check multiple sources when the question warrants it",
"- Verify stale data if it's central to the answer",
"- Don't over-explore, but ensure reasonable coverage",
"- For complex queries that span multiple topics or require comparing different knowledge",
" areas, use the decompose() tool to break the query into 2 focused sub-questions.",
" Investigate each sub-question separately using recall and search tools, then synthesize",
" your findings in the final answer. Include reasoning_steps in your done() call to",
" document your chain of reasoning.",
"",
]
)
Expand All @@ -258,6 +267,11 @@ def build_system_prompt_for_tools(
"- Verify information across different retrieval levels",
"- Use expand() to get full context on important memories",
"- Take time to synthesize a complete, well-researched answer",
"- For complex queries, use the decompose() tool to break the query into up to 4",
" sub-questions. Investigate each sub-question thoroughly using multiple search",
" strategies (mental models, observations, and raw facts). Use expand() to get full",
" context for critical evidence. Include detailed reasoning_steps in your done() call",
" showing how each sub-question's findings contribute to the final answer.",
"",
]
)
Expand All @@ -271,7 +285,10 @@ def build_system_prompt_for_tools(
"2. If no mental model or it's stale, try search_observations() for consolidated knowledge",
"3. If observations are stale OR you need specific details, use recall() for raw facts",
"4. Use expand() if you need more context on specific memories",
"5. When ready, call done() with your answer and supporting IDs",
"5. (Optional, MID/HIGH budget) If the query is complex or multi-faceted, use decompose()",
" to break it into sub-questions, then investigate each sub-question systematically",
" using the retrieval hierarchy above.",
"6. When ready, call done() with your answer and supporting IDs",
]
)
else:
Expand All @@ -280,7 +297,10 @@ def build_system_prompt_for_tools(
"1. First, try search_observations() - check for consolidated knowledge",
"2. If search_observations returns 0 results OR observations are stale, you MUST call recall() for raw facts",
"3. Use expand() if you need more context on specific memories",
"4. When ready, call done() with your answer and supporting IDs",
"4. (Optional, MID/HIGH budget) If the query is complex or multi-faceted, use decompose()",
" to break it into sub-questions, then investigate each sub-question systematically",
" using the retrieval hierarchy above.",
"5. When ready, call done() with your answer and supporting IDs",
]
)

Expand Down
34 changes: 34 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/reflect/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,37 @@ async def tool_expand(
results.append(item)

return {"results": results, "count": len(results)}


async def tool_decompose(
    sub_questions: list[str],
    rationale: str,
) -> dict[str, Any]:
    """
    Process a query decomposition.

    Unlike other tools that call the database, this one simply validates
    and returns the sub-questions in a structured format for the agent
    to work through sequentially.

    Args:
        sub_questions: List of 2-4 focused sub-questions to investigate
        rationale: Brief explanation of why this decomposition makes sense

    Returns:
        Dict with structured sub-questions and investigation instructions,
        or a dict with an "error" key when no usable sub-question was given.
    """
    # Defensive validation: the agent supplies these arguments via LLM tool
    # calls, so they may be empty or malformed. Keep only non-empty string
    # entries; previously an empty list silently produced a "decomposed"
    # payload with zero questions plus instructions to investigate nothing.
    usable = [q.strip() for q in sub_questions if isinstance(q, str) and q.strip()]
    if not usable:
        # Same error-dict convention as the other tool handlers (see
        # the unknown-tool branch in _execute_tool).
        return {"error": "decompose requires at least one non-empty sub-question"}

    return {
        "status": "decomposed",
        "sub_questions": [
            # 1-based index so the agent's progress reports read naturally.
            {"index": i + 1, "question": q, "status": "pending"}
            for i, q in enumerate(usable)
        ],
        "rationale": rationale,
        "instructions": (
            "Now investigate each sub-question using recall, search_mental_models, "
            "and search_observations. After gathering evidence for all sub-questions, "
            "call done() with your synthesized answer and include reasoning_steps "
            "to document your chain of reasoning."
        ),
    }
Loading
Loading