Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions hindsight-api-slim/hindsight_api/api/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,11 @@ class ReflectRequest(BaseModel):
"Each group is a leaf {tags, match} or compound {and: [...]}, {or: [...]}, {not: ...}.",
)

include_reasoning_chain: bool = Field(
default=False,
description="If true and budget is 'mid' or 'high', the response may include a reasoning_chain "
"showing how the query was decomposed into sub-questions and what was found for each.",
)
@model_validator(mode="after")
def validate_tags_exclusive(self) -> "ReflectRequest":
if self.tags is not None and self.tag_groups is not None:
Expand Down Expand Up @@ -792,6 +797,11 @@ class ReflectResponse(BaseModel):
default=None,
description="Execution trace of tool and LLM calls. Only present when include.tool_calls is set.",
)
reasoning_chain: dict | None = Field(
default=None,
description="Reasoning chain showing query decomposition and intermediate conclusions. "
"Only present when include_reasoning_chain=true and budget >= mid.",
)


class DispositionTraits(BaseModel):
Expand Down Expand Up @@ -2546,12 +2556,31 @@ async def api_reflect(
llm_calls=llm_calls,
)

# Serialize reasoning chain if requested and present
reasoning_chain_data: dict | None = None
if request.include_reasoning_chain and core_result.reasoning_chain is not None:
rc = core_result.reasoning_chain
reasoning_chain_data = {
"original_query": rc["original_query"],
"decomposition_rationale": rc.get("decomposition_rationale", ""),
"steps": [
{
"step_number": step["step_number"],
"sub_question": step["sub_question"],
"conclusion": step["conclusion"],
"sources_used": step.get("sources_used", []),
}
for step in rc.get("steps", [])
],
}

return ReflectResponse(
text=core_result.text,
based_on=based_on_result,
structured_output=core_result.structured_output,
usage=core_result.usage,
trace=trace_result,
reasoning_chain=reasoning_chain_data,
)

except OperationValidationError as e:
Expand Down
8 changes: 8 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5388,6 +5388,13 @@ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
total_tokens=agent_result.usage.total_tokens,
)

# Serialize reasoning chain if present
reasoning_chain_dict = None
if agent_result.reasoning_chain is not None:
import dataclasses as _dc

reasoning_chain_dict = _dc.asdict(agent_result.reasoning_chain)

# Return response (compatible with existing API)
result = ReflectResult(
text=agent_result.text,
Expand All @@ -5397,6 +5404,7 @@ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
tool_trace=tool_trace_result,
llm_trace=llm_trace_result,
directives_applied=directives_applied_result,
reasoning_chain=reasoning_chain_dict,
)

# Call post-operation hook if validator is configured
Expand Down
40 changes: 38 additions & 2 deletions hindsight-api-slim/hindsight_api/engine/reflect/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

import tiktoken

from .models import DirectiveInfo, LLMCall, ReflectAgentResult, TokenUsageSummary, ToolCall
from .models import DirectiveInfo, LLMCall, ReasoningChain, ReasoningStep, ReflectAgentResult, TokenUsageSummary, ToolCall
from .prompts import FINAL_SYSTEM_PROMPT, _extract_directive_rules, build_final_prompt, build_system_prompt_for_tools
from .tools import tool_decompose
from .tools_schema import get_reflect_tools


Expand Down Expand Up @@ -355,7 +356,7 @@ async def run_reflect_agent(
directive_rules = _extract_directive_rules(directives) if directives else None

# Get tools for this agent (with directive compliance field if directives exist)
tools = get_reflect_tools(directive_rules=directive_rules)
tools = get_reflect_tools(directive_rules=directive_rules, budget=budget)

# Build initial messages (directives are injected into system prompt at START and END)
system_prompt = build_system_prompt_for_tools(
Expand Down Expand Up @@ -764,6 +765,7 @@ def _log_completion(answer: str, iterations: int, forced: bool = False):
directives_applied=directives_applied,
llm_config=llm_config,
response_schema=response_schema,
query=query,
)

# Execute other tools in parallel (exclude done tool in all its format variants)
Expand Down Expand Up @@ -923,6 +925,7 @@ async def _process_done_tool(
directives_applied: list[DirectiveInfo],
llm_config: "LLMProvider | None" = None,
response_schema: dict | None = None,
query: str = "",
) -> ReflectAgentResult:
"""Process the done tool call and return the result."""
args = done_call.arguments
Expand All @@ -938,6 +941,27 @@ async def _process_done_tool(
used_mental_model_ids = [mid for mid in (args.get("mental_model_ids") or []) if mid in available_mental_model_ids]
used_observation_ids = [oid for oid in (args.get("observation_ids") or []) if oid in available_observation_ids]

# Build reasoning chain from done() arguments if the agent provided reasoning_steps
reasoning_steps_raw = args.get("reasoning_steps")
reasoning_chain: ReasoningChain | None = None
if reasoning_steps_raw and isinstance(reasoning_steps_raw, list):
steps = []
for i, step in enumerate(reasoning_steps_raw):
steps.append(
ReasoningStep(
step_number=i + 1,
sub_question=step.get("sub_question", ""),
evidence_summary="", # Agent doesn't provide this separately
conclusion=step.get("conclusion", ""),
sources_used=step.get("source_ids", []),
)
)
reasoning_chain = ReasoningChain(
original_query=query,
steps=steps,
decomposition_rationale="",
)

# Generate structured output if schema provided
structured_output = None
final_usage = usage
Expand Down Expand Up @@ -965,6 +989,7 @@ async def _process_done_tool(
used_mental_model_ids=used_mental_model_ids,
used_observation_ids=used_observation_ids,
directives_applied=directives_applied,
reasoning_chain=reasoning_chain,
)


Expand Down Expand Up @@ -1080,6 +1105,12 @@ async def _execute_tool(
depth = args.get("depth", "chunk")
return await expand_fn(memory_ids, depth)

elif tool_name == "decompose":
return await tool_decompose(
sub_questions=args.get("sub_questions", []),
rationale=args.get("rationale", ""),
)

else:
return {"error": f"Unknown tool: {tool_name}"}

Expand All @@ -1106,6 +1137,11 @@ def _summarize_input(tool_name: str, args: dict[str, Any]) -> str:
memory_ids = args.get("memory_ids", [])
depth = args.get("depth", "chunk")
return f"(memory_ids=[{len(memory_ids)} ids], depth={depth})"
elif tool_name == "decompose":
sub_questions = args.get("sub_questions", [])
rationale = args.get("rationale", "")
rationale_preview = f"'{rationale[:30]}...'" if len(rationale) > 30 else f"'{rationale}'"
return f"(sub_questions={len(sub_questions)}, rationale={rationale_preview})"
elif tool_name == "done":
answer = args.get("answer", "")
answer_preview = f"'{answer[:30]}...'" if len(answer) > 30 else f"'{answer}'"
Expand Down
26 changes: 26 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/reflect/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,34 @@
Pydantic models for the reflect agent.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Literal

from pydantic import BaseModel, Field


@dataclass
class ReasoningStep:
    """A single reasoning step in a multi-step reflection.

    Built in _process_done_tool from the agent's ``reasoning_steps``
    argument to done(); serialized verbatim into the reflect response.
    """

    step_number: int  # 1-based position of this step in the chain
    sub_question: str  # the focused sub-question this step investigated
    evidence_summary: str  # always "" today — the agent does not supply this separately
    conclusion: str  # what the agent concluded for this sub-question
    sources_used: list[str]  # memory IDs referenced


@dataclass
class ReasoningChain:
    """Complete reasoning chain for multi-step reflection.

    Converted to a plain dict via dataclasses.asdict() before being
    attached to the reflect API response.
    """

    original_query: str  # the user's query before decomposition
    steps: list[ReasoningStep] = field(default_factory=list)  # ordered sub-question investigations
    decomposition_rationale: str = ""  # why the query was split this way; "" when not provided


class ObservationSection(BaseModel):
"""A section within an observation with its supporting memories."""

Expand Down Expand Up @@ -107,3 +130,6 @@ class ReflectAgentResult(BaseModel):
directives_applied: list[DirectiveInfo] = Field(
default_factory=list, description="Directive mental models that affected this reflection"
)
reasoning_chain: ReasoningChain | None = Field(
default=None, description="Multi-step reasoning chain when decompose() was used"
)
24 changes: 22 additions & 2 deletions hindsight-api-slim/hindsight_api/engine/reflect/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ def build_system_prompt_for_tools(
"",
"Think: What ENTITIES and CONCEPTS does this question involve? Search for each separately.",
"",
"For multi-faceted queries at MID or HIGH budget, consider using decompose() instead of",
"manually splitting searches. decompose() generates focused sub-questions and lets you",
"investigate each with the full retrieval hierarchy before synthesizing a final answer.",
"",
]
)

Expand All @@ -245,6 +249,11 @@ def build_system_prompt_for_tools(
"- Check multiple sources when the question warrants it",
"- Verify stale data if it's central to the answer",
"- Don't over-explore, but ensure reasonable coverage",
"- For complex queries that span multiple topics or require comparing different knowledge",
" areas, use the decompose() tool to break the query into 2 focused sub-questions.",
" Investigate each sub-question separately using recall and search tools, then synthesize",
" your findings in the final answer. Include reasoning_steps in your done() call to",
" document your chain of reasoning.",
"",
]
)
Expand All @@ -258,6 +267,11 @@ def build_system_prompt_for_tools(
"- Verify information across different retrieval levels",
"- Use expand() to get full context on important memories",
"- Take time to synthesize a complete, well-researched answer",
"- For complex queries, use the decompose() tool to break the query into up to 4",
" sub-questions. Investigate each sub-question thoroughly using multiple search",
" strategies (mental models, observations, and raw facts). Use expand() to get full",
" context for critical evidence. Include detailed reasoning_steps in your done() call",
" showing how each sub-question's findings contribute to the final answer.",
"",
]
)
Expand All @@ -271,7 +285,10 @@ def build_system_prompt_for_tools(
"2. If no mental model or it's stale, try search_observations() for consolidated knowledge",
"3. If observations are stale OR you need specific details, use recall() for raw facts",
"4. Use expand() if you need more context on specific memories",
"5. When ready, call done() with your answer and supporting IDs",
"5. (Optional, MID/HIGH budget) If the query is complex or multi-faceted, use decompose()",
" to break it into sub-questions, then investigate each sub-question systematically",
" using the retrieval hierarchy above.",
"6. When ready, call done() with your answer and supporting IDs",
]
)
else:
Expand All @@ -280,7 +297,10 @@ def build_system_prompt_for_tools(
"1. First, try search_observations() - check for consolidated knowledge",
"2. If search_observations returns 0 results OR observations are stale, you MUST call recall() for raw facts",
"3. Use expand() if you need more context on specific memories",
"4. When ready, call done() with your answer and supporting IDs",
"4. (Optional, MID/HIGH budget) If the query is complex or multi-faceted, use decompose()",
" to break it into sub-questions, then investigate each sub-question systematically",
" using the retrieval hierarchy above.",
"5. When ready, call done() with your answer and supporting IDs",
]
)

Expand Down
34 changes: 34 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/reflect/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,37 @@ async def tool_expand(
results.append(item)

return {"results": results, "count": len(results)}


async def tool_decompose(
    sub_questions: list[str],
    rationale: str,
) -> dict[str, Any]:
    """
    Process a query decomposition.

    Unlike other tools that call the database, this one simply validates
    and returns the sub-questions in a structured format for the agent
    to work through sequentially.

    Args:
        sub_questions: List of 2-4 focused sub-questions to investigate
        rationale: Brief explanation of why this decomposition makes sense

    Returns:
        Dict with structured sub-questions and investigation instructions,
        or a dict with an "error" key when no usable sub-question was given.
    """
    # Defensive validation: the agent supplies these arguments via LLM tool
    # calls, so they may be empty or malformed. Keep only non-empty string
    # entries; previously an empty list silently produced a "decomposed"
    # payload with zero questions plus instructions to investigate nothing.
    usable = [q.strip() for q in sub_questions if isinstance(q, str) and q.strip()]
    if not usable:
        # Same error-dict convention as the other tool handlers (see
        # the unknown-tool branch in _execute_tool).
        return {"error": "decompose requires at least one non-empty sub-question"}

    return {
        "status": "decomposed",
        "sub_questions": [
            # 1-based index so the agent's progress reports read naturally.
            {"index": i + 1, "question": q, "status": "pending"}
            for i, q in enumerate(usable)
        ],
        "rationale": rationale,
        "instructions": (
            "Now investigate each sub-question using recall, search_mental_models, "
            "and search_observations. After gathering evidence for all sub-questions, "
            "call done() with your synthesized answer and include reasoning_steps "
            "to document your chain of reasoning."
        ),
    }
Loading
Loading