volcengine · mvanhorn · Mar 21, 2026 · Mar 21, 2026
diff --git a/docs/images/ov-provenance-example.png b/docs/images/ov-provenance-example.png
diff --git a/openviking/server/routers/search.py b/openviking/server/routers/search.py
@@ -16,7 +16,6 @@
 from openviking.telemetry import TelemetryRequest
 
 
-
 def _sanitize_floats(obj: Any) -> Any:
     """Recursively replace inf/nan with 0.0 to ensure JSON compliance."""
     if isinstance(obj, float):
@@ -29,6 +28,7 @@ def _sanitize_floats(obj: Any) -> Any:
         return [_sanitize_floats(v) for v in obj]
     return obj
 
+
 router = APIRouter(prefix="/api/v1/search", tags=["search"])
 
 
@@ -41,6 +41,7 @@ class FindRequest(BaseModel):
     node_limit: Optional[int] = None
     score_threshold: Optional[float] = None
     filter: Optional[Dict[str, Any]] = None
+    include_provenance: bool = False
     telemetry: TelemetryRequest = False
 
 
@@ -54,6 +55,7 @@ class SearchRequest(BaseModel):
     node_limit: Optional[int] = None
     score_threshold: Optional[float] = None
     filter: Optional[Dict[str, Any]] = None
+    include_provenance: bool = False
     telemetry: TelemetryRequest = False
 
 
@@ -96,7 +98,7 @@ async def find(
     )
     result = execution.result
     if hasattr(result, "to_dict"):
-        result = result.to_dict()
+        result = result.to_dict(include_provenance=request.include_provenance)
     result = _sanitize_floats(result)
     return Response(
         status="ok",
@@ -136,7 +138,7 @@ async def _search():
     )
     result = execution.result
     if hasattr(result, "to_dict"):
-        result = result.to_dict()
+        result = result.to_dict(include_provenance=request.include_provenance)
     result = _sanitize_floats(result)
     return Response(
         status="ok",

diff --git a/openviking_cli/retrieve/types.py b/openviking_cli/retrieve/types.py
@@ -345,8 +345,13 @@ def __iter__(self):
     def __post_init__(self):
         self.total = len(self.memories) + len(self.resources) + len(self.skills)
 
-    def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary format."""
+    def to_dict(self, include_provenance: bool = False) -> Dict[str, Any]:
+        """Convert to dictionary format.
+
+        Args:
+            include_provenance: If True and query_results is non-empty, add a
+                "provenance" list (one entry per query result) for observability.
+        """
         result = {
             "memories": [self._context_to_dict(m) for m in self.memories],
             "resources": [self._context_to_dict(r) for r in self.resources],
@@ -360,6 +365,9 @@ def to_dict(self) -> Dict[str, Any]:
                 "queries": [self._query_to_dict(q) for q in self.query_plan.queries],
             }
 
+        if include_provenance and self.query_results:
+            result["provenance"] = [self._query_result_to_dict(qr) for qr in self.query_results]
+
         return result
 
     def _context_to_dict(self, ctx: MatchedContext) -> Dict[str, Any]:
@@ -385,6 +393,24 @@ def _query_to_dict(self, q: TypedQuery) -> Dict[str, Any]:
             "priority": q.priority,
         }
 
+    def _query_result_to_dict(self, qr: "QueryResult") -> Dict[str, Any]:
+        """Serialize one QueryResult into a provenance entry."""
+        provenance: Dict[str, Any] = {"query": qr.query.query}
+        provenance["searched_directories"] = qr.searched_directories
+        # One summary per matched context; "tier" is the level prefixed with "L".
+        provenance["matched_contexts"] = [
+            {
+                "uri": hit.uri,
+                "tier": f"L{hit.level}",
+                "context_type": hit.context_type.value,
+                "score": hit.score,
+                "match_reason": hit.match_reason,
+            }
+            for hit in qr.matched_contexts
+        ]
+        provenance["thinking_trace"] = qr.thinking_trace.to_dict()
+        return provenance
+
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "FindResult":
         """Construct FindResult from a dictionary (e.g. HTTP JSON response)."""

diff --git a/tests/retrieve/test_provenance.py b/tests/retrieve/test_provenance.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for search result provenance metadata."""
+
+from __future__ import annotations
+
+from openviking_cli.retrieve.types import (
+    ContextType,
+    FindResult,
+    MatchedContext,
+    QueryResult,
+    ThinkingTrace,
+    TypedQuery,
+)
+
+
+class TestFindResultProvenance:
+    """FindResult.to_dict must expose provenance only when asked to."""
+
+    def _make_find_result(self) -> FindResult:
+        """Return a FindResult with one resource hit and one QueryResult."""
+        hit = MatchedContext(
+            uri="viking://resources/docs/arch.md",
+            context_type=ContextType.RESOURCE,
+            level=2,
+            abstract="Architecture doc",
+            score=0.87,
+            match_reason="semantic_match",
+        )
+        query_result = QueryResult(
+            query=TypedQuery(
+                query="architecture",
+                context_type=ContextType.RESOURCE,
+                intent="find architecture docs",
+            ),
+            matched_contexts=[hit],
+            searched_directories=["resources/", "resources/docs/"],
+            thinking_trace=ThinkingTrace(),
+        )
+        return FindResult(memories=[], resources=[hit], skills=[], query_results=[query_result])
+
+    def test_to_dict_without_provenance(self):
+        """An explicit False leaves the provenance key out."""
+        payload = self._make_find_result().to_dict(include_provenance=False)
+        assert "provenance" not in payload
+        assert payload["total"] == 1
+        assert len(payload["resources"]) == 1
+
+    def test_to_dict_with_provenance(self):
+        """Requesting provenance adds one fully populated entry per query result."""
+        payload = self._make_find_result().to_dict(include_provenance=True)
+        assert "provenance" in payload
+        assert len(payload["provenance"]) == 1
+
+        # Query-level fields.
+        entry = payload["provenance"][0]
+        assert entry["query"] == "architecture"
+        assert entry["searched_directories"] == ["resources/", "resources/docs/"]
+        assert len(entry["matched_contexts"]) == 1
+
+        # Per-context summary of the single matched context.
+        matched = entry["matched_contexts"][0]
+        expected = {
+            "uri": "viking://resources/docs/arch.md",
+            "tier": "L2",
+            "context_type": "resource",
+            "score": 0.87,
+            "match_reason": "semantic_match",
+        }
+        for key, value in expected.items():
+            assert matched[key] == value
+
+        # The thinking trace is serialized alongside the matches.
+        assert "thinking_trace" in entry
+        assert "statistics" in entry["thinking_trace"]
+
+    def test_to_dict_default_no_provenance(self):
+        """Calling to_dict() with no arguments omits provenance."""
+        assert "provenance" not in self._make_find_result().to_dict()
+
+    def test_provenance_without_query_results(self):
+        """With no query results there is nothing to report, even when asked."""
+        empty = FindResult(memories=[], resources=[], skills=[])
+        assert "provenance" not in empty.to_dict(include_provenance=True)
+
+    def test_existing_fields_unchanged_with_provenance(self):
+        """The pre-existing fields are identical with and without provenance."""
+        result = self._make_find_result()
+        plain = result.to_dict(include_provenance=False)
+        enriched = result.to_dict(include_provenance=True)
+
+        # Every field that existed before this flag must be unaffected by it.
+        for key in ("memories", "resources", "skills", "total"):
+            assert plain[key] == enriched[key]