diff --git a/docs/images/ov-provenance-example.png b/docs/images/ov-provenance-example.png new file mode 100644 index 000000000..dd961ad90 Binary files /dev/null and b/docs/images/ov-provenance-example.png differ diff --git a/openviking/server/routers/search.py b/openviking/server/routers/search.py index 8b10cdc30..6f6c2054d 100644 --- a/openviking/server/routers/search.py +++ b/openviking/server/routers/search.py @@ -16,7 +16,6 @@ from openviking.telemetry import TelemetryRequest - def _sanitize_floats(obj: Any) -> Any: """Recursively replace inf/nan with 0.0 to ensure JSON compliance.""" if isinstance(obj, float): @@ -29,6 +28,7 @@ def _sanitize_floats(obj: Any) -> Any: return [_sanitize_floats(v) for v in obj] return obj + router = APIRouter(prefix="/api/v1/search", tags=["search"]) @@ -41,6 +41,7 @@ class FindRequest(BaseModel): node_limit: Optional[int] = None score_threshold: Optional[float] = None filter: Optional[Dict[str, Any]] = None + include_provenance: bool = False telemetry: TelemetryRequest = False @@ -54,6 +55,7 @@ class SearchRequest(BaseModel): node_limit: Optional[int] = None score_threshold: Optional[float] = None filter: Optional[Dict[str, Any]] = None + include_provenance: bool = False telemetry: TelemetryRequest = False @@ -96,7 +98,7 @@ async def find( ) result = execution.result if hasattr(result, "to_dict"): - result = result.to_dict() + result = result.to_dict(include_provenance=request.include_provenance) result = _sanitize_floats(result) return Response( status="ok", @@ -136,7 +138,7 @@ async def _search(): ) result = execution.result if hasattr(result, "to_dict"): - result = result.to_dict() + result = result.to_dict(include_provenance=request.include_provenance) result = _sanitize_floats(result) return Response( status="ok", diff --git a/openviking_cli/retrieve/types.py b/openviking_cli/retrieve/types.py index f1ca51060..620595840 100644 --- a/openviking_cli/retrieve/types.py +++ b/openviking_cli/retrieve/types.py @@ -345,8 +345,13 @@ def __iter__(self): def __post_init__(self): self.total = len(self.memories) + len(self.resources) + len(self.skills) - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary format.""" + def to_dict(self, include_provenance: bool = False) -> Dict[str, Any]: + """Convert to dictionary format. + + Args: + include_provenance: If True, include query_results with thinking + trace and searched_directories for retrieval observability. + """ result = { "memories": [self._context_to_dict(m) for m in self.memories], "resources": [self._context_to_dict(r) for r in self.resources], @@ -360,6 +365,9 @@ def to_dict(self) -> Dict[str, Any]: "queries": [self._query_to_dict(q) for q in self.query_plan.queries], } + if include_provenance and self.query_results: + result["provenance"] = [self._query_result_to_dict(qr) for qr in self.query_results] + return result def _context_to_dict(self, ctx: MatchedContext) -> Dict[str, Any]: @@ -385,6 +393,24 @@ def _query_to_dict(self, q: TypedQuery) -> Dict[str, Any]: "priority": q.priority, } + def _query_result_to_dict(self, qr: "QueryResult") -> Dict[str, Any]: + """Convert QueryResult to dict with provenance data.""" + return { + "query": qr.query.query, + "searched_directories": qr.searched_directories, + "matched_contexts": [ + { + "uri": ctx.uri, + "tier": f"L{ctx.level}", + "context_type": ctx.context_type.value, + "score": ctx.score, + "match_reason": ctx.match_reason, + } + for ctx in qr.matched_contexts + ], + "thinking_trace": qr.thinking_trace.to_dict(), + } + @classmethod def from_dict(cls, data: Dict[str, Any]) -> "FindResult": """Construct FindResult from a dictionary (e.g. HTTP JSON response).""" diff --git a/tests/retrieve/test_provenance.py b/tests/retrieve/test_provenance.py new file mode 100644 index 000000000..7b36c6804 --- /dev/null +++ b/tests/retrieve/test_provenance.py @@ -0,0 +1,94 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 +"""Tests for search result provenance metadata.""" + +from __future__ import annotations + +from openviking_cli.retrieve.types import ( + ContextType, + FindResult, + MatchedContext, + QueryResult, + ThinkingTrace, + TypedQuery, +) + + +class TestFindResultProvenance: + def _make_find_result(self) -> FindResult: + """Build a FindResult with query_results for testing.""" + ctx = MatchedContext( + uri="viking://resources/docs/arch.md", + context_type=ContextType.RESOURCE, + level=2, + abstract="Architecture doc", + score=0.87, + match_reason="semantic_match", + ) + query = TypedQuery( + query="architecture", + context_type=ContextType.RESOURCE, + intent="find architecture docs", + ) + trace = ThinkingTrace() + qr = QueryResult( + query=query, + matched_contexts=[ctx], + searched_directories=["resources/", "resources/docs/"], + thinking_trace=trace, + ) + return FindResult( + memories=[], + resources=[ctx], + skills=[], + query_results=[qr], + ) + + def test_to_dict_without_provenance(self): + result = self._make_find_result() + d = result.to_dict(include_provenance=False) + assert "provenance" not in d + assert d["total"] == 1 + assert len(d["resources"]) == 1 + + def test_to_dict_with_provenance(self): + result = self._make_find_result() + d = result.to_dict(include_provenance=True) + assert "provenance" in d + assert len(d["provenance"]) == 1 + + prov = d["provenance"][0] + assert prov["query"] == "architecture" + assert prov["searched_directories"] == ["resources/", "resources/docs/"] + assert len(prov["matched_contexts"]) == 1 + + ctx = prov["matched_contexts"][0] + assert ctx["uri"] == "viking://resources/docs/arch.md" + assert ctx["tier"] == "L2" + assert ctx["context_type"] == "resource" + assert ctx["score"] == 0.87 + assert ctx["match_reason"] == "semantic_match" + + assert "thinking_trace" in prov + assert "statistics" in prov["thinking_trace"] + + def test_to_dict_default_no_provenance(self): + result = self._make_find_result() + d = result.to_dict() + assert "provenance" not in d + + def test_provenance_without_query_results(self): + result = FindResult(memories=[], resources=[], skills=[]) + d = result.to_dict(include_provenance=True) + assert "provenance" not in d + + def test_existing_fields_unchanged_with_provenance(self): + result = self._make_find_result() + d_without = result.to_dict(include_provenance=False) + d_with = result.to_dict(include_provenance=True) + + # All existing fields should be identical + assert d_without["memories"] == d_with["memories"] + assert d_without["resources"] == d_with["resources"] + assert d_without["skills"] == d_with["skills"] + assert d_without["total"] == d_with["total"]