From 4d86ce24ff7d885b30f5d3da6ef5397567d45ce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Mon, 16 Mar 2026 16:51:58 +0100 Subject: [PATCH 01/15] feat(retain): add verbatim extraction mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds retain_extraction_mode="verbatim" that stores each chunk as-is without LLM summarization. The LLM still runs to extract entities, temporal info, and location for full indexability — only the fact text is replaced with the original chunk content (one memory per chunk). Useful for RAG-style indexing and benchmarks where original text must be preserved in memory. - Add "verbatim" to RETAIN_EXTRACTION_MODES in config.py - Add VERBATIM_FACT_EXTRACTION_PROMPT with instructions to preserve text - Add _collapse_to_verbatim() post-processing to enforce 1 fact/chunk - Expose in bank config UI dropdown with updated description - Update configuration.md docs with verbatim mode description - Add unit test for _collapse_to_verbatim and integration test via LLM - Fix pre-existing main.py CLI override missing new reranker fields - Fix pre-existing cross_encoder.py ty type error via setattr --- hindsight-api-slim/hindsight_api/config.py | 6 +- .../engine/retain/fact_extraction.py | 60 ++++++- hindsight-api-slim/tests/test_retain.py | 147 +++++++++++++++--- .../src/components/bank-config-view.tsx | 4 +- .../docs/developer/configuration.md | 15 +- 5 files changed, 202 insertions(+), 30 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/config.py b/hindsight-api-slim/hindsight_api/config.py index 624ef6791..f41ad18b1 100644 --- a/hindsight-api-slim/hindsight_api/config.py +++ b/hindsight-api-slim/hindsight_api/config.py @@ -443,7 +443,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: DEFAULT_RETAIN_CHUNK_SIZE = 3000 # Max chars per chunk for fact extraction DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts 
DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise", "verbose", or "custom" -RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom") # Allowed extraction modes +RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom", "verbatim") # Allowed extraction modes DEFAULT_RETAIN_MISSION = None # Declarative spec of what to retain (injected into any extraction mode) DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None # Custom extraction guidelines (only used when mode="custom") DEFAULT_RETAIN_BATCH_TOKENS = 10_000 # ~40KB of text # Max chars per sub-batch for async retain auto-splitting @@ -1101,9 +1101,7 @@ def from_env(cls) -> "HindsightConfig": ENV_RERANKER_LOCAL_TRUST_REMOTE_CODE, str(DEFAULT_RERANKER_LOCAL_TRUST_REMOTE_CODE) ).lower() in ("true", "1"), - reranker_local_fp16=os.getenv( - ENV_RERANKER_LOCAL_FP16, str(DEFAULT_RERANKER_LOCAL_FP16) - ).lower() + reranker_local_fp16=os.getenv(ENV_RERANKER_LOCAL_FP16, str(DEFAULT_RERANKER_LOCAL_FP16)).lower() in ("true", "1"), reranker_local_bucket_batching=os.getenv( ENV_RERANKER_LOCAL_BUCKET_BATCHING, str(DEFAULT_RERANKER_LOCAL_BUCKET_BATCHING) diff --git a/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py b/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py index f7c897b53..b4a6af633 100644 --- a/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py +++ b/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py @@ -552,6 +552,27 @@ def _chunk_conversation(turns: list[dict], max_chars: int) -> list[str]: examples="", # No examples for custom mode ) +# Verbatim mode: preserve the original text exactly, but still extract metadata +_VERBATIM_GUIDELINES = """══════════════════════════════════════════════════════════════════════════ +VERBATIM MODE — Index content as-is +══════════════════════════════════════════════════════════════════════════ + +Your task is NOT to summarize or rewrite. You must index this content for retrieval. 
+ +RULES: +- Produce EXACTLY ONE fact entry per input chunk. +- In the "what" field, copy the FULL original text verbatim, word for word. Do not shorten, paraphrase, or omit anything. +- Still extract all entities (people, places, organizations, objects, concepts). +- Still extract temporal information (occurred_start, occurred_end, fact_kind). +- Still extract location (where) and people (who) fields. +- fact_type: use "world" unless the content is clearly an interaction with the assistant.""" + +VERBATIM_FACT_EXTRACTION_PROMPT = _BASE_FACT_EXTRACTION_PROMPT.format( + retain_mission_section="{retain_mission_section}", + extraction_guidelines=_VERBATIM_GUIDELINES, + examples="", +) + # Verbose extraction prompt - detailed, comprehensive facts (legacy mode) VERBOSE_FACT_EXTRACTION_PROMPT = """Extract facts from text into structured format with FIVE required dimensions - BE EXTREMELY DETAILED. @@ -770,6 +791,10 @@ def _build_extraction_prompt_and_schema(config) -> tuple[str, type]: ) elif extraction_mode == "verbose": prompt = VERBOSE_FACT_EXTRACTION_PROMPT + elif extraction_mode == "verbatim": + prompt = VERBATIM_FACT_EXTRACTION_PROMPT.format( + retain_mission_section=retain_mission_section, + ) else: base_prompt = CONCISE_FACT_EXTRACTION_PROMPT prompt = base_prompt.format( @@ -2013,15 +2038,46 @@ async def extract_facts_from_contents( global_fact_idx += 1 fact_idx_in_content += 1 - # Step 4: Add time offsets to preserve ordering within each content + # Step 4: For verbatim mode, collapse to one fact per chunk with original text + if config.retain_extraction_mode == "verbatim": + extracted_facts = _collapse_to_verbatim(extracted_facts, chunks_metadata) + + # Step 5: Add time offsets to preserve ordering within each content _add_temporal_offsets(extracted_facts, contents) - # Step 5: Auto-tag facts from label groups with tag=True + # Step 6: Auto-tag facts from label groups with tag=True _inject_label_tags(extracted_facts, config) return extracted_facts, 
chunks_metadata, total_usage +def _collapse_to_verbatim(facts: list[ExtractedFactType], chunks: list[ChunkMetadata]) -> list[ExtractedFactType]: + """ + For verbatim mode: ensure one fact per chunk with the original chunk text preserved. + + The LLM prompt asks for exactly one fact per chunk, but if it returns more, + this collapses them: keeps the first fact as representative, overrides its + fact_text with the raw chunk text, and merges entities from any extra facts. + """ + chunk_text_map = {c.chunk_index: c.chunk_text for c in chunks} + seen: dict[int, ExtractedFactType] = {} + result: list[ExtractedFactType] = [] + + for fact in facts: + if fact.chunk_index not in seen: + fact.fact_text = chunk_text_map.get(fact.chunk_index, fact.fact_text) + seen[fact.chunk_index] = fact + result.append(fact) + else: + # Merge entities from extra facts into the representative + representative = seen[fact.chunk_index] + for entity in fact.entities: + if entity not in representative.entities: + representative.entities.append(entity) + + return result + + def _parse_datetime(date_str: str): """Parse ISO datetime string.""" from dateutil import parser as date_parser diff --git a/hindsight-api-slim/tests/test_retain.py b/hindsight-api-slim/tests/test_retain.py index cfc203abe..b3642369c 100644 --- a/hindsight-api-slim/tests/test_retain.py +++ b/hindsight-api-slim/tests/test_retain.py @@ -1,11 +1,13 @@ """ Test retain function and chunk storage. 
""" -import pytest import logging -from datetime import datetime, timezone, timedelta -from hindsight_api.engine.memory_engine import Budget +from datetime import datetime, timedelta, timezone + +import pytest + from hindsight_api import RequestContext +from hindsight_api.engine.memory_engine import Budget logger = logging.getLogger(__name__) @@ -60,7 +62,7 @@ async def test_retain_with_chunks(memory, request_context): request_context=request_context, ) - print(f"\n=== Recall Results (with chunks) ===") + print("\n=== Recall Results (with chunks) ===") print(f"Found {len(result.results)} results") assert len(result.results) > 0, "Should find facts about Alice" @@ -149,7 +151,7 @@ async def test_chunks_and_entities_follow_fact_order(memory, request_context): request_context=request_context, ) - print(f"\n=== Recall Results ===") + print("\n=== Recall Results ===") print(f"Found {len(result.results)} facts") # Extract the order of entities mentioned in facts @@ -421,7 +423,7 @@ async def test_mentioned_at_vs_occurred(memory, request_context): # Verify it's the historical date, not today assert mentioned_dt.year == 2020, f"mentioned_at should be 2020, got {mentioned_dt.year}" - print(f"✓ Test passed: Historical conversation correctly ingested with event_date=2020") + print("✓ Test passed: Historical conversation correctly ingested with event_date=2020") finally: await memory.delete_bank(bank_id, request_context=request_context) @@ -489,15 +491,15 @@ async def test_occurred_dates_not_defaulted(memory, request_context): # If occurred_start is set, it means the LLM extracted it # In this case, log it but don't fail (LLM behavior can vary) print(f"⚠ LLM extracted occurred_start: {fact.occurred_start}") - print(f" This test expects None for present-tense observations") + print(" This test expects None for present-tense observations") else: - print(f"✓ occurred_start is correctly None (not defaulted to mentioned_at)") + print("✓ occurred_start is correctly None (not 
defaulted to mentioned_at)") if fact.occurred_end is not None: print(f"⚠ LLM extracted occurred_end: {fact.occurred_end}") - print(f" This test expects None for present-tense observations") + print(" This test expects None for present-tense observations") else: - print(f"✓ occurred_end is correctly None (not defaulted to mentioned_at)") + print("✓ occurred_end is correctly None (not defaulted to mentioned_at)") # At least verify they're not equal to mentioned_at if they are set if fact.occurred_start is not None: @@ -513,7 +515,7 @@ async def test_occurred_dates_not_defaulted(memory, request_context): f"occurred_start={occurred_start_dt}, mentioned_at={mentioned_dt}" ) - print(f"✓ Test passed: occurred dates are not incorrectly defaulted to mentioned_at") + print("✓ Test passed: occurred dates are not incorrectly defaulted to mentioned_at") finally: await memory.delete_bank(bank_id, request_context=request_context) @@ -585,7 +587,7 @@ async def test_mentioned_at_from_context_string(memory, request_context): else: print(f"⚠ LLM did not extract date from context, fell back to now(): {mentioned_dt}") - print(f"✓ mentioned_at is always set (never None)") + print("✓ mentioned_at is always set (never None)") finally: await memory.delete_bank(bank_id, request_context=request_context) @@ -851,8 +853,8 @@ async def test_metadata_storage_and_retrieval(memory, request_context): assert len(result.results) > 0, "Should recall stored facts" - print(f"✓ Successfully stored and retrieved facts") - print(f" (Note: Metadata support depends on API implementation)") + print("✓ Successfully stored and retrieved facts") + print(" (Note: Metadata support depends on API implementation)") finally: await memory.delete_bank(bank_id, request_context=request_context) @@ -954,7 +956,7 @@ async def test_mixed_content_batch(memory, request_context): short_units = len(unit_ids[0]) long_units = len(unit_ids[1]) - print(f"✓ Mixed batch processed successfully") + print("✓ Mixed batch processed 
successfully") print(f" Short content: {short_units} units") print(f" Long content: {long_units} units") @@ -1356,7 +1358,7 @@ async def test_chunks_truncation_behavior(memory, request_context): if truncated_chunks: print(f" {len(truncated_chunks)} chunks were truncated due to token limit") else: - print(f" No chunks were truncated (content within limit)") + print(" No chunks were truncated (content within limit)") else: print("✓ No chunks returned (may be under token limit)") @@ -2210,9 +2212,10 @@ async def test_custom_extraction_mode(): custom guidelines while keeping structural parts intact. """ import os + from hindsight_api import LLMConfig + from hindsight_api.config import _get_raw_config, clear_config_cache from hindsight_api.engine.retain.fact_extraction import extract_facts_from_text - from hindsight_api.config import clear_config_cache, _get_raw_config # Save original env vars original_mode = os.getenv("HINDSIGHT_API_RETAIN_EXTRACTION_MODE") @@ -2284,7 +2287,7 @@ async def test_custom_extraction_mode(): if found_english_only: logger.warning(f"⚠ Found English-only keywords in facts: {found_english_only}") logger.warning(f" Facts: {all_facts_text}") - logger.warning(f" This may indicate the LLM is not strictly following language-specific custom guidelines") + logger.warning(" This may indicate the LLM is not strictly following language-specific custom guidelines") # Log but don't fail - LLM behavior can vary else: logger.info("✓ Successfully extracted only Italian facts, ignored English facts") @@ -2315,6 +2318,109 @@ async def test_custom_extraction_mode(): clear_config_cache() +def test_collapse_to_verbatim_single_fact_per_chunk(): + """ + Unit test for _collapse_to_verbatim: + - One fact per chunk → text overridden with original chunk text + - Two facts from same chunk → collapsed to one, entities merged + """ + from hindsight_api.engine.retain.fact_extraction import _collapse_to_verbatim + from hindsight_api.engine.retain.types import ChunkMetadata, 
ExtractedFact + + chunks = [ + ChunkMetadata(chunk_text="Alice went to Paris.", fact_count=1, content_index=0, chunk_index=0), + ChunkMetadata(chunk_text="Bob fixed the bug yesterday.", fact_count=2, content_index=0, chunk_index=1), + ] + + facts = [ + ExtractedFact(fact_text="LLM paraphrase of Alice in Paris", fact_type="world", entities=["Alice", "Paris"], chunk_index=0, content_index=0), + ExtractedFact(fact_text="LLM first fact about Bob", fact_type="world", entities=["Bob"], chunk_index=1, content_index=0), + ExtractedFact(fact_text="LLM second fact about bug", fact_type="world", entities=["bug"], chunk_index=1, content_index=0), + ] + + result = _collapse_to_verbatim(facts, chunks) + + assert len(result) == 2, "Should produce exactly one fact per chunk" + + # Chunk 0: text overridden with original chunk text + assert result[0].fact_text == "Alice went to Paris.", "Text must be the raw chunk text" + assert result[0].entities == ["Alice", "Paris"] + + # Chunk 1: collapsed to one fact, entities merged from both LLM facts + assert result[1].fact_text == "Bob fixed the bug yesterday.", "Text must be the raw chunk text" + assert "Bob" in result[1].entities + assert "bug" in result[1].entities + + +@pytest.mark.asyncio +async def test_verbatim_extraction_mode(): + """ + Integration test for verbatim extraction mode. + + Verifies that: + 1. Each chunk produces exactly one fact + 2. The fact text is the original chunk text, not a paraphrase + 3. Entities are still extracted by the LLM + 4. 
Temporal info (occurred_start) is still extracted + """ + import os + + from hindsight_api import LLMConfig + from hindsight_api.config import _get_raw_config, clear_config_cache + from hindsight_api.engine.retain.fact_extraction import extract_facts_from_contents + from hindsight_api.engine.retain.types import RetainContent + + original_mode = os.getenv("HINDSIGHT_API_RETAIN_EXTRACTION_MODE") + + try: + os.environ["HINDSIGHT_API_RETAIN_EXTRACTION_MODE"] = "verbatim" + clear_config_cache() + + text = ( + "Alice joined the infrastructure team on March 5, 2024. " + "She holds a CKA certification and has 5 years of Kubernetes experience." + ) + + llm_config = LLMConfig.for_memory() + contents = [RetainContent(content=text, event_date=datetime(2024, 3, 10, tzinfo=timezone.utc), context="onboarding notes")] + facts, chunks, _ = await extract_facts_from_contents( + contents=contents, + llm_config=llm_config, + agent_name="TestAgent", + config=_get_raw_config(), + ) + + logger.info(f"Verbatim mode extracted {len(facts)} facts from {len(chunks)} chunks") + for i, f in enumerate(facts): + logger.info(f" fact[{i}]: {f.fact_text!r} entities={f.entities}") + + # One fact per chunk + assert len(facts) == len(chunks), "Verbatim mode must produce exactly one fact per chunk" + + # Text must match the original chunk exactly + for fact, chunk in zip(facts, chunks): + assert fact.fact_text == chunk.chunk_text, ( + f"fact_text must equal original chunk text.\n" + f" expected: {chunk.chunk_text!r}\n" + f" got: {fact.fact_text!r}" + ) + + # Entities should still be extracted + all_entities = [e for f in facts for e in f.entities] + assert any("alice" in e.lower() for e in all_entities), ( + f"Expected entity 'Alice' to be extracted. 
Entities: {all_entities}" + ) + + logger.info("✓ Verbatim mode preserves chunk text and still extracts entities") + + finally: + if original_mode is not None: + os.environ["HINDSIGHT_API_RETAIN_EXTRACTION_MODE"] = original_mode + else: + os.environ.pop("HINDSIGHT_API_RETAIN_EXTRACTION_MODE", None) + clear_config_cache() + + @pytest.mark.asyncio async def test_retain_batch_with_per_item_tags_on_document(memory, request_context): """ @@ -2349,7 +2455,7 @@ async def test_retain_batch_with_per_item_tags_on_document(memory, request_conte ) assert len(result) > 0, "Should have retained content" - print(f"\n=== Retained content with tags ===") + print("\n=== Retained content with tags ===") # Retrieve the document doc = await memory.get_document( @@ -2380,6 +2486,7 @@ async def test_retain_batch_with_per_item_tags_on_document(memory, request_conte def test_retain_mission_injected_into_prompt(): """Test that retain_mission is injected as a FOCUS section into any extraction mode.""" from unittest.mock import MagicMock + from hindsight_api.engine.retain.fact_extraction import _build_extraction_prompt_and_schema spec = "Focus on technical decisions and architecture choices only." 
@@ -2405,6 +2512,7 @@ def test_retain_mission_injected_into_prompt(): def test_retain_mission_absent_when_not_set(): """Test that no FOCUS section appears when retain_mission is not set.""" from unittest.mock import MagicMock + from hindsight_api.engine.retain.fact_extraction import _build_extraction_prompt_and_schema config = MagicMock() @@ -2421,6 +2529,7 @@ def test_retain_mission_absent_when_not_set(): def test_retain_mission_config_loaded_from_env(): """Test that retain_mission is loaded from env and is a configurable field.""" import os + from hindsight_api.config import HindsightConfig, _get_raw_config, clear_config_cache original = os.getenv("HINDSIGHT_API_RETAIN_MISSION") diff --git a/hindsight-control-plane/src/components/bank-config-view.tsx b/hindsight-control-plane/src/components/bank-config-view.tsx index ba1c63c7a..07105e912 100644 --- a/hindsight-control-plane/src/components/bank-config-view.tsx +++ b/hindsight-control-plane/src/components/bank-config-view.tsx @@ -452,7 +452,7 @@ export function BankConfigView() { /> - {["concise", "verbose", "verbatim", "custom"].map((opt) => ( + {["concise", "verbose", "verbatim", "index_only", "custom"].map((opt) => ( {opt} diff --git a/hindsight-docs/docs/developer/configuration.md b/hindsight-docs/docs/developer/configuration.md index 9c9b2d49c..701dac590 100644 --- a/hindsight-docs/docs/developer/configuration.md +++ b/hindsight-docs/docs/developer/configuration.md @@ -567,7 +567,7 @@ Controls the retain (memory ingestion) pipeline. |----------|-------------|---------| | `HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS` | Max completion tokens for fact extraction LLM calls | `64000` | | `HINDSIGHT_API_RETAIN_CHUNK_SIZE` | Max characters per chunk for fact extraction. Larger chunks extract fewer LLM calls but may lose context. 
| `3000` | -| `HINDSIGHT_API_RETAIN_EXTRACTION_MODE` | Fact extraction mode: `concise`, `verbose`, `verbatim`, or `custom` | `concise` | +| `HINDSIGHT_API_RETAIN_EXTRACTION_MODE` | Fact extraction mode: `concise`, `verbose`, `verbatim`, `index_only`, or `custom` | `concise` | | `HINDSIGHT_API_RETAIN_MISSION` | What this bank should pay attention to during extraction. Steers the LLM without replacing the extraction rules — works alongside any extraction mode. | - | | `HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS` | Full prompt override for fact extraction (only used when mode is `custom`). Replaces built-in extraction rules entirely. | - | | `HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS` | Extract causal relationships between facts | `true` | @@ -578,18 +578,19 @@ Controls the retain (memory ingestion) pipeline. #### Customizing retain: when to use what -There are four levels of customization for the retain pipeline. Start with the simplest that covers your needs: +There are five levels of customization for the retain pipeline. Start with the simplest that covers your needs: | Goal | Use | |------|-----| | Steer what topics to focus on or deprioritize | `HINDSIGHT_API_RETAIN_MISSION` | | Extract more detail per fact | `HINDSIGHT_API_RETAIN_EXTRACTION_MODE=verbose` | -| Store chunks as-is (indexing / RAG use case) | `HINDSIGHT_API_RETAIN_EXTRACTION_MODE=verbatim` | +| Store chunks as-is, LLM extracts metadata | `HINDSIGHT_API_RETAIN_EXTRACTION_MODE=verbatim` | +| Store chunks as-is, zero LLM cost | `HINDSIGHT_API_RETAIN_EXTRACTION_MODE=index_only` | | Completely replace the extraction rules | `HINDSIGHT_API_RETAIN_EXTRACTION_MODE=custom` + `HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS` | **`HINDSIGHT_API_RETAIN_MISSION` — steer extraction without replacing it (recommended starting point)** -Tell the bank what to pay attention to during extraction, in plain language. 
The mission is injected into the extraction prompt alongside the built-in rules — it narrows focus without replacing the underlying logic. Works with any extraction mode (`concise`, `verbose`, `verbatim`, `custom`). +Tell the bank what to pay attention to during extraction, in plain language. The mission is injected into the extraction prompt alongside the built-in rules — it narrows focus without replacing the underlying logic. Works with any extraction mode (`concise`, `verbose`, `verbatim`, `custom`). Ignored in `index_only` mode. ```bash export HINDSIGHT_API_RETAIN_MISSION="Focus on technical decisions, architecture choices, and team member expertise. Deprioritize social or personal information." @@ -607,6 +608,14 @@ Each chunk is stored as a single memory unit with its original text preserved ex export HINDSIGHT_API_RETAIN_EXTRACTION_MODE=verbatim ``` +**`HINDSIGHT_API_RETAIN_EXTRACTION_MODE=index_only` — zero LLM cost** + +Each chunk is stored as-is with no LLM call whatsoever. No entity extraction, no temporal indexing — only embeddings are generated for semantic search. User-provided entities passed via `RetainContent.entities` are the sole source of entity data. Use when ingestion speed and cost matter more than structured metadata. + +```bash +export HINDSIGHT_API_RETAIN_EXTRACTION_MODE=index_only +``` + **`HINDSIGHT_API_RETAIN_EXTRACTION_MODE=custom` + `HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS` — full control** Replaces the built-in selectivity rules entirely. The structural parts of the prompt (output format, temporal handling, coreference resolution) remain intact — only the extraction guidelines are replaced. 
From 100c0c8361994eb15985b48f7bc88f4b88b40feb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 14:52:40 +0100 Subject: [PATCH 04/15] feat(retain): add named retain strategies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows mixing extraction modes in a single bank via named strategies. Each strategy is a set of hierarchical config overrides (extraction_mode, chunk_size, entity_labels, entities_allow_free_form, etc.) applied on top of the resolved bank config at retain time. - retain_strategies: dict of strategy_name → config overrides (bank config) - retain_default_strategy: default strategy when none specified (bank config) - strategy field on /retain request: per-call override - apply_strategy() in config_resolver applies overrides via dataclasses.replace() - strategy propagates through retain_batch_async → _retain_batch_async_internal and through the async worker task payload - Any hierarchical field is overridable per strategy, including entity_labels and entities_allow_free_form - Docs updated with strategy configuration example and RRF fairness note - Unit test for apply_strategy covering overrides, unknown strategy, and non-hierarchical field filtering --- hindsight-api-slim/hindsight_api/api/http.py | 8 +++- hindsight-api-slim/hindsight_api/config.py | 9 ++++ .../hindsight_api/config_resolver.py | 34 +++++++++++++- .../hindsight_api/engine/memory_engine.py | 15 +++++++ hindsight-api-slim/hindsight_api/main.py | 2 + hindsight-api-slim/tests/test_retain.py | 45 +++++++++++++++++++ .../docs/developer/configuration.md | 40 +++++++++++++++++ 7 files changed, 151 insertions(+), 2 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index f7a36137c..25176ac8f 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -471,6 +471,11 @@ class 
RetainRequest(BaseModel): alias="async", description="If true, process asynchronously in background. If false, wait for completion (default: false)", ) + strategy: str | None = Field( + default=None, + description="Named retain strategy to use for this request. Overrides the bank's default strategy. " + "Strategies are defined in the bank config under 'retain_strategies'.", + ) document_tags: list[str] | None = Field( default=None, description="Deprecated. Use item-level tags instead.", @@ -4466,7 +4471,7 @@ async def api_retain( if request.async_: # Async processing: queue task and return immediately result = await app.state.memory.submit_async_retain( - bank_id, contents, document_tags=request.document_tags, request_context=request_context + bank_id, contents, document_tags=request.document_tags, strategy=request.strategy, request_context=request_context ) return RetainResponse.model_validate( { @@ -4499,6 +4504,7 @@ async def api_retain( bank_id=bank_id, contents=contents, document_tags=request.document_tags, + strategy=request.strategy, request_context=request_context, return_usage=True, outbox_callback=app.state.memory._build_retain_outbox_callback( diff --git a/hindsight-api-slim/hindsight_api/config.py b/hindsight-api-slim/hindsight_api/config.py index a0b282cf2..f014d246d 100644 --- a/hindsight-api-slim/hindsight_api/config.py +++ b/hindsight-api-slim/hindsight_api/config.py @@ -267,6 +267,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE" ENV_RETAIN_MISSION = "HINDSIGHT_API_RETAIN_MISSION" ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS" +ENV_RETAIN_DEFAULT_STRATEGY = "HINDSIGHT_API_RETAIN_DEFAULT_STRATEGY" ENV_RETAIN_BATCH_TOKENS = "HINDSIGHT_API_RETAIN_BATCH_TOKENS" ENV_RETAIN_ENTITY_LOOKUP = "HINDSIGHT_API_RETAIN_ENTITY_LOOKUP" ENV_RETAIN_BATCH_ENABLED = "HINDSIGHT_API_RETAIN_BATCH_ENABLED" @@ -446,6 +447,8 @@ def 
normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom", "verbatim", "index_only") # Allowed extraction modes DEFAULT_RETAIN_MISSION = None # Declarative spec of what to retain (injected into any extraction mode) DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None # Custom extraction guidelines (only used when mode="custom") +DEFAULT_RETAIN_DEFAULT_STRATEGY = None # Default strategy name (None = no strategy override) +DEFAULT_RETAIN_STRATEGIES: dict | None = None # Named retain strategies (dict of name → config overrides) DEFAULT_RETAIN_BATCH_TOKENS = 10_000 # ~40KB of text # Max chars per sub-batch for async retain auto-splitting DEFAULT_RETAIN_ENTITY_LOOKUP = "trigram" # "full" or "trigram" DEFAULT_RETAIN_BATCH_ENABLED = False # Use LLM Batch API for fact extraction (only when async=True) @@ -719,6 +722,8 @@ class HindsightConfig: retain_extraction_mode: str retain_mission: str | None retain_custom_instructions: str | None + retain_default_strategy: str | None + retain_strategies: dict | None retain_batch_tokens: int retain_batch_enabled: bool retain_batch_poll_interval_seconds: int @@ -849,6 +854,8 @@ class HindsightConfig: "retain_extraction_mode", "retain_mission", "retain_custom_instructions", + "retain_default_strategy", + "retain_strategies", # Entity labels (controlled vocabulary for entity classification) "entity_labels", "entities_allow_free_form", @@ -1175,6 +1182,8 @@ def from_env(cls) -> "HindsightConfig": ), retain_mission=os.getenv(ENV_RETAIN_MISSION) or DEFAULT_RETAIN_MISSION, retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS, + retain_default_strategy=os.getenv(ENV_RETAIN_DEFAULT_STRATEGY) or DEFAULT_RETAIN_DEFAULT_STRATEGY, + retain_strategies=DEFAULT_RETAIN_STRATEGIES, retain_batch_tokens=int(os.getenv(ENV_RETAIN_BATCH_TOKENS, str(DEFAULT_RETAIN_BATCH_TOKENS))), retain_entity_lookup=os.getenv(ENV_RETAIN_ENTITY_LOOKUP, 
DEFAULT_RETAIN_ENTITY_LOOKUP), retain_batch_enabled=os.getenv(ENV_RETAIN_BATCH_ENABLED, str(DEFAULT_RETAIN_BATCH_ENABLED)).lower() diff --git a/hindsight-api-slim/hindsight_api/config_resolver.py b/hindsight-api-slim/hindsight_api/config_resolver.py index e5de3946a..ce0b080e7 100644 --- a/hindsight-api-slim/hindsight_api/config_resolver.py +++ b/hindsight-api-slim/hindsight_api/config_resolver.py @@ -10,7 +10,7 @@ import json import logging -from dataclasses import asdict +from dataclasses import asdict, replace from typing import Any import asyncpg @@ -273,3 +273,35 @@ async def reset_bank_config(self, bank_id: str) -> None: ) logger.info(f"Reset bank config for {bank_id} to defaults") + + +def apply_strategy(config: HindsightConfig, strategy_name: str) -> HindsightConfig: + """ + Apply a named retain strategy's overrides on top of a resolved config. + + A strategy is a named set of hierarchical field overrides stored in + config.retain_strategies. Any field in _HIERARCHICAL_FIELDS can be + overridden, including retain_extraction_mode, retain_chunk_size, + entity_labels, entities_allow_free_form, etc. + + Unknown strategy names log a warning and return config unchanged. + Unknown or non-hierarchical fields in the strategy are silently ignored. 
+ """ + strategies = config.retain_strategies or {} + if strategy_name not in strategies: + logger.warning(f"Unknown retain strategy '{strategy_name}', using resolved config as-is") + return config + + overrides = strategies[strategy_name] + if not isinstance(overrides, dict): + logger.warning(f"Retain strategy '{strategy_name}' is not a dict, skipping") + return config + + configurable = HindsightConfig.get_configurable_fields() + filtered = {k: v for k, v in overrides.items() if k in configurable} + + if not filtered: + return config + + logger.debug(f"Applying retain strategy '{strategy_name}': {list(filtered.keys())}") + return replace(config, **filtered) diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index d8ae44f76..1a11204f3 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -561,6 +561,7 @@ async def _handle_batch_retain(self, task_dict: dict[str, Any]): contents = task_dict.get("contents", []) document_tags = task_dict.get("document_tags") operation_id = task_dict.get("operation_id") # For batch API crash recovery + strategy = task_dict.get("strategy") logger.info( f"[BATCH_RETAIN_TASK] Starting background batch retain for bank_id={bank_id}, {len(contents)} items, operation_id={operation_id}" @@ -584,6 +585,7 @@ async def _handle_batch_retain(self, task_dict: dict[str, Any]): document_tags=document_tags, request_context=context, operation_id=operation_id, + strategy=strategy, outbox_callback=self._build_retain_outbox_callback( bank_id=bank_id, contents=contents, @@ -1953,6 +1955,7 @@ async def retain_batch_async( return_usage: bool = False, operation_id: str | None = None, outbox_callback: "Callable[[asyncpg.Connection], Awaitable[None]] | None" = None, + strategy: str | None = None, ): """ Store multiple content items as memory units in ONE batch operation. 
@@ -2110,6 +2113,7 @@ async def retain_batch_async( confidence_score=confidence_score, document_tags=document_tags, operation_id=operation_id, + strategy=strategy, # Outbox callback runs inside the last sub-batch's transaction so the # webhook delivery row is committed atomically with the final retain data. outbox_callback=outbox_callback if i == len(sub_batches) else None, @@ -2134,6 +2138,7 @@ async def retain_batch_async( confidence_score=confidence_score, document_tags=document_tags, operation_id=operation_id, + strategy=strategy, outbox_callback=outbox_callback, ) @@ -2186,6 +2191,7 @@ async def _retain_batch_async_internal( document_tags: list[str] | None = None, operation_id: str | None = None, outbox_callback: "Callable[[asyncpg.Connection], Awaitable[None]] | None" = None, + strategy: str | None = None, ) -> tuple[list[list[str]], "TokenUsage"]: """ Internal method for batch processing without chunking logic. @@ -2218,6 +2224,12 @@ async def _retain_batch_async_internal( # Resolve bank-specific config for this operation resolved_config = await self._config_resolver.resolve_full_config(bank_id, request_context) + # Apply strategy overrides: explicit strategy > bank default strategy + from hindsight_api.config_resolver import apply_strategy + effective_strategy = strategy or resolved_config.retain_default_strategy + if effective_strategy: + resolved_config = apply_strategy(resolved_config, effective_strategy) + # Create parent span for retain operation with create_operation_span("retain", bank_id): return await orchestrator.retain_batch( @@ -7377,6 +7389,7 @@ async def submit_async_retain( *, request_context: "RequestContext", document_tags: list[str] | None = None, + strategy: str | None = None, ) -> dict[str, Any]: """Submit a batch retain operation to run asynchronously. 
@@ -7485,6 +7498,8 @@ async def submit_async_retain( task_payload: dict[str, Any] = {"contents": sub_batch} if document_tags: task_payload["document_tags"] = document_tags + if strategy: + task_payload["strategy"] = strategy # Pass tenant_id and api_key_id through task payload if request_context.tenant_id: task_payload["_tenant_id"] = request_context.tenant_id diff --git a/hindsight-api-slim/hindsight_api/main.py b/hindsight-api-slim/hindsight_api/main.py index c0e800660..7a0d536ba 100644 --- a/hindsight-api-slim/hindsight_api/main.py +++ b/hindsight-api-slim/hindsight_api/main.py @@ -257,6 +257,8 @@ def main(): retain_extraction_mode=config.retain_extraction_mode, retain_mission=config.retain_mission, retain_custom_instructions=config.retain_custom_instructions, + retain_default_strategy=config.retain_default_strategy, + retain_strategies=config.retain_strategies, retain_batch_tokens=config.retain_batch_tokens, retain_entity_lookup=config.retain_entity_lookup, retain_batch_enabled=config.retain_batch_enabled, diff --git a/hindsight-api-slim/tests/test_retain.py b/hindsight-api-slim/tests/test_retain.py index 84b176951..2da3e669f 100644 --- a/hindsight-api-slim/tests/test_retain.py +++ b/hindsight-api-slim/tests/test_retain.py @@ -2318,6 +2318,51 @@ async def test_custom_extraction_mode(): clear_config_cache() +def test_apply_strategy(): + """ + Unit test for apply_strategy: + - Known strategy applies overrides on top of resolved config + - Unknown strategy returns config unchanged with a warning + - Non-hierarchical fields in a strategy are silently ignored + - entity_labels and entities_allow_free_form are overridable + """ + from hindsight_api.config import _get_raw_config, clear_config_cache + from hindsight_api.config_resolver import apply_strategy + + clear_config_cache() + base_config = _get_raw_config() + + strategies = { + "documents": { + "retain_extraction_mode": "index_only", + "retain_chunk_size": 800, + "entities_allow_free_form": False, + }, + 
"bad_field": { + "database_url": "should-be-ignored", # static field, not hierarchical + "retain_extraction_mode": "verbose", + }, + } + config_with_strategies = base_config.__class__( + **{**base_config.__dict__, "retain_strategies": strategies} + ) + + # Known strategy: overrides applied + result = apply_strategy(config_with_strategies, "documents") + assert result.retain_extraction_mode == "index_only" + assert result.retain_chunk_size == 800 + assert result.entities_allow_free_form is False + + # Non-hierarchical field silently ignored, hierarchical one applied + result2 = apply_strategy(config_with_strategies, "bad_field") + assert result2.retain_extraction_mode == "verbose" + assert result2.database_url == base_config.database_url # unchanged + + # Unknown strategy: config returned unchanged + result3 = apply_strategy(config_with_strategies, "nonexistent") + assert result3.retain_extraction_mode == base_config.retain_extraction_mode + + def test_collapse_to_verbatim_single_fact_per_chunk(): """ Unit test for _collapse_to_verbatim: diff --git a/hindsight-docs/docs/developer/configuration.md b/hindsight-docs/docs/developer/configuration.md index 701dac590..af87ecea2 100644 --- a/hindsight-docs/docs/developer/configuration.md +++ b/hindsight-docs/docs/developer/configuration.md @@ -608,6 +608,46 @@ Each chunk is stored as a single memory unit with its original text preserved ex export HINDSIGHT_API_RETAIN_EXTRACTION_MODE=verbatim ``` +**`retain_strategies` / `retain_default_strategy` — per-call extraction strategy** + +Named strategies let you ingest different content types into the same bank using different extraction settings. A strategy is a set of hierarchical field overrides applied on top of the resolved bank config. + +Any field in the hierarchical config can be overridden per strategy, including `retain_extraction_mode`, `retain_chunk_size`, `entity_labels`, `entities_allow_free_form`, `retain_mission`, etc. 
+
+Configure strategies via the bank config API:
+
+```json
+{
+  "retain_default_strategy": "conversations",
+  "retain_strategies": {
+    "conversations": {
+      "retain_extraction_mode": "concise",
+      "retain_chunk_size": 3000
+    },
+    "documents": {
+      "retain_extraction_mode": "index_only",
+      "retain_chunk_size": 800,
+      "entity_labels": null,
+      "entities_allow_free_form": false
+    }
+  }
+}
+```
+
+Then specify the strategy at retain time:
+
+```python
+# Uses default strategy ("conversations")
+client.retain(bank_id, items=[{"content": "Alice joined the team today"}])
+
+# Explicitly use document strategy
+client.retain(bank_id, items=[{"content": "...document text..."}], strategy="documents")
+```
+
+If no `strategy` is specified in a retain call, `retain_default_strategy` is used. If neither is set, the bank/global config applies directly.
+
+> **Note on chunk size and retrieval fairness**: When mixing strategies with very different chunk sizes in the same bank, `index_only` memories participate only in semantic retrieval (not entity graph or temporal paths), while `verbatim` memories keep full entity and temporal indexing. Smaller chunk sizes (e.g., 800 chars) produce more targeted embeddings and are recommended for document strategies to keep scores comparable with LLM-extracted facts.
+
 **`HINDSIGHT_API_RETAIN_EXTRACTION_MODE=index_only` — zero LLM cost**
 
 Each chunk is stored as-is with no LLM call whatsoever. No entity extraction, no temporal indexing — only embeddings are generated for semantic search. User-provided entities passed via `RetainContent.entities` are the sole source of entity data. Use when ingestion speed and cost matter more than structured metadata.
From a66ff19a15dbb5fec1eda0473ea8f604748b5e53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 15:01:43 +0100 Subject: [PATCH 05/15] feat(retain): add per-item strategy and strategy tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `strategy` field to `MemoryItem` so individual items in a retain request can override the request-level strategy - Add `strategy` field to `FileRetainMetadata` for per-file strategy override in file retain requests - Group memory items by effective strategy in `api_retain`; each group is processed as a separate batch, results are aggregated - Thread strategy through `submit_async_file_retain` → `_handle_file_convert_retain` → retain task payload - Add `operation_ids` to `RetainResponse` for async requests with mixed per-item strategies - Add `test_strategy_overrides_extraction_mode_for_index_only`: unit test verifying a named strategy with index_only bypasses the LLM - Add `test_retain_request_per_item_strategy_field`: unit test for per-item strategy grouping logic --- hindsight-api-slim/hindsight_api/api/http.py | 94 ++++++++++++++----- .../hindsight_api/engine/memory_engine.py | 5 + hindsight-api-slim/tests/test_retain.py | 79 ++++++++++++++++ 3 files changed, 153 insertions(+), 25 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index 25176ac8f..2fe245c55 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -425,6 +425,11 @@ class MemoryItem(BaseModel): "A list of tag lists runs one pass per inner list, giving full control over which combinations to use." ), ) + strategy: str | None = Field( + default=None, + description="Named retain strategy for this item. Overrides the request-level strategy for this item only. 
" + "Strategies are defined in the bank config under 'retain_strategies'.", + ) @field_validator("timestamp", mode="before") @classmethod @@ -496,6 +501,11 @@ class FileRetainMetadata(BaseModel): description="Parser or ordered fallback chain for this file (overrides request-level parser). " "E.g. 'iris' or ['iris', 'markitdown'].", ) + strategy: str | None = Field( + default=None, + description="Named retain strategy for this file. Overrides the bank's default strategy. " + "Strategies are defined in the bank config under 'retain_strategies'.", + ) class FileRetainRequest(BaseModel): @@ -549,7 +559,11 @@ class RetainResponse(BaseModel): ) operation_id: str | None = Field( default=None, - description="Operation ID for tracking async operations. Use GET /v1/default/banks/{bank_id}/operations to list operations. Only present when async=true.", + description="Operation ID for tracking async operations. Use GET /v1/default/banks/{bank_id}/operations to list operations. Only present when async=true. When items use different per-item strategies, use operation_ids instead.", + ) + operation_ids: list[str] | None = Field( + default=None, + description="Operation IDs when items were submitted as multiple strategy groups (async=true with mixed per-item strategies). 
operation_id is set to the first entry for backward compatibility.", ) usage: TokenUsage | None = Field( default=None, @@ -4446,10 +4460,13 @@ async def api_retain( metrics = get_metrics_collector() try: - # Prepare contents for processing - contents = [] + # Group items by effective strategy (item-level overrides request-level) + strategy_groups: dict[str | None, list[dict]] = {} for item in request.items: - content_dict = {"content": item.content} + effective = item.strategy if item.strategy is not None else request.strategy + if effective not in strategy_groups: + strategy_groups[effective] = [] + content_dict: dict = {"content": item.content} if item.timestamp == "unset": content_dict["event_date"] = None elif item.timestamp: @@ -4466,20 +4483,30 @@ async def api_retain( content_dict["tags"] = item.tags if item.observation_scopes is not None: content_dict["observation_scopes"] = item.observation_scopes - contents.append(content_dict) + strategy_groups[effective].append(content_dict) if request.async_: - # Async processing: queue task and return immediately - result = await app.state.memory.submit_async_retain( - bank_id, contents, document_tags=request.document_tags, strategy=request.strategy, request_context=request_context - ) + # Async processing: one submit per strategy group + all_operation_ids = [] + total_items_count = 0 + for group_strategy, contents in strategy_groups.items(): + result = await app.state.memory.submit_async_retain( + bank_id, + contents, + document_tags=request.document_tags, + strategy=group_strategy, + request_context=request_context, + ) + all_operation_ids.append(result["operation_id"]) + total_items_count += result["items_count"] return RetainResponse.model_validate( { "success": True, "bank_id": bank_id, - "items_count": result["items_count"], + "items_count": total_items_count, "async": True, - "operation_id": result["operation_id"], + "operation_id": all_operation_ids[0] if all_operation_ids else None, + "operation_ids": 
all_operation_ids if len(all_operation_ids) > 1 else None, } ) else: @@ -4498,25 +4525,41 @@ async def api_retain( ), ) - # Synchronous processing: wait for completion (record metrics) + # Synchronous processing: one batch per strategy group, aggregate results + total_items_count = 0 + total_usage = TokenUsage(input_tokens=0, output_tokens=0, total_tokens=0) with metrics.record_operation("retain", bank_id=bank_id, source="api"): - result, usage = await app.state.memory.retain_batch_async( - bank_id=bank_id, - contents=contents, - document_tags=request.document_tags, - strategy=request.strategy, - request_context=request_context, - return_usage=True, - outbox_callback=app.state.memory._build_retain_outbox_callback( + for group_strategy, contents in strategy_groups.items(): + result, usage = await app.state.memory.retain_batch_async( bank_id=bank_id, contents=contents, - operation_id=None, - schema=_current_schema.get(), - ), - ) + document_tags=request.document_tags, + strategy=group_strategy, + request_context=request_context, + return_usage=True, + outbox_callback=app.state.memory._build_retain_outbox_callback( + bank_id=bank_id, + contents=contents, + operation_id=None, + schema=_current_schema.get(), + ), + ) + total_items_count += len(contents) + if usage: + total_usage = TokenUsage( + input_tokens=total_usage.input_tokens + usage.input_tokens, + output_tokens=total_usage.output_tokens + usage.output_tokens, + total_tokens=total_usage.total_tokens + usage.total_tokens, + ) return RetainResponse.model_validate( - {"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False, "usage": usage} + { + "success": True, + "bank_id": bank_id, + "items_count": total_items_count, + "async": False, + "usage": total_usage, + } ) except OperationValidationError as e: raise HTTPException(status_code=e.status_code, detail=e.reason) @@ -4679,6 +4722,7 @@ async def read(self): "tags": file_meta.tags or [], "timestamp": file_meta.timestamp, "parser": 
parser_chain, + "strategy": file_meta.strategy, } file_items.append(item) diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index 1a11204f3..fb0816688 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -714,6 +714,8 @@ async def _handle_file_convert_retain(self, task_dict: dict[str, Any]): retain_task_payload: dict[str, Any] = {"contents": retain_contents} if document_tags: retain_task_payload["document_tags"] = document_tags + if task_dict.get("strategy"): + retain_task_payload["strategy"] = task_dict["strategy"] # Pass tenant/api_key context through to retain task if task_dict.get("_tenant_id"): @@ -2226,6 +2228,7 @@ async def _retain_batch_async_internal( # Apply strategy overrides: explicit strategy > bank default strategy from hindsight_api.config_resolver import apply_strategy + effective_strategy = strategy or resolved_config.retain_default_strategy if effective_strategy: resolved_config = apply_strategy(resolved_config, effective_strategy) @@ -7614,6 +7617,8 @@ async def submit_async_file_retain( "document_tags": document_tags or [], "timestamp": item.get("timestamp"), } + if item.get("strategy"): + task_payload["strategy"] = item["strategy"] # Pass tenant_id and api_key_id through task payload if request_context.tenant_id: diff --git a/hindsight-api-slim/tests/test_retain.py b/hindsight-api-slim/tests/test_retain.py index 2da3e669f..1e38da084 100644 --- a/hindsight-api-slim/tests/test_retain.py +++ b/hindsight-api-slim/tests/test_retain.py @@ -2649,3 +2649,82 @@ def test_retain_mission_config_loaded_from_env(): else: os.environ["HINDSIGHT_API_RETAIN_MISSION"] = original clear_config_cache() + + +def test_strategy_overrides_extraction_mode_for_index_only(): + """ + Unit test: a named strategy with retain_extraction_mode=index_only causes + extract_facts_from_contents to skip the LLM and return 
verbatim chunks. + """ + import asyncio + + from hindsight_api.config import _get_raw_config, clear_config_cache + from hindsight_api.config_resolver import apply_strategy + from hindsight_api.engine.retain.fact_extraction import extract_facts_from_contents + from hindsight_api.engine.retain.types import RetainContent + + clear_config_cache() + base_config = _get_raw_config() + + # Build a config that has a strategy overriding to index_only + strategies = {"fast": {"retain_extraction_mode": "index_only"}} + config_with_strategies = base_config.__class__( + **{**base_config.__dict__, "retain_strategies": strategies} + ) + strategy_config = apply_strategy(config_with_strategies, "fast") + assert strategy_config.retain_extraction_mode == "index_only" + + contents = [ + RetainContent(content="Alice deployed the new API on Monday."), + RetainContent(content="Bob reviewed the pull request."), + ] + + facts, chunks, usage = asyncio.get_event_loop().run_until_complete( + extract_facts_from_contents( + contents=contents, + llm_config=None, # index_only must not call the LLM + agent_name="TestAgent", + config=strategy_config, + ) + ) + + assert len(facts) == 2 + assert facts[0].fact_text == contents[0].content + assert facts[1].fact_text == contents[1].content + assert usage.total_tokens == 0 + logger.info("✓ strategy with index_only mode: no LLM, verbatim chunks, zero tokens") + + +def test_retain_request_per_item_strategy_field(): + """ + Unit test: MemoryItem accepts a strategy field; items with different strategies + are grouped correctly by effective strategy (item.strategy ?? request.strategy). 
+ """ + from hindsight_api.api.http import MemoryItem, RetainRequest + + request = RetainRequest.model_validate( + { + "items": [ + {"content": "Alice joined.", "strategy": "fast"}, + {"content": "Bob left.", "strategy": "detailed"}, + {"content": "Carol arrived."}, # no per-item strategy + ], + "strategy": "default_strategy", + } + ) + + assert request.items[0].strategy == "fast" + assert request.items[1].strategy == "detailed" + assert request.items[2].strategy is None # will inherit request.strategy downstream + + # Simulate grouping logic from api_retain handler + strategy_groups: dict = {} + for item in request.items: + effective = item.strategy if item.strategy is not None else request.strategy + strategy_groups.setdefault(effective, []).append(item.content) + + assert set(strategy_groups.keys()) == {"fast", "detailed", "default_strategy"} + assert strategy_groups["fast"] == ["Alice joined."] + assert strategy_groups["detailed"] == ["Bob left."] + assert strategy_groups["default_strategy"] == ["Carol arrived."] + logger.info("✓ per-item strategy grouping works correctly") From 29090907086cb64ab47de5acf3b397a52e4fb2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 15:06:38 +0100 Subject: [PATCH 06/15] feat(ui): add retain strategies and default strategy to bank config UI - Add StrategiesEditor component: per-strategy cards with name input and JSON overrides textarea; supports add/remove; validates JSON inline - Add Default Strategy text input (retain_default_strategy) - Update RetainEdits type and retainSlice() to include both new fields - Regenerate OpenAPI spec (retain_strategies, retain_default_strategy, per-item strategy on MemoryItem/FileRetainMetadata, operation_ids on RetainResponse) --- .../src/components/bank-config-view.tsx | 171 ++++++++++++++++++ hindsight-docs/static/openapi.json | 41 ++++- 2 files changed, 211 insertions(+), 1 deletion(-) diff --git 
a/hindsight-control-plane/src/components/bank-config-view.tsx b/hindsight-control-plane/src/components/bank-config-view.tsx index 9646c559a..ecfd45117 100644 --- a/hindsight-control-plane/src/components/bank-config-view.tsx +++ b/hindsight-control-plane/src/components/bank-config-view.tsx @@ -35,6 +35,8 @@ type RetainEdits = { retain_extraction_mode: string | null; retain_mission: string | null; retain_custom_instructions: string | null; + retain_default_strategy: string | null; + retain_strategies: Record> | null; }; type ObservationsEdits = { @@ -140,6 +142,8 @@ function retainSlice(config: Record): RetainEdits { retain_extraction_mode: config.retain_extraction_mode ?? null, retain_mission: config.retain_mission ?? null, retain_custom_instructions: config.retain_custom_instructions ?? null, + retain_default_strategy: config.retain_default_strategy ?? null, + retain_strategies: config.retain_strategies ?? null, }; } @@ -483,6 +487,25 @@ export function BankConfigView() { rows={5} /> )} + + + setRetainEdits((prev) => ({ + ...prev, + retain_default_strategy: e.target.value || null, + })) + } + placeholder="e.g. fast" + /> + + setRetainEdits((prev) => ({ ...prev, retain_strategies: v }))} + /> {/* Entity Labels Section */} @@ -755,6 +778,154 @@ export function BankConfigView() { ); } +// ─── StrategiesEditor ───────────────────────────────────────────────────────── + +type StrategyEntry = { name: string; jsonText: string; parseError: string | null }; + +function toEntries(strategies: Record> | null): StrategyEntry[] { + if (!strategies) return []; + return Object.entries(strategies).map(([name, overrides]) => ({ + name, + jsonText: JSON.stringify(overrides, null, 2), + parseError: null, + })); +} + +function StrategiesEditor({ + value, + onChange, +}: { + value: Record> | null; + onChange: (v: Record> | null) => void; +}) { + const [entries, setEntries] = useState(() => toEntries(value)); + + // Sync when parent resets (e.g. 
after save/load) + const valueKey = JSON.stringify(value); + useEffect(() => { + setEntries(toEntries(value)); + + }, [valueKey]); + + const emit = (next: StrategyEntry[]) => { + const allValid = next.every((e) => e.parseError === null); + if (allValid) { + if (next.length === 0) { + onChange(null); + } else { + const dict: Record> = {}; + for (const e of next) { + if (e.name.trim()) { + try { + dict[e.name.trim()] = JSON.parse(e.jsonText); + } catch { + // skip invalid + } + } + } + onChange(Object.keys(dict).length > 0 ? dict : null); + } + } + }; + + const update = (i: number, patch: Partial) => { + const next = entries.map((e, idx) => { + if (idx !== i) return e; + const updated = { ...e, ...patch }; + if ("jsonText" in patch) { + try { + JSON.parse(patch.jsonText!); + updated.parseError = null; + } catch { + updated.parseError = "Invalid JSON"; + } + } + return updated; + }); + setEntries(next); + emit(next); + }; + + const add = () => { + const next = [ + ...entries, + { name: "", jsonText: '{\n "retain_extraction_mode": "index_only"\n}', parseError: null }, + ]; + setEntries(next); + emit(next); + }; + + const remove = (i: number) => { + const next = entries.filter((_, idx) => idx !== i); + setEntries(next); + emit(next); + }; + + return ( +
+
+
+

Retain Strategies

+

+ Named config override sets. Each strategy is a JSON object of hierarchical config fields + (e.g. retain_extraction_mode,{" "} + retain_chunk_size). Pass the strategy name on + retain requests to apply its overrides. +

+
+ {entries.length > 0 && ( + + {entries.length} {entries.length === 1 ? "strategy" : "strategies"} + + )} +
+ + {entries.length === 0 && ( +

No strategies defined.

+ )} + +
+ {entries.map((entry, i) => ( +
+
+ update(i, { name: e.target.value })} + className="h-8 text-xs font-mono flex-1" + /> + +
+