vstorm-co · brgsk · Feb 21, 2026 · Feb 21, 2026 · Feb 21, 2026 · Feb 21, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -104,6 +104,12 @@ async with memory:
 
 **User isolation is mandatory**: All retrieval and storage operations require and filter by `user_id`. No cross-user queries possible by design.
 
+**Extraction validation (`_pipeline.py`)**: `_validate_extraction` enforces a single check: confidence >= 0.7. All regex filters (third-person, first-person pronouns, relative time, assistant-sourced patterns) were removed because they only work for English. Quality enforcement is handled entirely at the prompt level via ATOMIZATION_RULES and EXCLUSIONS in `prompts.py`.
+
+**`temporal.py` regex gotchas**: Avoid broad word matches in `_RELATIVE_PATTERNS` (e.g. "later"/"earlier" match adjective usage). `_UNTIL_PATTERN` must not include bare "to" (matches non-temporal uses). `_SINCE_PATTERN` uses `began` not `began?` (the `?` makes "a" optional).
+
+**PR reviews**: The `claude` bot posts automated code reviews on PRs. Check with `gh pr view <N> --comments` and address feedback before merging.
+
 ## Code Style
 
 - Line length: 135 characters

diff --git a/src/memv/memory/_pipeline.py b/src/memv/memory/_pipeline.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import logging
-import re
 from typing import TYPE_CHECKING
 
 from memv.models import (
@@ -12,22 +11,13 @@
     Message,
     SemanticKnowledge,
 )
-from memv.processing.temporal import backfill_temporal_fields, contains_relative_time
+from memv.processing.temporal import backfill_temporal_fields
 
 if TYPE_CHECKING:
     from memv.memory._lifecycle import LifecycleManager
 
 logger = logging.getLogger(__name__)
 
-# Compiled regexes for validation checks
-_THIRD_PERSON_RE = re.compile(r"^[Uu]ser\b")
-_FIRST_PERSON_RE = re.compile(r"(?<!/)\b(I(?!/)|[Mm][Yy]|[Mm][Ee]|[Ww][Ee]|[Oo][Uu][Rr])\b")
-# Requires infinitive ("was advised to") to avoid rejecting legitimate facts like "User was given a promotion"
-_ASSISTANT_SOURCE_RE = re.compile(
-    r"\b(?:was|were)\s+(?:advised|suggested|recommended|told|instructed|encouraged)\s+to\b",
-    re.IGNORECASE,
-)
-
 
 class Pipeline:
     """Handles message processing, episode generation, and knowledge extraction."""
@@ -168,7 +158,7 @@ async def _process_episode(self, messages: list[Message], user_id: str) -> int:
 
         # 6. Backfill temporal fields from temporal_info
         for item in extracted:
-            if item.temporal_info and (item.valid_at is None or item.invalid_at is None):
+            if item.temporal_info:
                 item.valid_at, item.invalid_at = backfill_temporal_fields(
                     item.temporal_info,
                     item.valid_at,
@@ -218,34 +208,11 @@ async def _process_episode(self, messages: list[Message], user_id: str) -> int:
         return stored_count
 
     def _validate_extraction(self, item: ExtractedKnowledge) -> bool:
-        """Filter extractions that are low-confidence or not self-contained.
-
-        Scope: only User-subject statements pass. Third-party facts ("Bob prefers
-        Postgres") are intentionally dropped — the knowledge base stores facts
-        about the user, not about people mentioned in conversation.
-        """
+        """Filter low-confidence extractions."""
         if item.confidence < 0.7:
             logger.debug("Rejected (low confidence %.2f): %s", item.confidence, item.statement[:60])
             return False
 
-        stmt = item.statement
-
-        if not _THIRD_PERSON_RE.match(stmt):
-            logger.debug("Rejected (not third-person): %s", stmt[:60])
-            return False
-
-        if _FIRST_PERSON_RE.search(stmt):
-            logger.debug("Rejected (first-person pronoun): %s", stmt[:60])
-            return False
-
-        if contains_relative_time(stmt):
-            logger.debug("Rejected (unresolved relative time): %s", stmt[:60])
-            return False
-
-        if _ASSISTANT_SOURCE_RE.search(stmt):
-            logger.debug("Rejected (assistant-sourced): %s", stmt[:60])
-            return False
-
         return True
 
     async def _is_duplicate_knowledge(self, embedding: list[float], user_id: str) -> tuple[bool, float]:

diff --git a/src/memv/processing/prompts.py b/src/memv/processing/prompts.py
@@ -66,11 +66,16 @@
 - Obvious or common knowledge
 - Speculative or uncertain claims
 - Conversation events ("User asked about X", "User requested Y") - extract the FACT, not the action
-- Assistant-sourced knowledge ("was advised to", "was suggested to", "was recommended to")
 
-**CRITICAL SOURCE RULE - READ CAREFULLY:**
-- ONLY extract facts the USER explicitly stated in their own messages
-- NEVER extract assistant suggestions, recommendations, or code examples as user facts
+**CRITICAL SOURCE RULES - READ CAREFULLY:**
+- Extract facts the user explicitly stated in their own messages
+- DO extract factual information communicated by the assistant (dates, appointments, confirmations, scheduled events)
+- NEVER extract assistant suggestions, recommendations, or hypotheticals as user facts
+  ("was advised to", "was suggested to", "was recommended to")
+- Treat assistant suggestions about user intent as speculative — never encode them as facts
+- NEVER attribute intent/action/statement to user if it originated in a hypothetical/suggestion/conditional from the assistant
+- Ignore assistant-led topics unless user acts on them
+- Attribute preferences only to explicit user claims — never to questions or reactions
 - If assistant says "use Python" and user doesn't respond with "yes" or confirm - DO NOT extract "User uses Python"
 - If assistant provides code in language X but user says "I use Y" - extract Y, not X
 - The user ASKING about something is NOT the same as the user USING it
@@ -97,7 +102,7 @@
 **REQUIRE in output statements:**
 - Absolute dates when temporal info exists: "on [resolved date]", not "yesterday"
 - Specific names: "User's React project at Vstorm", not "the project"
-- Third person with "User" as subject: "User prefers Python", not "I prefer Python"
+- Third person: "User prefers Python", not "I prefer Python"
 
 **Coreference resolution — resolve BEFORE writing the statement:**
 - "my kids" → "User's children"
@@ -231,16 +236,18 @@ def cold_start_extraction_prompt(episode_title: str, original_messages: list[dic
 {reference_timestamp}
 </reference_timestamp>
 You MUST resolve ALL relative dates using this timestamp.
-Statements with unresolved relative time ("yesterday", "last week") will be REJECTED.
+Statements with unresolved relative time ("yesterday", "last week") are INVALID — resolve them or omit.
 """
 
     return f"""Extract HIGH-VALUE, PERSISTENT knowledge from this conversation.
 {timestamp_section}
 
-**CRITICAL RULE: Extract ONLY from lines starting with ">>> USER:"**
-- These are the user's actual words - the ONLY source of truth
-- IGNORE all ASSISTANT lines completely
-- Do NOT infer, expand, or modify what the user said
+**SOURCE RULES:**
+- Lines starting with ">>> USER:" are the user's actual words — the primary source of truth
+- DO extract factual information communicated by the assistant (dates, appointments, confirmations)
+- Do NOT extract assistant suggestions, recommendations, or hypotheticals as user facts
+- Treat assistant suggestions about user intent as speculative — never encode them as facts
+- Attribute preferences only to explicit user claims — never to assistant questions or reactions
 - Preserve the user's exact phrasing and technical terms
 
 <episode_context>
@@ -294,7 +301,7 @@ def cold_start_extraction_prompt(episode_title: str, original_messages: list[dic
 ## Output Format
 
 For each extracted item, specify:
-- statement: A clean, declarative fact about the user (third-person: "User...", not "I...")
+- statement: A concrete, self-contained fact from the conversation (see SOURCE RULES above)
 - knowledge_type: "new"
 - temporal_info: Human-readable description if mentioned ("since January 2024", "until next month")
 - valid_at: ISO 8601 datetime when fact became true, or null if unknown/always true (e.g., "2024-01-01T00:00:00Z")
@@ -313,13 +320,15 @@ def extraction_prompt_with_prediction(prediction: str, conversation: str, refere
 {reference_timestamp}
 </reference_timestamp>
 You MUST resolve ALL relative dates using this timestamp.
-Statements with unresolved relative time ("yesterday", "last week") will be REJECTED.
+Statements with unresolved relative time ("yesterday", "last week") are INVALID — resolve them or omit.
 """
 
     return f"""Extract valuable knowledge by comparing actual conversation with predicted content.
 {timestamp_section}
-**CRITICAL: Extract ONLY from lines starting with ">>> USER:" - these are the user's actual words.**
-**IGNORE all ASSISTANT lines - those are suggestions, not user facts.**
+**SOURCE RULES:**
+- Lines starting with ">>> USER:" are the user's actual words — the primary source of truth.
+- DO extract factual info communicated by the assistant (dates, appointments, confirmations).
+- Do NOT extract assistant suggestions, recommendations, or hypotheticals as user facts.
 
 <prediction>
 {prediction}
@@ -371,7 +380,7 @@ def extraction_prompt_with_prediction(prediction: str, conversation: str, refere
 ## Output Format
 
 For each extracted item, specify:
-- statement: A fact the USER explicitly stated (not assistant suggestions)
+- statement: A concrete, self-contained fact from the conversation (see SOURCE RULES above)
 - knowledge_type: "new" if entirely new, "update" if refines existing, "contradiction" if conflicts
 - temporal_info: Human-readable description if mentioned ("since January 2024", "until next month")
 - valid_at: ISO 8601 datetime when fact became true, or null if unknown/always true (e.g., "2024-01-01T00:00:00Z")

diff --git a/tests/test_atomization.py b/tests/test_atomization.py
@@ -40,66 +40,30 @@ def _make_memory(tmp_path, llm, embedder):
 
 
 # ---------------------------------------------------------------------------
-# Validation filter: unit-level via e2e pipeline
+# Confidence threshold
 # ---------------------------------------------------------------------------
 
 
-async def test_rejects_first_person(tmp_path):
-    """Statement with 'I' / 'my' gets filtered out."""
+async def test_rejects_low_confidence(tmp_path):
+    """Low-confidence items rejected."""
     llm = MockLLM()
     embedder = MockEmbedder()
 
     llm.set_responses("generate", [_episode_json()])
     llm.set_responses(
         "generate_structured",
-        [_extraction([ExtractedKnowledge(statement="User mentioned that my team uses React", knowledge_type="new", confidence=0.9)])],
-    )
-
-    memory = _make_memory(tmp_path, llm, embedder)
-    async with memory:
-        await memory.add_exchange("user1", "I like Python", "Cool!", timestamp=_ts())
-        count = await memory.process("user1")
-        assert count == 0
-
-
-async def test_rejects_non_third_person(tmp_path):
-    """Statement not starting with 'User' gets filtered."""
-    llm = MockLLM()
-    embedder = MockEmbedder()
-
-    llm.set_responses("generate", [_episode_json()])
-    llm.set_responses(
-        "generate_structured",
-        [_extraction([ExtractedKnowledge(statement="He likes Python", knowledge_type="new", confidence=0.9)])],
-    )
-
-    memory = _make_memory(tmp_path, llm, embedder)
-    async with memory:
-        await memory.add_exchange("user1", "I like Python", "Cool!", timestamp=_ts())
-        count = await memory.process("user1")
-        assert count == 0
-
-
-async def test_rejects_relative_time(tmp_path):
-    """Statement with unresolved 'yesterday' gets filtered."""
-    llm = MockLLM()
-    embedder = MockEmbedder()
-
-    llm.set_responses("generate", [_episode_json()])
-    llm.set_responses(
-        "generate_structured",
-        [_extraction([ExtractedKnowledge(statement="User started using FastAPI yesterday", knowledge_type="new", confidence=0.9)])],
+        [_extraction([ExtractedKnowledge(statement="User might like Rust", knowledge_type="new", confidence=0.3)])],
     )
 
     memory = _make_memory(tmp_path, llm, embedder)
     async with memory:
-        await memory.add_exchange("user1", "I started using FastAPI yesterday", "Nice!", timestamp=_ts())
+        await memory.add_exchange("user1", "Maybe Rust?", "Interesting!", timestamp=_ts())
         count = await memory.process("user1")
         assert count == 0
 
 
-async def test_accepts_clean_statement(tmp_path):
-    """Properly atomized statement passes all checks."""
+async def test_accepts_high_confidence(tmp_path):
+    """Statement with confidence >= 0.7 passes."""
     llm = MockLLM()
     embedder = MockEmbedder()
 
@@ -120,76 +84,66 @@ async def test_accepts_clean_statement(tmp_path):
         assert count == 1
 
 
-async def test_rejects_assistant_sourced(tmp_path):
-    """Statement with 'was advised to' pattern gets filtered."""
+async def test_accepts_assistant_communicated_fact(tmp_path):
+    """Facts communicated by the assistant (dates, appointments) are accepted."""
     llm = MockLLM()
     embedder = MockEmbedder()
 
     llm.set_responses("generate", [_episode_json()])
     llm.set_responses(
         "generate_structured",
-        [_extraction([ExtractedKnowledge(statement="User was advised to try Emacs", knowledge_type="new", confidence=0.9)])],
-    )
-
-    memory = _make_memory(tmp_path, llm, embedder)
-    async with memory:
-        await memory.add_exchange("user1", "The assistant told me to try Emacs", "Sure!", timestamp=_ts())
-        count = await memory.process("user1")
-        assert count == 0
-
-
-async def test_accepts_passive_without_infinitive(tmp_path):
-    """'was given a promotion' is NOT assistant-sourced — should pass."""
-    llm = MockLLM()
-    embedder = MockEmbedder()
-
-    llm.set_responses("generate", [_episode_json()])
-    llm.set_responses(
-        "generate_structured",
-        [_extraction([ExtractedKnowledge(statement="User was given a promotion", knowledge_type="new", confidence=0.9)])],
+        [
+            _extraction(
+                [
+                    ExtractedKnowledge(
+                        statement="User's dentist appointment was moved to March 15, 2024", knowledge_type="new", confidence=0.9
+                    )
+                ]
+            )
+        ],
     )
 
     memory = _make_memory(tmp_path, llm, embedder)
     async with memory:
-        await memory.add_exchange("user1", "I was given a promotion", "Congrats!", timestamp=_ts())
+        await memory.add_exchange("user1", "When is my appointment?", "It was moved to March 15", timestamp=_ts())
         count = await memory.process("user1")
         assert count == 1
 
+        result = await memory.retrieve("dentist appointment", user_id="user1")
+        assert "March 15, 2024" in result.retrieved_knowledge[0].statement
+
 
-async def test_accepts_lowercase_user(tmp_path):
-    """LLM may output lowercase 'user' — should still pass third-person check."""
+async def test_confidence_boundary(tmp_path):
+    """Confidence exactly at 0.7 passes, below does not."""
     llm = MockLLM()
     embedder = MockEmbedder()
 
     llm.set_responses("generate", [_episode_json()])
     llm.set_responses(
         "generate_structured",
-        [_extraction([ExtractedKnowledge(statement="user prefers Python", knowledge_type="new", confidence=0.9)])],
+        [
+            _extraction(
+                [
+                    ExtractedKnowledge(statement="User works at Vstorm", knowledge_type="new", confidence=0.7),
+                    ExtractedKnowledge(statement="User likes tea", knowledge_type="new", confidence=0.69),
+                ]
+            )
+        ],
     )
 
     memory = _make_memory(tmp_path, llm, embedder)
     async with memory:
-        await memory.add_exchange("user1", "I prefer Python", "Nice!", timestamp=_ts())
+        await memory.add_exchange("user1", "I work at Vstorm and like tea", "Nice!", timestamp=_ts())
         count = await memory.process("user1")
         assert count == 1
 
+        result = await memory.retrieve("Vstorm", user_id="user1")
+        assert result.retrieved_knowledge[0].statement == "User works at Vstorm"
 
-async def test_confidence_filter_unchanged(tmp_path):
-    """Low-confidence items still rejected (regression check)."""
-    llm = MockLLM()
-    embedder = MockEmbedder()
-
-    llm.set_responses("generate", [_episode_json()])
-    llm.set_responses(
-        "generate_structured",
-        [_extraction([ExtractedKnowledge(statement="User might like Rust", knowledge_type="new", confidence=0.3)])],
-    )
 
-    memory = _make_memory(tmp_path, llm, embedder)
-    async with memory:
-        await memory.add_exchange("user1", "Maybe Rust?", "Interesting!", timestamp=_ts())
-        count = await memory.process("user1")
-        assert count == 0
+# ---------------------------------------------------------------------------
+# Temporal backfill
+# ---------------------------------------------------------------------------
 
 
 async def test_temporal_backfill_in_pipeline(tmp_path):
@@ -247,11 +201,11 @@ def test_warm_extraction_contains_atomization_rules(self):
 
     def test_cold_start_contains_strong_timestamp_wording(self):
         prompt = cold_start_extraction_prompt("Test", [{"role": "user", "content": "hi"}], "2024-06-15T12:00:00Z")
-        assert "will be REJECTED" in prompt
+        assert "are INVALID" in prompt
 
     def test_warm_extraction_contains_strong_timestamp_wording(self):
         prompt = extraction_prompt_with_prediction("prediction", ">>> USER: hi", "2024-06-15T12:00:00Z")
-        assert "will be REJECTED" in prompt
+        assert "are INVALID" in prompt
 
     def test_atomization_rules_constant_exists(self):
         assert "Self-Contained Statement Rules" in ATOMIZATION_RULES