From b234200b6667803ab1a7581e10845b739069264a Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Sat, 21 Feb 2026 01:20:28 +0100 Subject: [PATCH 01/10] feat(extraction): add atomization for self-contained knowledge Add prompt-level and code-level enforcement to ensure extracted knowledge statements are independently interpretable without conversation context. Prompt changes: - ATOMIZATION_RULES constant prohibiting pronouns, relative time, ambiguous references, and first-person in extracted statements - Stronger reference_timestamp wording (REJECTED vs resolve) - Good/bad atomization examples in both extraction prompts Code-level validation (_pipeline.py): - Third-person check (statement must start with "User") - First-person pronoun rejection (I, my, me, we, our) - Unresolved relative time rejection (yesterday, last week, etc.) - Assistant-sourced pattern rejection (was advised/suggested) - Temporal backfill step between extract and validate New temporal module (src/memv/processing/temporal.py): - contains_relative_time: regex detection of unresolved patterns - parse_temporal_expression: relative-to-absolute via dateutil - backfill_temporal_fields: populate valid_at/invalid_at from temporal_info strings when LLM leaves them None Dependency: python-dateutil>=2.9.0 Tests: 179 total (48 new), all passing --- pyproject.toml | 1 + src/memv/memory/_pipeline.py | 45 ++++++- src/memv/processing/prompts.py | 49 ++++++- src/memv/processing/temporal.py | 146 +++++++++++++++++++++ tests/test_atomization.py | 221 ++++++++++++++++++++++++++++++++ tests/test_extractor.py | 37 ++++++ tests/test_memory_e2e.py | 42 +++++- tests/test_temporal.py | 205 +++++++++++++++++++++++++++++ uv.lock | 2 + 9 files changed, 737 insertions(+), 11 deletions(-) create mode 100644 src/memv/processing/temporal.py create mode 100644 tests/test_atomization.py create mode 100644 tests/test_temporal.py diff --git a/pyproject.toml b/pyproject.toml index 1b05fa2..a6eca69 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "openai>=2.15.0", "pydantic>=2.12.5", "pydantic-ai>=1.43.0", + "python-dateutil>=2.9.0", "sqlite-vec>=0.1.6", "textual>=3.5.0", ] diff --git a/src/memv/memory/_pipeline.py b/src/memv/memory/_pipeline.py index 8bfdaab..8b3482a 100644 --- a/src/memv/memory/_pipeline.py +++ b/src/memv/memory/_pipeline.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import re from typing import TYPE_CHECKING from memv.models import ( @@ -11,12 +12,17 @@ Message, SemanticKnowledge, ) +from memv.processing.temporal import backfill_temporal_fields, contains_relative_time if TYPE_CHECKING: from memv.memory._lifecycle import LifecycleManager logger = logging.getLogger(__name__) +# Compiled regexes for validation checks +_FIRST_PERSON_RE = re.compile(r"\b(I|my|me|we|our)\b") +_ASSISTANT_SOURCE_RE = re.compile(r"\b(?:was|were)\s+(?:advised|suggested|recommended)\b", re.IGNORECASE) + class Pipeline: """Handles message processing, episode generation, and knowledge extraction.""" @@ -155,10 +161,20 @@ async def _process_episode(self, messages: list[Message], user_id: str) -> int: existing_knowledge=existing.retrieved_knowledge, ) - # 6. Filter out low-quality extractions + # 6. Backfill temporal fields from temporal_info + for item in extracted: + if item.temporal_info and (item.valid_at is None or item.invalid_at is None): + item.valid_at, item.invalid_at = backfill_temporal_fields( + item.temporal_info, + item.valid_at, + item.invalid_at, + episode.end_time, + ) + + # 7. Filter out low-quality extractions extracted = [item for item in extracted if self._validate_extraction(item)] - # 7. Convert to SemanticKnowledge and store with embeddings + # 8. 
Convert to SemanticKnowledge and store with embeddings if not extracted: return 0 @@ -166,7 +182,7 @@ async def _process_episode(self, messages: list[Message], user_id: str) -> int: statements = [item.statement for item in extracted] embeddings = await self._lc.embedder.embed_batch(statements) - # 8. Process each extracted item + # 9. Process each extracted item stored_count = 0 for item, embedding in zip(extracted, embeddings, strict=True): # Handle contradictions @@ -197,9 +213,30 @@ async def _process_episode(self, messages: list[Message], user_id: str) -> int: return stored_count def _validate_extraction(self, item: ExtractedKnowledge) -> bool: - """Filter low-confidence extractions.""" + """Filter extractions that are low-confidence or not self-contained.""" + stmt = item.statement + preview = stmt[:60] + if item.confidence < 0.7: + logger.debug("Rejected (low confidence %.2f): %s", item.confidence, preview) + return False + + if not stmt.startswith("User"): + logger.debug("Rejected (not third-person): %s", preview) + return False + + if _FIRST_PERSON_RE.search(stmt): + logger.debug("Rejected (first-person pronoun): %s", preview) return False + + if contains_relative_time(stmt): + logger.debug("Rejected (unresolved relative time): %s", preview) + return False + + if _ASSISTANT_SOURCE_RE.search(stmt): + logger.debug("Rejected (assistant-sourced): %s", preview) + return False + return True async def _is_duplicate_knowledge(self, embedding: list[float], user_id: str) -> tuple[bool, float]: diff --git a/src/memv/processing/prompts.py b/src/memv/processing/prompts.py index bb72fed..7351315 100644 --- a/src/memv/processing/prompts.py +++ b/src/memv/processing/prompts.py @@ -66,6 +66,7 @@ - Obvious or common knowledge - Speculative or uncertain claims - Conversation events ("User asked about X", "User requested Y") - extract the FACT, not the action +- Assistant-sourced knowledge ("was advised to", "was suggested to", "was recommended to") **CRITICAL SOURCE RULE - 
READ CAREFULLY:** - ONLY extract facts the USER explicitly stated in their own messages @@ -77,6 +78,34 @@ - Extract opinions WITH reasons when stated: "User finds X too basic" or "User likes Y because it's intuitive" """ +# ============================================================================= +# ATOMIZATION RULES +# Self-contained statement constraints for extraction quality. +# ============================================================================= + +ATOMIZATION_RULES = """ +## Self-Contained Statement Rules (MANDATORY) + +Every extracted statement MUST be independently interpretable without conversation context. + +**PROHIBIT in output statements:** +- Pronouns as subjects: "he", "she", "they", "it" — use the actual name or "User" +- Relative time: "yesterday", "today", "tomorrow", "last week", "next month", "recently", "soon" +- Ambiguous references: "the project", "the tool", "that place", "the same thing" +- First person: "I", "my", "me", "we", "our" + +**REQUIRE in output statements:** +- Absolute dates when temporal info exists: "on 2024-06-15", not "yesterday" +- Specific names: "User's React project at Vstorm", not "the project" +- Third person with "User" as subject: "User prefers Python", not "I prefer Python" + +**Coreference resolution — resolve BEFORE writing the statement:** +- "my kids" → "User's children" +- "he said" → "[Name from conversation] said" +- "this place" → "[specific location mentioned]" +- "the same library" → "[library name]" +""" + # ============================================================================= # BOUNDARY DETECTION SIGNALS # Patterns that indicate episode boundaries. @@ -201,7 +230,8 @@ def cold_start_extraction_prompt(episode_title: str, original_messages: list[dic {reference_timestamp} -Use this to resolve relative dates ("yesterday", "next week", "last month") into absolute ISO 8601 dates. +You MUST resolve ALL relative dates using this timestamp. 
+Statements with unresolved relative time ("yesterday", "last week") will be REJECTED. """ return f"""Extract HIGH-VALUE, PERSISTENT knowledge from this conversation. @@ -227,6 +257,8 @@ def cold_start_extraction_prompt(episode_title: str, original_messages: list[dic {EXCLUSIONS} +{ATOMIZATION_RULES} + ## Examples ### GOOD Extractions: @@ -240,9 +272,13 @@ def cold_start_extraction_prompt(episode_title: str, original_messages: list[dic - "User finds Faiss too basic for their needs" - "User had problems with Milvus due to hosting overhead" - "User uses JavaScript" (correct third-person form) +- "User started using FastAPI on 2024-06-14" (absolute date, not "yesterday") +- "User moved to Berlin in 2023" (resolved, not "last year") ### BAD Extractions: - "I use JavaScript" (raw copy - should be "User uses JavaScript") +- "He started using it yesterday" (unresolved pronoun + relative time → "User started using FastAPI on 2024-06-14") +- "They moved there last year" (unresolved pronoun + relative time → "User moved to Berlin in 2023") - "User requested code for X" (conversation event, not fact about user) - "User discussed X" (conversation event) - "User asked about X" (conversation event - extract the preference instead) @@ -250,6 +286,7 @@ def cold_start_extraction_prompt(episode_title: str, original_messages: list[dic - Expanding ANY acronym the user used (RAG, API, LLM, PDF, etc.) 
- "User is interested in machine learning" (too vague) - "User received advice about X" (conversation event) +- "User was advised to use X" (assistant-sourced, not user fact) - "User prefers Python" when assistant suggested Python but user didn't confirm (VIOLATES SOURCE RULE) - "User uses library X" when assistant recommended it but user didn't adopt it (VIOLATES SOURCE RULE) - "User is using Python" when assistant provided Python code but user said "I use JavaScript" (WRONG - user stated JavaScript) @@ -275,7 +312,8 @@ def extraction_prompt_with_prediction(prediction: str, conversation: str, refere {reference_timestamp} -Use this to resolve relative dates ("yesterday", "next week", "last month") into absolute ISO 8601 dates. +You MUST resolve ALL relative dates using this timestamp. +Statements with unresolved relative time ("yesterday", "last week") will be REJECTED. """ return f"""Extract valuable knowledge by comparing actual conversation with predicted content. @@ -304,6 +342,8 @@ def extraction_prompt_with_prediction(prediction: str, conversation: str, refere {EXCLUSIONS} +{ATOMIZATION_RULES} + ## Examples ### GOOD Extractions (concrete facts): @@ -314,12 +354,17 @@ def extraction_prompt_with_prediction(prediction: str, conversation: str, refere - "User likes Chroma because it's intuitive" - "User finds Faiss too basic" - "User had issues with Milvus due to hosting overhead" +- "User started using FastAPI on 2024-06-14" (absolute date, not "yesterday") +- "User moved to Berlin in 2023" (resolved, not "last year") ### BAD Extractions: +- "He started using it yesterday" (unresolved pronoun + relative time) +- "They moved there last year" (unresolved pronoun + relative time) - "User is interested in X" (too vague) - "User asked about X" (conversation event, not fact) - "User asked for suggestions on X" (conversation event - extract the preference instead) - "User discussed X" (conversation event) +- "User was advised to use X" (assistant-sourced, not user fact) 
- "User prefers X" when assistant suggested X but user didn't confirm (VIOLATES SOURCE RULE) - "User uses Python" when assistant provided Python code but user said they use JavaScript (WRONG) diff --git a/src/memv/processing/temporal.py b/src/memv/processing/temporal.py new file mode 100644 index 0000000..d2099bb --- /dev/null +++ b/src/memv/processing/temporal.py @@ -0,0 +1,146 @@ +"""Temporal parsing utilities for knowledge atomization. + +Safety net for when LLMs ignore reference_timestamp instructions. +Validates and backfills temporal fields on extracted knowledge. +""" + +from __future__ import annotations + +import re +from datetime import datetime, timezone + +from dateutil.parser import parse as dateutil_parse +from dateutil.relativedelta import relativedelta + +# Relative time patterns that indicate unresolved temporal references +_RELATIVE_PATTERNS = re.compile( + r"\b(" + r"yesterday|today|tomorrow" + r"|(?:last|next|this)\s+(?:week|month|year|monday|tuesday|wednesday|thursday|friday|saturday|sunday)" + r"|recently|soon|later|earlier" + r"|\d+\s+(?:days?|weeks?|months?|years?)\s+(?:ago|from\s+now)" + r")\b", + re.IGNORECASE, +) + +# Named relative expressions → relativedelta offsets +_RELATIVE_OFFSETS: dict[str, relativedelta] = { + "yesterday": relativedelta(days=-1), + "today": relativedelta(), + "tomorrow": relativedelta(days=1), + "last week": relativedelta(weeks=-1), + "next week": relativedelta(weeks=1), + "this week": relativedelta(), + "last month": relativedelta(months=-1), + "next month": relativedelta(months=1), + "this month": relativedelta(), + "last year": relativedelta(years=-1), + "next year": relativedelta(years=1), + "this year": relativedelta(), +} + +# N units ago / from now +_N_UNITS_PATTERN = re.compile( + r"(\d+)\s+(days?|weeks?|months?|years?)\s+(ago|from\s+now)", + re.IGNORECASE, +) + +# Backfill patterns for temporal_info strings +_SINCE_PATTERN = 
re.compile(r"(?:since|from|starting|began?)\s+(.+?)(?:\s*$|\s*(?:to|until|through)\s+)", re.IGNORECASE) +_UNTIL_PATTERN = re.compile(r"(?:until|to|through|ending|ended?)\s+(.+?)$", re.IGNORECASE) +_FROM_TO_PATTERN = re.compile(r"(?:from|since)\s+(.+?)\s+(?:to|until|through)\s+(.+?)$", re.IGNORECASE) + + +def contains_relative_time(text: str) -> bool: + """Check if text contains unresolved relative time expressions.""" + return bool(_RELATIVE_PATTERNS.search(text)) + + +def parse_temporal_expression(text: str, reference: datetime) -> datetime | None: + """Parse a temporal expression relative to a reference time. + + Handles common relative patterns via relativedelta, falls through + to dateutil.parser.parse for natural language / ISO dates. + Returns None on failure. + """ + normalized = text.strip().lower() + + # Named relative offsets + if normalized in _RELATIVE_OFFSETS: + return reference + _RELATIVE_OFFSETS[normalized] + + # "last/next " patterns + for prefix, direction in [("last", -1), ("next", 1)]: + for day_name in ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]: + if normalized == f"{prefix} {day_name}": + target_weekday = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"].index(day_name) + days_diff = (reference.weekday() - target_weekday) % 7 + if direction == -1: + days_diff = days_diff or 7 + return reference - relativedelta(days=days_diff) + else: + days_diff = (target_weekday - reference.weekday()) % 7 + days_diff = days_diff or 7 + return reference + relativedelta(days=days_diff) + + # "N days/weeks/months/years ago/from now" + match = _N_UNITS_PATTERN.match(normalized) + if match: + n = int(match.group(1)) + unit = match.group(2).rstrip("s") # normalize "days" -> "day" + direction = -1 if match.group(3).lower() == "ago" else 1 + unit_map = {"day": "days", "week": "weeks", "month": "months", "year": "years"} + delta = relativedelta(**{unit_map[unit]: n * direction}) + return reference + 
delta + + # Fall through to dateutil for ISO 8601 and natural dates + try: + return dateutil_parse(text, default=reference.replace(hour=0, minute=0, second=0, microsecond=0)) + except (ValueError, OverflowError): + return None + + +def backfill_temporal_fields( + temporal_info: str | None, + valid_at: datetime | None, + invalid_at: datetime | None, + reference: datetime, +) -> tuple[datetime | None, datetime | None]: + """Attempt to populate valid_at/invalid_at from temporal_info string. + + Only fills None fields — preserves existing values. + Returns (valid_at, invalid_at). + """ + if not temporal_info: + return valid_at, invalid_at + + # "from X to Y" — try to fill both + from_to = _FROM_TO_PATTERN.search(temporal_info) + if from_to: + if valid_at is None: + parsed = parse_temporal_expression(from_to.group(1), reference) + if parsed: + valid_at = parsed.replace(tzinfo=timezone.utc) if parsed.tzinfo is None else parsed + if invalid_at is None: + parsed = parse_temporal_expression(from_to.group(2), reference) + if parsed: + invalid_at = parsed.replace(tzinfo=timezone.utc) if parsed.tzinfo is None else parsed + return valid_at, invalid_at + + # "since X" → valid_at + if valid_at is None: + since = _SINCE_PATTERN.search(temporal_info) + if since: + parsed = parse_temporal_expression(since.group(1), reference) + if parsed: + valid_at = parsed.replace(tzinfo=timezone.utc) if parsed.tzinfo is None else parsed + + # "until X" → invalid_at + if invalid_at is None: + until = _UNTIL_PATTERN.search(temporal_info) + if until: + parsed = parse_temporal_expression(until.group(1), reference) + if parsed: + invalid_at = parsed.replace(tzinfo=timezone.utc) if parsed.tzinfo is None else parsed + + return valid_at, invalid_at diff --git a/tests/test_atomization.py b/tests/test_atomization.py new file mode 100644 index 0000000..77d3164 --- /dev/null +++ b/tests/test_atomization.py @@ -0,0 +1,221 @@ +"""Tests for atomization: validation filter + prompt integration.""" + +import json 
+from datetime import datetime, timedelta, timezone + +from memv.memory.memory import Memory +from memv.models import ExtractedKnowledge +from memv.processing.extraction import ExtractionResponse +from memv.processing.prompts import ( + ATOMIZATION_RULES, + cold_start_extraction_prompt, + extraction_prompt_with_prediction, +) + +from .conftest import MockEmbedder, MockLLM + + +def _ts(minutes: int = 0) -> datetime: + return datetime(2024, 6, 15, 12, 0, 0, tzinfo=timezone.utc) + timedelta(minutes=minutes) + + +def _episode_json(title="Test Episode", content="A test narrative."): + return json.dumps({"title": title, "content": content}) + + +def _extraction(items: list[ExtractedKnowledge]) -> ExtractionResponse: + return ExtractionResponse(extracted=items) + + +def _make_memory(tmp_path, llm, embedder): + db_path = str(tmp_path / "atom.db") + return Memory( + db_path=db_path, + embedding_client=embedder, + llm_client=llm, + embedding_dimensions=1536, + enable_episode_merging=False, + enable_embedding_cache=False, + ) + + +# --------------------------------------------------------------------------- +# Validation filter: unit-level via e2e pipeline +# --------------------------------------------------------------------------- + + +async def test_rejects_first_person(tmp_path): + """Statement with 'I' / 'my' gets filtered out.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [_extraction([ExtractedKnowledge(statement="I like Python", knowledge_type="new", confidence=0.9)])], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "I like Python", "Cool!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 0 + + +async def test_rejects_non_third_person(tmp_path): + """Statement not starting with 'User' gets filtered.""" + llm = MockLLM() + embedder = MockEmbedder() + + 
llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [_extraction([ExtractedKnowledge(statement="He likes Python", knowledge_type="new", confidence=0.9)])], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "I like Python", "Cool!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 0 + + +async def test_rejects_relative_time(tmp_path): + """Statement with unresolved 'yesterday' gets filtered.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [_extraction([ExtractedKnowledge(statement="User started using FastAPI yesterday", knowledge_type="new", confidence=0.9)])], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "I started using FastAPI yesterday", "Nice!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 0 + + +async def test_rejects_assistant_sourced(tmp_path): + """Statement with 'was advised/suggested/recommended' gets filtered.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [_extraction([ExtractedKnowledge(statement="User was advised to use Docker", knowledge_type="new", confidence=0.9)])], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "hi", "Use Docker!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 0 + + +async def test_accepts_clean_statement(tmp_path): + """Properly atomized statement passes all checks.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [ + _extraction( + [ExtractedKnowledge(statement="User prefers Python for backend development", 
knowledge_type="new", confidence=0.9)] + ) + ], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "I prefer Python for backend", "Good choice!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 1 + + +async def test_confidence_filter_unchanged(tmp_path): + """Low-confidence items still rejected (regression check).""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [_extraction([ExtractedKnowledge(statement="User might like Rust", knowledge_type="new", confidence=0.3)])], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "Maybe Rust?", "Interesting!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 0 + + +async def test_temporal_backfill_in_pipeline(tmp_path): + """temporal_info 'since January 2024' backfills valid_at when LLM leaves it None.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [ + _extraction( + [ + ExtractedKnowledge( + statement="User works at Vstorm", + knowledge_type="new", + confidence=0.9, + temporal_info="since January 2024", + valid_at=None, + invalid_at=None, + ) + ] + ) + ], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "I work at Vstorm since Jan 2024", "Nice!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 1 + + result = await memory.retrieve("Vstorm", user_id="user1") + knowledge = result.retrieved_knowledge[0] + assert knowledge.valid_at is not None + assert knowledge.valid_at.year == 2024 + assert knowledge.valid_at.month == 1 + + +# --------------------------------------------------------------------------- +# Prompt content assertions +# 
--------------------------------------------------------------------------- + + +class TestPromptContent: + def test_cold_start_contains_atomization_rules(self): + prompt = cold_start_extraction_prompt("Test", [{"role": "user", "content": "hi"}], "2024-06-15T12:00:00Z") + assert "Self-Contained Statement Rules" in prompt + assert "PROHIBIT" in prompt + + def test_warm_extraction_contains_atomization_rules(self): + prompt = extraction_prompt_with_prediction("prediction", ">>> USER: hi", "2024-06-15T12:00:00Z") + assert "Self-Contained Statement Rules" in prompt + assert "PROHIBIT" in prompt + + def test_cold_start_contains_strong_timestamp_wording(self): + prompt = cold_start_extraction_prompt("Test", [{"role": "user", "content": "hi"}], "2024-06-15T12:00:00Z") + assert "will be REJECTED" in prompt + + def test_warm_extraction_contains_strong_timestamp_wording(self): + prompt = extraction_prompt_with_prediction("prediction", ">>> USER: hi", "2024-06-15T12:00:00Z") + assert "will be REJECTED" in prompt + + def test_atomization_rules_constant_exists(self): + assert "Self-Contained Statement Rules" in ATOMIZATION_RULES diff --git a/tests/test_extractor.py b/tests/test_extractor.py index 2b92ea9..b9ceb8c 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -155,3 +155,40 @@ async def test_temporal_fields_preserved(): assert result[0].valid_at == valid assert result[0].invalid_at == invalid assert result[0].temporal_info == "from Jan 2024 to Jan 2025" + + +async def test_cold_start_prompt_contains_atomization_rules(): + """Cold start extraction prompt includes ATOMIZATION_RULES.""" + llm = MockLLM() + llm.set_responses("generate_structured", [ExtractionResponse(extracted=[])]) + extractor = PredictCalibrateExtractor(llm) + + await extractor.extract(_episode(), existing_knowledge=[]) + + prompt = llm.calls["generate_structured"][0][0] + assert "Self-Contained Statement Rules" in prompt + + +async def test_warm_prompt_contains_atomization_rules(): + 
"""Warm extraction prompt includes ATOMIZATION_RULES.""" + llm = MockLLM() + llm.set_responses("generate", ["prediction"]) + llm.set_responses("generate_structured", [ExtractionResponse(extracted=[])]) + extractor = PredictCalibrateExtractor(llm) + + await extractor.extract(_episode(), existing_knowledge=[_knowledge()]) + + prompt = llm.calls["generate_structured"][0][0] + assert "Self-Contained Statement Rules" in prompt + + +async def test_prompt_contains_reference_timestamp(): + """Both extraction paths include reference_timestamp in the prompt.""" + llm = MockLLM() + llm.set_responses("generate_structured", [ExtractionResponse(extracted=[])]) + extractor = PredictCalibrateExtractor(llm) + + await extractor.extract(_episode(), existing_knowledge=[]) + + prompt = llm.calls["generate_structured"][0][0] + assert "reference_timestamp" in prompt diff --git a/tests/test_memory_e2e.py b/tests/test_memory_e2e.py index d12ecc8..3d0d9d4 100644 --- a/tests/test_memory_e2e.py +++ b/tests/test_memory_e2e.py @@ -63,7 +63,7 @@ async def test_process_returns_count(tmp_path): embedder = MockEmbedder() llm.set_responses("generate", [_episode_json()]) - llm.set_responses("generate_structured", [_extraction(["Fact A", "Fact B"])]) + llm.set_responses("generate_structured", [_extraction(["User knows Fact A", "User knows Fact B"])]) memory = _make_memory(tmp_path, llm, embedder, enable_knowledge_dedup=False) async with memory: @@ -151,7 +151,7 @@ async def test_clear_user(tmp_path): embedder = MockEmbedder() llm.set_responses("generate", [_episode_json()]) - llm.set_responses("generate_structured", [_extraction(["Fact to delete"])]) + llm.set_responses("generate_structured", [_extraction(["User has a fact to delete"])]) memory = _make_memory(tmp_path, llm, embedder) async with memory: @@ -162,7 +162,7 @@ async def test_clear_user(tmp_path): assert counts["messages"] >= 2 assert counts["episodes"] >= 1 - result = await memory.retrieve("Fact to delete", user_id="user1") + result = 
await memory.retrieve("User has a fact to delete", user_id="user1") assert len(result.retrieved_knowledge) == 0 @@ -197,7 +197,7 @@ async def test_auto_process_at_threshold(tmp_path): _episode_json("Auto", "Auto processed"), # episode gen ], ) - llm.set_responses("generate_structured", [_extraction(["Auto fact"])]) + llm.set_responses("generate_structured", [_extraction(["User has auto fact"])]) memory = _make_memory(tmp_path, llm, embedder, auto_process=True, batch_threshold=4) async with memory: @@ -219,7 +219,7 @@ async def test_flush_forces_processing(tmp_path): embedder = MockEmbedder() llm.set_responses("generate", [_episode_json("Flushed", "Flushed episode")]) - llm.set_responses("generate_structured", [_extraction(["Flushed fact"])]) + llm.set_responses("generate_structured", [_extraction(["User has flushed fact"])]) memory = _make_memory(tmp_path, llm, embedder, auto_process=True, batch_threshold=100) async with memory: @@ -229,6 +229,38 @@ async def test_flush_forces_processing(tmp_path): assert count == 1 +async def test_atomization_filters_bad_statements(tmp_path): + """LLM returns mix of good and bad statements — only atomized ones survive.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json("Mixed", "Mixed quality")]) + llm.set_responses( + "generate_structured", + [ + ExtractionResponse( + extracted=[ + ExtractedKnowledge(statement="User prefers Vim", knowledge_type="new", confidence=0.9), + ExtractedKnowledge(statement="I like Vim", knowledge_type="new", confidence=0.9), + ExtractedKnowledge(statement="User started yesterday", knowledge_type="new", confidence=0.9), + ExtractedKnowledge(statement="He uses Neovim", knowledge_type="new", confidence=0.9), + ExtractedKnowledge(statement="User was advised to try Emacs", knowledge_type="new", confidence=0.9), + ] + ) + ], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "I prefer Vim", "Nice!", 
timestamp=_ts()) + count = await memory.process("user1") + # Only "User prefers Vim" should survive all filters + assert count == 1 + + result = await memory.retrieve("Vim", user_id="user1") + assert result.retrieved_knowledge[0].statement == "User prefers Vim" + + async def test_dedup_skips_duplicate(tmp_path): """enable_knowledge_dedup=True → identical statement not stored twice.""" llm = MockLLM() diff --git a/tests/test_temporal.py b/tests/test_temporal.py new file mode 100644 index 0000000..5783aaa --- /dev/null +++ b/tests/test_temporal.py @@ -0,0 +1,205 @@ +"""Tests for temporal parsing utilities.""" + +from datetime import datetime, timezone + +from memv.processing.temporal import ( + backfill_temporal_fields, + contains_relative_time, + parse_temporal_expression, +) + + +def _ref(year=2024, month=6, day=15, hour=12): + return datetime(year, month, day, hour, 0, 0, tzinfo=timezone.utc) + + +# --------------------------------------------------------------------------- +# contains_relative_time +# --------------------------------------------------------------------------- + + +class TestContainsRelativeTime: + def test_yesterday(self): + assert contains_relative_time("User started yesterday") + + def test_today(self): + assert contains_relative_time("User is working today") + + def test_tomorrow(self): + assert contains_relative_time("User will finish tomorrow") + + def test_last_week(self): + assert contains_relative_time("User mentioned last week") + + def test_next_month(self): + assert contains_relative_time("User plans to move next month") + + def test_n_days_ago(self): + assert contains_relative_time("User joined 3 days ago") + + def test_n_years_from_now(self): + assert contains_relative_time("User plans to retire 5 years from now") + + def test_recently(self): + assert contains_relative_time("User recently switched to Rust") + + def test_soon(self): + assert contains_relative_time("User will deploy soon") + + def test_absolute_date_not_flagged(self): 
+ assert not contains_relative_time("User started on 2024-06-14") + + def test_plain_text_not_flagged(self): + assert not contains_relative_time("User prefers Python over JavaScript") + + def test_iso_date_not_flagged(self): + assert not contains_relative_time("User moved to Berlin in 2023") + + def test_case_insensitive(self): + assert contains_relative_time("User started YESTERDAY") + + +# --------------------------------------------------------------------------- +# parse_temporal_expression +# --------------------------------------------------------------------------- + + +class TestParseTemporalExpression: + def test_yesterday(self): + result = parse_temporal_expression("yesterday", _ref()) + assert result == datetime(2024, 6, 14, 12, 0, 0, tzinfo=timezone.utc) + + def test_tomorrow(self): + result = parse_temporal_expression("tomorrow", _ref()) + assert result == datetime(2024, 6, 16, 12, 0, 0, tzinfo=timezone.utc) + + def test_today(self): + result = parse_temporal_expression("today", _ref()) + assert result == _ref() + + def test_last_week(self): + result = parse_temporal_expression("last week", _ref()) + assert result == datetime(2024, 6, 8, 12, 0, 0, tzinfo=timezone.utc) + + def test_3_days_ago(self): + result = parse_temporal_expression("3 days ago", _ref()) + assert result == datetime(2024, 6, 12, 12, 0, 0, tzinfo=timezone.utc) + + def test_2_months_from_now(self): + result = parse_temporal_expression("2 months from now", _ref()) + assert result == datetime(2024, 8, 15, 12, 0, 0, tzinfo=timezone.utc) + + def test_iso_8601(self): + result = parse_temporal_expression("2024-01-15", _ref()) + assert result is not None + assert result.year == 2024 + assert result.month == 1 + assert result.day == 15 + + def test_natural_date(self): + result = parse_temporal_expression("January 2024", _ref()) + assert result is not None + assert result.year == 2024 + assert result.month == 1 + + def test_garbage_returns_none(self): + result = parse_temporal_expression("not a 
date at all xyz", _ref()) + assert result is None + + def test_last_monday(self): + # 2024-06-15 is a Saturday (weekday=5), last Monday = 2024-06-10 + result = parse_temporal_expression("last monday", _ref()) + assert result is not None + assert result.weekday() == 0 # Monday + assert result < _ref() + + def test_next_friday(self): + # 2024-06-15 is a Saturday (weekday=5), next Friday = 2024-06-21 + result = parse_temporal_expression("next friday", _ref()) + assert result is not None + assert result.weekday() == 4 # Friday + assert result > _ref() + + +# --------------------------------------------------------------------------- +# backfill_temporal_fields +# --------------------------------------------------------------------------- + + +class TestBackfillTemporalFields: + def test_since_x(self): + valid_at, invalid_at = backfill_temporal_fields( + "since January 2024", + None, + None, + _ref(), + ) + assert valid_at is not None + assert valid_at.year == 2024 + assert valid_at.month == 1 + assert invalid_at is None + + def test_until_x(self): + valid_at, invalid_at = backfill_temporal_fields( + "until December 2024", + None, + None, + _ref(), + ) + assert valid_at is None + assert invalid_at is not None + assert invalid_at.year == 2024 + assert invalid_at.month == 12 + + def test_from_x_to_y(self): + valid_at, invalid_at = backfill_temporal_fields( + "from January 2024 to June 2024", + None, + None, + _ref(), + ) + assert valid_at is not None + assert valid_at.month == 1 + assert invalid_at is not None + assert invalid_at.month == 6 + + def test_preserves_existing_valid_at(self): + existing = datetime(2023, 1, 1, tzinfo=timezone.utc) + valid_at, invalid_at = backfill_temporal_fields( + "since March 2024", + existing, + None, + _ref(), + ) + assert valid_at == existing # preserved, not overwritten + + def test_preserves_existing_invalid_at(self): + existing = datetime(2025, 12, 31, tzinfo=timezone.utc) + valid_at, invalid_at = backfill_temporal_fields( + "until 
March 2024", + None, + existing, + _ref(), + ) + assert invalid_at == existing # preserved + + def test_none_temporal_info_noop(self): + valid_at, invalid_at = backfill_temporal_fields(None, None, None, _ref()) + assert valid_at is None + assert invalid_at is None + + def test_empty_string_noop(self): + valid_at, invalid_at = backfill_temporal_fields("", None, None, _ref()) + assert valid_at is None + assert invalid_at is None + + def test_unparseable_temporal_info(self): + valid_at, invalid_at = backfill_temporal_fields( + "some random text", + None, + None, + _ref(), + ) + # Should not crash, fields stay None + assert valid_at is None + assert invalid_at is None diff --git a/uv.lock b/uv.lock index 18322ed..ecbc5ad 100644 --- a/uv.lock +++ b/uv.lock @@ -2321,6 +2321,7 @@ dependencies = [ { name = "openai" }, { name = "pydantic" }, { name = "pydantic-ai" }, + { name = "python-dateutil" }, { name = "sqlite-vec" }, { name = "textual" }, ] @@ -2357,6 +2358,7 @@ requires-dist = [ { name = "openai", specifier = ">=2.15.0" }, { name = "pydantic", specifier = ">=2.12.5" }, { name = "pydantic-ai", specifier = ">=1.43.0" }, + { name = "python-dateutil", specifier = ">=2.9.0" }, { name = "sqlite-vec", specifier = ">=0.1.6" }, { name = "textual", specifier = ">=3.5.0" }, ] From 09de2b3ab746b05c8fc597613e19edd78989b149 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Sat, 21 Feb 2026 01:30:14 +0100 Subject: [PATCH 02/10] fix(extraction): address PR #13 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use word-boundary regex (^User\b) instead of startswith("User") - Expand _ASSISTANT_SOURCE_RE to catch told/instructed/encouraged/shown/given - Use removesuffix("s") instead of rstrip("s") for unit normalization - Add comment on intentional .match() anchoring in temporal parser - Make _FIRST_PERSON_RE case-insensitive for my/me/we/our (not I) - Compute preview lazily (after confidence gate) - Use placeholder syntax in 
ATOMIZATION_RULES date examples - Add tests: same-weekday edge case, backfill with relative date, assistant-sourced "told" variant - Fix test fixtures for word-boundary validation (User1 → User) 182 tests passing, lint and typecheck clean. --- src/memv/memory/_pipeline.py | 25 ++++++++++++++----------- src/memv/processing/prompts.py | 2 +- src/memv/processing/temporal.py | 3 ++- tests/test_atomization.py | 20 +++++++++++++++++++- tests/test_memory_e2e.py | 16 ++++++++-------- tests/test_temporal.py | 20 ++++++++++++++++++++ 6 files changed, 64 insertions(+), 22 deletions(-) diff --git a/src/memv/memory/_pipeline.py b/src/memv/memory/_pipeline.py index 8b3482a..e2bcbbf 100644 --- a/src/memv/memory/_pipeline.py +++ b/src/memv/memory/_pipeline.py @@ -20,8 +20,12 @@ logger = logging.getLogger(__name__) # Compiled regexes for validation checks -_FIRST_PERSON_RE = re.compile(r"\b(I|my|me|we|our)\b") -_ASSISTANT_SOURCE_RE = re.compile(r"\b(?:was|were)\s+(?:advised|suggested|recommended)\b", re.IGNORECASE) +_THIRD_PERSON_RE = re.compile(r"^User\b") +_FIRST_PERSON_RE = re.compile(r"\b(I|[Mm][Yy]|[Mm][Ee]|[Ww][Ee]|[Oo][Uu][Rr])\b") +_ASSISTANT_SOURCE_RE = re.compile( + r"\b(?:was|were)\s+(?:advised|suggested|recommended|told|instructed|encouraged|shown|given)\b", + re.IGNORECASE, +) class Pipeline: @@ -214,27 +218,26 @@ async def _process_episode(self, messages: list[Message], user_id: str) -> int: def _validate_extraction(self, item: ExtractedKnowledge) -> bool: """Filter extractions that are low-confidence or not self-contained.""" - stmt = item.statement - preview = stmt[:60] - if item.confidence < 0.7: - logger.debug("Rejected (low confidence %.2f): %s", item.confidence, preview) + logger.debug("Rejected (low confidence %.2f): %s", item.confidence, item.statement[:60]) return False - if not stmt.startswith("User"): - logger.debug("Rejected (not third-person): %s", preview) + stmt = item.statement + + if not _THIRD_PERSON_RE.match(stmt): + logger.debug("Rejected (not 
third-person): %s", stmt[:60]) return False if _FIRST_PERSON_RE.search(stmt): - logger.debug("Rejected (first-person pronoun): %s", preview) + logger.debug("Rejected (first-person pronoun): %s", stmt[:60]) return False if contains_relative_time(stmt): - logger.debug("Rejected (unresolved relative time): %s", preview) + logger.debug("Rejected (unresolved relative time): %s", stmt[:60]) return False if _ASSISTANT_SOURCE_RE.search(stmt): - logger.debug("Rejected (assistant-sourced): %s", preview) + logger.debug("Rejected (assistant-sourced): %s", stmt[:60]) return False return True diff --git a/src/memv/processing/prompts.py b/src/memv/processing/prompts.py index 7351315..a02ecc2 100644 --- a/src/memv/processing/prompts.py +++ b/src/memv/processing/prompts.py @@ -95,7 +95,7 @@ - First person: "I", "my", "me", "we", "our" **REQUIRE in output statements:** -- Absolute dates when temporal info exists: "on 2024-06-15", not "yesterday" +- Absolute dates when temporal info exists: "on [resolved date]", not "yesterday" - Specific names: "User's React project at Vstorm", not "the project" - Third person with "User" as subject: "User prefers Python", not "I prefer Python" diff --git a/src/memv/processing/temporal.py b/src/memv/processing/temporal.py index d2099bb..cab036b 100644 --- a/src/memv/processing/temporal.py +++ b/src/memv/processing/temporal.py @@ -84,10 +84,11 @@ def parse_temporal_expression(text: str, reference: datetime) -> datetime | None return reference + relativedelta(days=days_diff) # "N days/weeks/months/years ago/from now" + # .match() is intentionally start-anchored — input is already stripped/normalized match = _N_UNITS_PATTERN.match(normalized) if match: n = int(match.group(1)) - unit = match.group(2).rstrip("s") # normalize "days" -> "day" + unit = match.group(2).removesuffix("s") # normalize "days" -> "day" direction = -1 if match.group(3).lower() == "ago" else 1 unit_map = {"day": "days", "week": "weeks", "month": "months", "year": "years"} delta = 
relativedelta(**{unit_map[unit]: n * direction}) diff --git a/tests/test_atomization.py b/tests/test_atomization.py index 77d3164..b7e4547 100644 --- a/tests/test_atomization.py +++ b/tests/test_atomization.py @@ -99,7 +99,7 @@ async def test_rejects_relative_time(tmp_path): async def test_rejects_assistant_sourced(tmp_path): - """Statement with 'was advised/suggested/recommended' gets filtered.""" + """Statement with 'was advised/suggested/recommended/told/instructed' gets filtered.""" llm = MockLLM() embedder = MockEmbedder() @@ -116,6 +116,24 @@ async def test_rejects_assistant_sourced(tmp_path): assert count == 0 +async def test_rejects_assistant_sourced_told(tmp_path): + """'was told to' variant also gets filtered.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [_extraction([ExtractedKnowledge(statement="User was told to migrate to Postgres", knowledge_type="new", confidence=0.9)])], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "hi", "Migrate to Postgres!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 0 + + async def test_accepts_clean_statement(tmp_path): """Properly atomized statement passes all checks.""" llm = MockLLM() diff --git a/tests/test_memory_e2e.py b/tests/test_memory_e2e.py index 3d0d9d4..a8ad073 100644 --- a/tests/test_memory_e2e.py +++ b/tests/test_memory_e2e.py @@ -123,8 +123,8 @@ async def test_user_isolation_e2e(tmp_path): llm.set_responses( "generate_structured", [ - _extraction(["User1 likes cats"]), - _extraction(["User2 likes dogs"]), + _extraction(["User likes cats"]), + _extraction(["User likes dogs"]), ], ) @@ -135,15 +135,15 @@ async def test_user_isolation_e2e(tmp_path): await memory.add_exchange("user2", "I like dogs", "Cool!", timestamp=_ts()) await memory.process("user2") - r1 = await memory.retrieve("likes", user_id="user1") - r2 = 
await memory.retrieve("likes", user_id="user2") + r1 = await memory.retrieve("cats", user_id="user1") + r2 = await memory.retrieve("dogs", user_id="user2") s1 = [k.statement for k in r1.retrieved_knowledge] s2 = [k.statement for k in r2.retrieved_knowledge] - assert "User1 likes cats" in s1 - assert "User2 likes dogs" not in s1 - assert "User2 likes dogs" in s2 - assert "User1 likes cats" not in s2 + assert "User likes cats" in s1 + assert "User likes dogs" not in s1 + assert "User likes dogs" in s2 + assert "User likes cats" not in s2 async def test_clear_user(tmp_path): diff --git a/tests/test_temporal.py b/tests/test_temporal.py index 5783aaa..29115a1 100644 --- a/tests/test_temporal.py +++ b/tests/test_temporal.py @@ -120,6 +120,14 @@ def test_next_friday(self): assert result.weekday() == 4 # Friday assert result > _ref() + def test_next_monday_when_ref_is_monday(self): + # 2024-06-10 is a Monday — "next monday" should return 2024-06-17, not same day + ref_monday = _ref(year=2024, month=6, day=10) + result = parse_temporal_expression("next monday", ref_monday) + assert result is not None + assert result.weekday() == 0 # Monday + assert result > ref_monday # must be future, not same day + # --------------------------------------------------------------------------- # backfill_temporal_fields @@ -203,3 +211,15 @@ def test_unparseable_temporal_info(self): # Should not crash, fields stay None assert valid_at is None assert invalid_at is None + + def test_since_relative_date(self): + # "since yesterday" should resolve relative to reference + valid_at, invalid_at = backfill_temporal_fields( + "since yesterday", + None, + None, + _ref(), + ) + assert valid_at is not None + assert valid_at == datetime(2024, 6, 14, 12, 0, 0, tzinfo=timezone.utc) + assert invalid_at is None From ee0408eac20c165fa63125ad69159549e5c3bb20 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Sat, 21 Feb 2026 01:43:02 +0100 Subject: [PATCH 03/10] fix(extraction): handle I/O false positive and 
document scope - Add negative lookahead/lookbehind for "/" in _FIRST_PERSON_RE to avoid matching "I" in "I/O" - Document ^User\b scope constraint in _validate_extraction docstring - Add control flow comment in backfill_temporal_fields --- src/memv/memory/_pipeline.py | 9 +++++++-- src/memv/processing/temporal.py | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/memv/memory/_pipeline.py b/src/memv/memory/_pipeline.py index e2bcbbf..d15a0b1 100644 --- a/src/memv/memory/_pipeline.py +++ b/src/memv/memory/_pipeline.py @@ -21,7 +21,7 @@ # Compiled regexes for validation checks _THIRD_PERSON_RE = re.compile(r"^User\b") -_FIRST_PERSON_RE = re.compile(r"\b(I|[Mm][Yy]|[Mm][Ee]|[Ww][Ee]|[Oo][Uu][Rr])\b") +_FIRST_PERSON_RE = re.compile(r"(? int: return stored_count def _validate_extraction(self, item: ExtractedKnowledge) -> bool: - """Filter extractions that are low-confidence or not self-contained.""" + """Filter extractions that are low-confidence or not self-contained. + + Scope: only User-subject statements pass. Third-party facts ("Bob prefers + Postgres") are intentionally dropped — the knowledge base stores facts + about the user, not about people mentioned in conversation. 
+ """ if item.confidence < 0.7: logger.debug("Rejected (low confidence %.2f): %s", item.confidence, item.statement[:60]) return False diff --git a/src/memv/processing/temporal.py b/src/memv/processing/temporal.py index cab036b..806e27b 100644 --- a/src/memv/processing/temporal.py +++ b/src/memv/processing/temporal.py @@ -128,6 +128,8 @@ def backfill_temporal_fields( invalid_at = parsed.replace(tzinfo=timezone.utc) if parsed.tzinfo is None else parsed return valid_at, invalid_at + # "from X to Y" handled above — remaining checks are single-endpoint patterns + # "since X" → valid_at if valid_at is None: since = _SINCE_PATTERN.search(temporal_info) From ea20e2c53140ab8a005aa00a43e1a671d3f24604 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Sat, 21 Feb 2026 18:00:27 +0100 Subject: [PATCH 04/10] refactor(extraction): drop assistant-source regex filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Regex can't distinguish semantic intent — "User was given a promotion" (valid) vs "User was told to use Docker" (assistant-sourced). Prompt- level EXCLUSIONS already handle this correctly at the right layer. --- src/memv/memory/_pipeline.py | 8 -------- tests/test_atomization.py | 36 ------------------------------------ 2 files changed, 44 deletions(-) diff --git a/src/memv/memory/_pipeline.py b/src/memv/memory/_pipeline.py index d15a0b1..c631cf9 100644 --- a/src/memv/memory/_pipeline.py +++ b/src/memv/memory/_pipeline.py @@ -22,10 +22,6 @@ # Compiled regexes for validation checks _THIRD_PERSON_RE = re.compile(r"^User\b") _FIRST_PERSON_RE = re.compile(r"(? 
bool: logger.debug("Rejected (unresolved relative time): %s", stmt[:60]) return False - if _ASSISTANT_SOURCE_RE.search(stmt): - logger.debug("Rejected (assistant-sourced): %s", stmt[:60]) - return False - return True async def _is_duplicate_knowledge(self, embedding: list[float], user_id: str) -> tuple[bool, float]: diff --git a/tests/test_atomization.py b/tests/test_atomization.py index b7e4547..8805875 100644 --- a/tests/test_atomization.py +++ b/tests/test_atomization.py @@ -98,42 +98,6 @@ async def test_rejects_relative_time(tmp_path): assert count == 0 -async def test_rejects_assistant_sourced(tmp_path): - """Statement with 'was advised/suggested/recommended/told/instructed' gets filtered.""" - llm = MockLLM() - embedder = MockEmbedder() - - llm.set_responses("generate", [_episode_json()]) - llm.set_responses( - "generate_structured", - [_extraction([ExtractedKnowledge(statement="User was advised to use Docker", knowledge_type="new", confidence=0.9)])], - ) - - memory = _make_memory(tmp_path, llm, embedder) - async with memory: - await memory.add_exchange("user1", "hi", "Use Docker!", timestamp=_ts()) - count = await memory.process("user1") - assert count == 0 - - -async def test_rejects_assistant_sourced_told(tmp_path): - """'was told to' variant also gets filtered.""" - llm = MockLLM() - embedder = MockEmbedder() - - llm.set_responses("generate", [_episode_json()]) - llm.set_responses( - "generate_structured", - [_extraction([ExtractedKnowledge(statement="User was told to migrate to Postgres", knowledge_type="new", confidence=0.9)])], - ) - - memory = _make_memory(tmp_path, llm, embedder) - async with memory: - await memory.add_exchange("user1", "hi", "Migrate to Postgres!", timestamp=_ts()) - count = await memory.process("user1") - assert count == 0 - - async def test_accepts_clean_statement(tmp_path): """Properly atomized statement passes all checks.""" llm = MockLLM() From 5f46be14b74d89615021bbce9238adea148b7810 Mon Sep 17 00:00:00 2001 From: bartosz 
roguski Date: Sat, 21 Feb 2026 18:34:30 +0100 Subject: [PATCH 05/10] fix(extraction): address PR #13 review round 5 Add missing _ASSISTANT_SOURCE_RE code-level check with infinitive requirement ("was advised to") to avoid false positives on legitimate passive constructions. Fix _THIRD_PERSON_RE to accept lowercase "user". Fix began? regex typo, remove later/earlier false positives, remove bare "to" from _UNTIL_PATTERN, extract weekday constant. --- src/memv/memory/_pipeline.py | 11 ++++++- src/memv/processing/temporal.py | 11 ++++--- tests/test_atomization.py | 54 +++++++++++++++++++++++++++++++++ tests/test_temporal.py | 28 +++++++++++++++++ 4 files changed, 98 insertions(+), 6 deletions(-) diff --git a/src/memv/memory/_pipeline.py b/src/memv/memory/_pipeline.py index c631cf9..dfc189b 100644 --- a/src/memv/memory/_pipeline.py +++ b/src/memv/memory/_pipeline.py @@ -20,8 +20,13 @@ logger = logging.getLogger(__name__) # Compiled regexes for validation checks -_THIRD_PERSON_RE = re.compile(r"^User\b") +_THIRD_PERSON_RE = re.compile(r"^[Uu]ser\b") _FIRST_PERSON_RE = re.compile(r"(? 
bool: logger.debug("Rejected (unresolved relative time): %s", stmt[:60]) return False + if _ASSISTANT_SOURCE_RE.search(stmt): + logger.debug("Rejected (assistant-sourced): %s", stmt[:60]) + return False + return True async def _is_duplicate_knowledge(self, embedding: list[float], user_id: str) -> tuple[bool, float]: diff --git a/src/memv/processing/temporal.py b/src/memv/processing/temporal.py index 806e27b..9e3f93b 100644 --- a/src/memv/processing/temporal.py +++ b/src/memv/processing/temporal.py @@ -17,7 +17,7 @@ r"\b(" r"yesterday|today|tomorrow" r"|(?:last|next|this)\s+(?:week|month|year|monday|tuesday|wednesday|thursday|friday|saturday|sunday)" - r"|recently|soon|later|earlier" + r"|recently|soon" r"|\d+\s+(?:days?|weeks?|months?|years?)\s+(?:ago|from\s+now)" r")\b", re.IGNORECASE, @@ -40,14 +40,16 @@ } # N units ago / from now +_WEEKDAY_NAMES = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"] + _N_UNITS_PATTERN = re.compile( r"(\d+)\s+(days?|weeks?|months?|years?)\s+(ago|from\s+now)", re.IGNORECASE, ) # Backfill patterns for temporal_info strings -_SINCE_PATTERN = re.compile(r"(?:since|from|starting|began?)\s+(.+?)(?:\s*$|\s*(?:to|until|through)\s+)", re.IGNORECASE) -_UNTIL_PATTERN = re.compile(r"(?:until|to|through|ending|ended?)\s+(.+?)$", re.IGNORECASE) +_SINCE_PATTERN = re.compile(r"(?:since|from|starting|began)\s+(.+?)(?:\s*$|\s*(?:to|until|through)\s+)", re.IGNORECASE) +_UNTIL_PATTERN = re.compile(r"(?:until|through|ending|ended?)\s+(.+?)$", re.IGNORECASE) _FROM_TO_PATTERN = re.compile(r"(?:from|since)\s+(.+?)\s+(?:to|until|through)\s+(.+?)$", re.IGNORECASE) @@ -71,9 +73,8 @@ def parse_temporal_expression(text: str, reference: datetime) -> datetime | None # "last/next " patterns for prefix, direction in [("last", -1), ("next", 1)]: - for day_name in ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]: + for target_weekday, day_name in enumerate(_WEEKDAY_NAMES): if normalized == f"{prefix} 
{day_name}": - target_weekday = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"].index(day_name) days_diff = (reference.weekday() - target_weekday) % 7 if direction == -1: days_diff = days_diff or 7 diff --git a/tests/test_atomization.py b/tests/test_atomization.py index 8805875..d462db9 100644 --- a/tests/test_atomization.py +++ b/tests/test_atomization.py @@ -120,6 +120,60 @@ async def test_accepts_clean_statement(tmp_path): assert count == 1 +async def test_rejects_assistant_sourced(tmp_path): + """Statement with 'was advised to' pattern gets filtered.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [_extraction([ExtractedKnowledge(statement="User was advised to try Emacs", knowledge_type="new", confidence=0.9)])], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "The assistant told me to try Emacs", "Sure!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 0 + + +async def test_accepts_passive_without_infinitive(tmp_path): + """'was given a promotion' is NOT assistant-sourced — should pass.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + "generate_structured", + [_extraction([ExtractedKnowledge(statement="User was given a promotion", knowledge_type="new", confidence=0.9)])], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "I was given a promotion", "Congrats!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 1 + + +async def test_accepts_lowercase_user(tmp_path): + """LLM may output lowercase 'user' — should still pass third-person check.""" + llm = MockLLM() + embedder = MockEmbedder() + + llm.set_responses("generate", [_episode_json()]) + llm.set_responses( + 
"generate_structured", + [_extraction([ExtractedKnowledge(statement="user prefers Python", knowledge_type="new", confidence=0.9)])], + ) + + memory = _make_memory(tmp_path, llm, embedder) + async with memory: + await memory.add_exchange("user1", "I prefer Python", "Nice!", timestamp=_ts()) + count = await memory.process("user1") + assert count == 1 + + async def test_confidence_filter_unchanged(tmp_path): """Low-confidence items still rejected (regression check).""" llm = MockLLM() diff --git a/tests/test_temporal.py b/tests/test_temporal.py index 29115a1..7380f92 100644 --- a/tests/test_temporal.py +++ b/tests/test_temporal.py @@ -58,6 +58,12 @@ def test_iso_date_not_flagged(self): def test_case_insensitive(self): assert contains_relative_time("User started YESTERDAY") + def test_earlier_as_adjective_not_flagged(self): + assert not contains_relative_time("User prefers the earlier version of Python") + + def test_later_as_adjective_not_flagged(self): + assert not contains_relative_time("User upgraded to a later release of Node") + # --------------------------------------------------------------------------- # parse_temporal_expression @@ -223,3 +229,25 @@ def test_since_relative_date(self): assert valid_at is not None assert valid_at == datetime(2024, 6, 14, 12, 0, 0, tzinfo=timezone.utc) assert invalid_at is None + + def test_bare_to_not_matched_as_until(self): + # "related to data pipelines" should not trigger _UNTIL_PATTERN + valid_at, invalid_at = backfill_temporal_fields( + "related to data pipelines", + None, + None, + _ref(), + ) + assert valid_at is None + assert invalid_at is None + + def test_began_prefix(self): + valid_at, invalid_at = backfill_temporal_fields( + "began January 2024", + None, + None, + _ref(), + ) + assert valid_at is not None + assert valid_at.year == 2024 + assert valid_at.month == 1 From a572e2b5949439d642df6c2cd312b3bb7cb40c2d Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Sat, 21 Feb 2026 19:18:06 +0100 Subject: [PATCH 06/10] 
fix(test): test _FIRST_PERSON_RE instead of _THIRD_PERSON_RE Change test_rejects_first_person to use a statement that passes the third-person check so _FIRST_PERSON_RE is actually exercised. --- tests/test_atomization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_atomization.py b/tests/test_atomization.py index d462db9..4627a64 100644 --- a/tests/test_atomization.py +++ b/tests/test_atomization.py @@ -52,7 +52,7 @@ async def test_rejects_first_person(tmp_path): llm.set_responses("generate", [_episode_json()]) llm.set_responses( "generate_structured", - [_extraction([ExtractedKnowledge(statement="I like Python", knowledge_type="new", confidence=0.9)])], + [_extraction([ExtractedKnowledge(statement="User mentioned that my team uses React", knowledge_type="new", confidence=0.9)])], ) memory = _make_memory(tmp_path, llm, embedder) From 37547d5aa322e06773448ec37dc5ad72bb4ced65 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Sat, 21 Feb 2026 21:31:50 +0100 Subject: [PATCH 07/10] refactor(extraction): remove regex filters, use prompt-level enforcement Regex filters (_THIRD_PERSON_RE, _FIRST_PERSON_RE, _ASSISTANT_SOURCE_RE, contains_relative_time) only worked for English. Extraction quality is now enforced entirely at the prompt level via ATOMIZATION_RULES and EXCLUSIONS. _validate_extraction retains confidence >= 0.7 as the sole code-level gate. Prompts updated with nuanced source rules: extract assistant-communicated facts (dates, appointments) while rejecting suggestions/hypotheticals. 
--- CLAUDE.md | 6 ++ src/memv/memory/_pipeline.py | 37 +---------- src/memv/processing/prompts.py | 27 +++++--- tests/test_atomization.py | 110 ++++++++------------------------- tests/test_memory_e2e.py | 11 ++-- 5 files changed, 55 insertions(+), 136 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 8b8a0c5..4298d19 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -104,6 +104,12 @@ async with memory: **User isolation is mandatory**: All retrieval and storage operations require and filter by `user_id`. No cross-user queries possible by design. +**Extraction validation (`_pipeline.py`)**: `_validate_extraction` enforces a single check: confidence >= 0.7. All regex filters (third-person, first-person pronouns, relative time, assistant-sourced patterns) were removed because they only work for English. Quality enforcement is handled entirely at the prompt level via ATOMIZATION_RULES and EXCLUSIONS in `prompts.py`. + +**`temporal.py` regex gotchas**: Avoid broad word matches in `_RELATIVE_PATTERNS` (e.g. "later"/"earlier" match adjective usage). `_UNTIL_PATTERN` must not include bare "to" (matches non-temporal uses). `_SINCE_PATTERN` uses `began` not `began?` (the `?` quantifies the preceding `n`, making the trailing "n" optional so it would also match "bega"). + +**PR reviews**: The `claude` bot posts automated code reviews on PRs. Check with `gh pr view --comments` and address feedback before merging.
+ ## Code Style - Line length: 135 characters diff --git a/src/memv/memory/_pipeline.py b/src/memv/memory/_pipeline.py index dfc189b..210af1f 100644 --- a/src/memv/memory/_pipeline.py +++ b/src/memv/memory/_pipeline.py @@ -3,7 +3,6 @@ from __future__ import annotations import logging -import re from typing import TYPE_CHECKING from memv.models import ( @@ -12,22 +11,13 @@ Message, SemanticKnowledge, ) -from memv.processing.temporal import backfill_temporal_fields, contains_relative_time +from memv.processing.temporal import backfill_temporal_fields if TYPE_CHECKING: from memv.memory._lifecycle import LifecycleManager logger = logging.getLogger(__name__) -# Compiled regexes for validation checks -_THIRD_PERSON_RE = re.compile(r"^[Uu]ser\b") -_FIRST_PERSON_RE = re.compile(r"(? int: return stored_count def _validate_extraction(self, item: ExtractedKnowledge) -> bool: - """Filter extractions that are low-confidence or not self-contained. - - Scope: only User-subject statements pass. Third-party facts ("Bob prefers - Postgres") are intentionally dropped — the knowledge base stores facts - about the user, not about people mentioned in conversation. 
- """ + """Filter low-confidence extractions.""" if item.confidence < 0.7: logger.debug("Rejected (low confidence %.2f): %s", item.confidence, item.statement[:60]) return False - stmt = item.statement - - if not _THIRD_PERSON_RE.match(stmt): - logger.debug("Rejected (not third-person): %s", stmt[:60]) - return False - - if _FIRST_PERSON_RE.search(stmt): - logger.debug("Rejected (first-person pronoun): %s", stmt[:60]) - return False - - if contains_relative_time(stmt): - logger.debug("Rejected (unresolved relative time): %s", stmt[:60]) - return False - - if _ASSISTANT_SOURCE_RE.search(stmt): - logger.debug("Rejected (assistant-sourced): %s", stmt[:60]) - return False - return True async def _is_duplicate_knowledge(self, embedding: list[float], user_id: str) -> tuple[bool, float]: diff --git a/src/memv/processing/prompts.py b/src/memv/processing/prompts.py index a02ecc2..3110e31 100644 --- a/src/memv/processing/prompts.py +++ b/src/memv/processing/prompts.py @@ -68,9 +68,14 @@ - Conversation events ("User asked about X", "User requested Y") - extract the FACT, not the action - Assistant-sourced knowledge ("was advised to", "was suggested to", "was recommended to") -**CRITICAL SOURCE RULE - READ CAREFULLY:** -- ONLY extract facts the USER explicitly stated in their own messages +**CRITICAL SOURCE RULES - READ CAREFULLY:** +- Extract facts the user explicitly stated in their own messages +- DO extract factual information communicated by the assistant (dates, appointments, confirmations, scheduled events) - NEVER extract assistant suggestions, recommendations, or code examples as user facts +- Treat assistant suggestions about user intent as speculative — never encode them as facts +- NEVER attribute intent/action/statement to user if it originated in a hypothetical/suggestion/conditional from the assistant +- Ignore assistant-led topics unless user acts on them +- Attribute preferences only to explicit user claims — never to questions or reactions - If assistant says 
"use Python" and user doesn't respond with "yes" or confirm - DO NOT extract "User uses Python" - If assistant provides code in language X but user says "I use Y" - extract Y, not X - The user ASKING about something is NOT the same as the user USING it @@ -97,7 +102,7 @@ **REQUIRE in output statements:** - Absolute dates when temporal info exists: "on [resolved date]", not "yesterday" - Specific names: "User's React project at Vstorm", not "the project" -- Third person with "User" as subject: "User prefers Python", not "I prefer Python" +- Third person: "User prefers Python", not "I prefer Python" **Coreference resolution — resolve BEFORE writing the statement:** - "my kids" → "User's children" @@ -237,10 +242,12 @@ def cold_start_extraction_prompt(episode_title: str, original_messages: list[dic return f"""Extract HIGH-VALUE, PERSISTENT knowledge from this conversation. {timestamp_section} -**CRITICAL RULE: Extract ONLY from lines starting with ">>> USER:"** -- These are the user's actual words - the ONLY source of truth -- IGNORE all ASSISTANT lines completely -- Do NOT infer, expand, or modify what the user said +**SOURCE RULES:** +- Lines starting with ">>> USER:" are the user's actual words — the primary source of truth +- DO extract factual information communicated by the assistant (dates, appointments, confirmations) +- Do NOT extract assistant suggestions, recommendations, or hypotheticals as user facts +- Treat assistant suggestions about user intent as speculative — never encode them as facts +- Attribute preferences only to explicit user claims — never to assistant questions or reactions - Preserve the user's exact phrasing and technical terms @@ -318,8 +325,10 @@ def extraction_prompt_with_prediction(prediction: str, conversation: str, refere return f"""Extract valuable knowledge by comparing actual conversation with predicted content. 
{timestamp_section} -**CRITICAL: Extract ONLY from lines starting with ">>> USER:" - these are the user's actual words.** -**IGNORE all ASSISTANT lines - those are suggestions, not user facts.** +**SOURCE RULES:** +- Lines starting with ">>> USER:" are the user's actual words — the primary source of truth. +- DO extract factual info communicated by the assistant (dates, appointments, confirmations). +- Do NOT extract assistant suggestions, recommendations, or hypotheticals as user facts. {prediction} diff --git a/tests/test_atomization.py b/tests/test_atomization.py index 4627a64..dc15a90 100644 --- a/tests/test_atomization.py +++ b/tests/test_atomization.py @@ -40,66 +40,30 @@ def _make_memory(tmp_path, llm, embedder): # --------------------------------------------------------------------------- -# Validation filter: unit-level via e2e pipeline +# Confidence threshold # --------------------------------------------------------------------------- -async def test_rejects_first_person(tmp_path): - """Statement with 'I' / 'my' gets filtered out.""" +async def test_rejects_low_confidence(tmp_path): + """Low-confidence items rejected.""" llm = MockLLM() embedder = MockEmbedder() llm.set_responses("generate", [_episode_json()]) llm.set_responses( "generate_structured", - [_extraction([ExtractedKnowledge(statement="User mentioned that my team uses React", knowledge_type="new", confidence=0.9)])], - ) - - memory = _make_memory(tmp_path, llm, embedder) - async with memory: - await memory.add_exchange("user1", "I like Python", "Cool!", timestamp=_ts()) - count = await memory.process("user1") - assert count == 0 - - -async def test_rejects_non_third_person(tmp_path): - """Statement not starting with 'User' gets filtered.""" - llm = MockLLM() - embedder = MockEmbedder() - - llm.set_responses("generate", [_episode_json()]) - llm.set_responses( - "generate_structured", - [_extraction([ExtractedKnowledge(statement="He likes Python", knowledge_type="new", confidence=0.9)])], - ) - 
- memory = _make_memory(tmp_path, llm, embedder) - async with memory: - await memory.add_exchange("user1", "I like Python", "Cool!", timestamp=_ts()) - count = await memory.process("user1") - assert count == 0 - - -async def test_rejects_relative_time(tmp_path): - """Statement with unresolved 'yesterday' gets filtered.""" - llm = MockLLM() - embedder = MockEmbedder() - - llm.set_responses("generate", [_episode_json()]) - llm.set_responses( - "generate_structured", - [_extraction([ExtractedKnowledge(statement="User started using FastAPI yesterday", knowledge_type="new", confidence=0.9)])], + [_extraction([ExtractedKnowledge(statement="User might like Rust", knowledge_type="new", confidence=0.3)])], ) memory = _make_memory(tmp_path, llm, embedder) async with memory: - await memory.add_exchange("user1", "I started using FastAPI yesterday", "Nice!", timestamp=_ts()) + await memory.add_exchange("user1", "Maybe Rust?", "Interesting!", timestamp=_ts()) count = await memory.process("user1") assert count == 0 -async def test_accepts_clean_statement(tmp_path): - """Properly atomized statement passes all checks.""" +async def test_accepts_high_confidence(tmp_path): + """Statement with confidence >= 0.7 passes.""" llm = MockLLM() embedder = MockEmbedder() @@ -120,76 +84,52 @@ async def test_accepts_clean_statement(tmp_path): assert count == 1 -async def test_rejects_assistant_sourced(tmp_path): - """Statement with 'was advised to' pattern gets filtered.""" - llm = MockLLM() - embedder = MockEmbedder() - - llm.set_responses("generate", [_episode_json()]) - llm.set_responses( - "generate_structured", - [_extraction([ExtractedKnowledge(statement="User was advised to try Emacs", knowledge_type="new", confidence=0.9)])], - ) - - memory = _make_memory(tmp_path, llm, embedder) - async with memory: - await memory.add_exchange("user1", "The assistant told me to try Emacs", "Sure!", timestamp=_ts()) - count = await memory.process("user1") - assert count == 0 - - -async def 
test_accepts_passive_without_infinitive(tmp_path): - """'was given a promotion' is NOT assistant-sourced — should pass.""" +async def test_accepts_non_user_subject(tmp_path): + """Statements not starting with 'User' are accepted.""" llm = MockLLM() embedder = MockEmbedder() llm.set_responses("generate", [_episode_json()]) llm.set_responses( "generate_structured", - [_extraction([ExtractedKnowledge(statement="User was given a promotion", knowledge_type="new", confidence=0.9)])], + [_extraction([ExtractedKnowledge(statement="The appointment was moved to March 15", knowledge_type="new", confidence=0.9)])], ) memory = _make_memory(tmp_path, llm, embedder) async with memory: - await memory.add_exchange("user1", "I was given a promotion", "Congrats!", timestamp=_ts()) + await memory.add_exchange("user1", "When is my appointment?", "It was moved to March 15", timestamp=_ts()) count = await memory.process("user1") assert count == 1 -async def test_accepts_lowercase_user(tmp_path): - """LLM may output lowercase 'user' — should still pass third-person check.""" +async def test_confidence_boundary(tmp_path): + """Confidence exactly at 0.7 passes, below does not.""" llm = MockLLM() embedder = MockEmbedder() llm.set_responses("generate", [_episode_json()]) llm.set_responses( "generate_structured", - [_extraction([ExtractedKnowledge(statement="user prefers Python", knowledge_type="new", confidence=0.9)])], + [ + _extraction( + [ + ExtractedKnowledge(statement="User works at Vstorm", knowledge_type="new", confidence=0.7), + ExtractedKnowledge(statement="User likes tea", knowledge_type="new", confidence=0.69), + ] + ) + ], ) memory = _make_memory(tmp_path, llm, embedder) async with memory: - await memory.add_exchange("user1", "I prefer Python", "Nice!", timestamp=_ts()) + await memory.add_exchange("user1", "I work at Vstorm and like tea", "Nice!", timestamp=_ts()) count = await memory.process("user1") assert count == 1 -async def test_confidence_filter_unchanged(tmp_path): - 
"""Low-confidence items still rejected (regression check).""" - llm = MockLLM() - embedder = MockEmbedder() - - llm.set_responses("generate", [_episode_json()]) - llm.set_responses( - "generate_structured", - [_extraction([ExtractedKnowledge(statement="User might like Rust", knowledge_type="new", confidence=0.3)])], - ) - - memory = _make_memory(tmp_path, llm, embedder) - async with memory: - await memory.add_exchange("user1", "Maybe Rust?", "Interesting!", timestamp=_ts()) - count = await memory.process("user1") - assert count == 0 +# --------------------------------------------------------------------------- +# Temporal backfill +# --------------------------------------------------------------------------- async def test_temporal_backfill_in_pipeline(tmp_path): diff --git a/tests/test_memory_e2e.py b/tests/test_memory_e2e.py index a8ad073..ed699cb 100644 --- a/tests/test_memory_e2e.py +++ b/tests/test_memory_e2e.py @@ -229,8 +229,8 @@ async def test_flush_forces_processing(tmp_path): assert count == 1 -async def test_atomization_filters_bad_statements(tmp_path): - """LLM returns mix of good and bad statements — only atomized ones survive.""" +async def test_confidence_filters_low_quality_statements(tmp_path): + """Only statements with confidence >= 0.7 survive the pipeline filter.""" llm = MockLLM() embedder = MockEmbedder() @@ -241,10 +241,8 @@ async def test_atomization_filters_bad_statements(tmp_path): ExtractionResponse( extracted=[ ExtractedKnowledge(statement="User prefers Vim", knowledge_type="new", confidence=0.9), - ExtractedKnowledge(statement="I like Vim", knowledge_type="new", confidence=0.9), - ExtractedKnowledge(statement="User started yesterday", knowledge_type="new", confidence=0.9), - ExtractedKnowledge(statement="He uses Neovim", knowledge_type="new", confidence=0.9), - ExtractedKnowledge(statement="User was advised to try Emacs", knowledge_type="new", confidence=0.9), + ExtractedKnowledge(statement="User likes Emacs", knowledge_type="new", 
confidence=0.5), + ExtractedKnowledge(statement="User uses Neovim", knowledge_type="new", confidence=0.3), ] ) ], @@ -254,7 +252,6 @@ async def test_atomization_filters_bad_statements(tmp_path): async with memory: await memory.add_exchange("user1", "I prefer Vim", "Nice!", timestamp=_ts()) count = await memory.process("user1") - # Only "User prefers Vim" should survive all filters assert count == 1 result = await memory.retrieve("Vim", user_id="user1") From afc38b9a96571f1513096f906ea1445364ba6929 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Sat, 21 Feb 2026 21:47:30 +0100 Subject: [PATCH 08/10] fix(extraction): address PR #14 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Change "will be REJECTED" to "are INVALID — resolve them or omit" (no code-level enforcement, prompt-only instruction) - Fix output format: "A concrete, self-contained fact from the conversation" instead of "USER explicitly stated" - Merge assistant-sourced exclusion into SOURCE RULES to remove tension between EXCLUSIONS list and source rules - Fix test statement: use atomized "User's dentist appointment was moved to March 15, 2024" instead of ambiguous "The appointment" --- src/memv/processing/prompts.py | 10 +++++----- tests/test_atomization.py | 14 +++++++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/memv/processing/prompts.py b/src/memv/processing/prompts.py index 3110e31..68194fe 100644 --- a/src/memv/processing/prompts.py +++ b/src/memv/processing/prompts.py @@ -66,12 +66,12 @@ - Obvious or common knowledge - Speculative or uncertain claims - Conversation events ("User asked about X", "User requested Y") - extract the FACT, not the action -- Assistant-sourced knowledge ("was advised to", "was suggested to", "was recommended to") **CRITICAL SOURCE RULES - READ CAREFULLY:** - Extract facts the user explicitly stated in their own messages - DO extract factual information communicated by the 
assistant (dates, appointments, confirmations, scheduled events) -- NEVER extract assistant suggestions, recommendations, or code examples as user facts +- NEVER extract assistant suggestions, recommendations, or hypotheticals as user facts + ("was advised to", "was suggested to", "was recommended to") - Treat assistant suggestions about user intent as speculative — never encode them as facts - NEVER attribute intent/action/statement to user if it originated in a hypothetical/suggestion/conditional from the assistant - Ignore assistant-led topics unless user acts on them @@ -236,7 +236,7 @@ def cold_start_extraction_prompt(episode_title: str, original_messages: list[dic {reference_timestamp} You MUST resolve ALL relative dates using this timestamp. -Statements with unresolved relative time ("yesterday", "last week") will be REJECTED. +Statements with unresolved relative time ("yesterday", "last week") are INVALID — resolve them or omit. """ return f"""Extract HIGH-VALUE, PERSISTENT knowledge from this conversation. @@ -320,7 +320,7 @@ def extraction_prompt_with_prediction(prediction: str, conversation: str, refere {reference_timestamp} You MUST resolve ALL relative dates using this timestamp. -Statements with unresolved relative time ("yesterday", "last week") will be REJECTED. +Statements with unresolved relative time ("yesterday", "last week") are INVALID — resolve them or omit. """ return f"""Extract valuable knowledge by comparing actual conversation with predicted content. 
@@ -380,7 +380,7 @@ def extraction_prompt_with_prediction(prediction: str, conversation: str, refere ## Output Format For each extracted item, specify: -- statement: A fact the USER explicitly stated (not assistant suggestions) +- statement: A concrete, self-contained fact from the conversation (see SOURCE RULES above) - knowledge_type: "new" if entirely new, "update" if refines existing, "contradiction" if conflicts - temporal_info: Human-readable description if mentioned ("since January 2024", "until next month") - valid_at: ISO 8601 datetime when fact became true, or null if unknown/always true (e.g., "2024-01-01T00:00:00Z") diff --git a/tests/test_atomization.py b/tests/test_atomization.py index dc15a90..125f100 100644 --- a/tests/test_atomization.py +++ b/tests/test_atomization.py @@ -92,7 +92,15 @@ async def test_accepts_non_user_subject(tmp_path): llm.set_responses("generate", [_episode_json()]) llm.set_responses( "generate_structured", - [_extraction([ExtractedKnowledge(statement="The appointment was moved to March 15", knowledge_type="new", confidence=0.9)])], + [ + _extraction( + [ + ExtractedKnowledge( + statement="User's dentist appointment was moved to March 15, 2024", knowledge_type="new", confidence=0.9 + ) + ] + ) + ], ) memory = _make_memory(tmp_path, llm, embedder) @@ -187,11 +195,11 @@ def test_warm_extraction_contains_atomization_rules(self): def test_cold_start_contains_strong_timestamp_wording(self): prompt = cold_start_extraction_prompt("Test", [{"role": "user", "content": "hi"}], "2024-06-15T12:00:00Z") - assert "will be REJECTED" in prompt + assert "are INVALID" in prompt def test_warm_extraction_contains_strong_timestamp_wording(self): prompt = extraction_prompt_with_prediction("prediction", ">>> USER: hi", "2024-06-15T12:00:00Z") - assert "will be REJECTED" in prompt + assert "are INVALID" in prompt def test_atomization_rules_constant_exists(self): assert "Self-Contained Statement Rules" in ATOMIZATION_RULES From 
953044a8ec8cb4f1ab0bece1dcde26d44ba8dd75 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Sat, 21 Feb 2026 21:53:19 +0100 Subject: [PATCH 09/10] fix(test): address PR #14 review round 2 - Rename test_accepts_non_user_subject to test_accepts_assistant_communicated_fact (name/docstring now matches what the test actually validates) - Assert which item survived confidence boundary (Vstorm, not tea) --- tests/test_atomization.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_atomization.py b/tests/test_atomization.py index 125f100..c20f8f1 100644 --- a/tests/test_atomization.py +++ b/tests/test_atomization.py @@ -84,8 +84,8 @@ async def test_accepts_high_confidence(tmp_path): assert count == 1 -async def test_accepts_non_user_subject(tmp_path): - """Statements not starting with 'User' are accepted.""" +async def test_accepts_assistant_communicated_fact(tmp_path): + """Facts communicated by the assistant (dates, appointments) are accepted.""" llm = MockLLM() embedder = MockEmbedder() @@ -134,6 +134,9 @@ async def test_confidence_boundary(tmp_path): count = await memory.process("user1") assert count == 1 + result = await memory.retrieve("Vstorm", user_id="user1") + assert result.retrieved_knowledge[0].statement == "User works at Vstorm" + # --------------------------------------------------------------------------- # Temporal backfill From 5ddda7dd775031288d36fb4b2952f4a516d1896e Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Sat, 21 Feb 2026 22:05:07 +0100 Subject: [PATCH 10/10] fix(extraction): address PR #14 review round 3 - Align cold_start output format with warm extraction (both now say "A concrete, self-contained fact from the conversation") - Simplify backfill condition: temporal_info alone triggers backfill (function is already idempotent on non-None fields) - Add retrieve assertion to test_accepts_assistant_communicated_fact --- src/memv/memory/_pipeline.py | 2 +- src/memv/processing/prompts.py | 2 +- 
tests/test_atomization.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/memv/memory/_pipeline.py b/src/memv/memory/_pipeline.py index 210af1f..256364d 100644 --- a/src/memv/memory/_pipeline.py +++ b/src/memv/memory/_pipeline.py @@ -158,7 +158,7 @@ async def _process_episode(self, messages: list[Message], user_id: str) -> int: # 6. Backfill temporal fields from temporal_info for item in extracted: - if item.temporal_info and (item.valid_at is None or item.invalid_at is None): + if item.temporal_info: item.valid_at, item.invalid_at = backfill_temporal_fields( item.temporal_info, item.valid_at, diff --git a/src/memv/processing/prompts.py b/src/memv/processing/prompts.py index 68194fe..4a609c4 100644 --- a/src/memv/processing/prompts.py +++ b/src/memv/processing/prompts.py @@ -301,7 +301,7 @@ def cold_start_extraction_prompt(episode_title: str, original_messages: list[dic ## Output Format For each extracted item, specify: -- statement: A clean, declarative fact about the user (third-person: "User...", not "I...") +- statement: A concrete, self-contained fact from the conversation (see SOURCE RULES above) - knowledge_type: "new" - temporal_info: Human-readable description if mentioned ("since January 2024", "until next month") - valid_at: ISO 8601 datetime when fact became true, or null if unknown/always true (e.g., "2024-01-01T00:00:00Z") diff --git a/tests/test_atomization.py b/tests/test_atomization.py index c20f8f1..40d933d 100644 --- a/tests/test_atomization.py +++ b/tests/test_atomization.py @@ -109,6 +109,9 @@ async def test_accepts_assistant_communicated_fact(tmp_path): count = await memory.process("user1") assert count == 1 + result = await memory.retrieve("dentist appointment", user_id="user1") + assert "March 15, 2024" in result.retrieved_knowledge[0].statement + async def test_confidence_boundary(tmp_path): """Confidence exactly at 0.7 passes, below does not."""