From 7d59af44410ba9a2fb67ce606bf13570c6299e6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 16:38:28 +0100 Subject: [PATCH 01/18] fix: prevent silent memory loss on consolidation LLM failure When all LLM retries are exhausted during consolidation, memories were being marked consolidated_at unconditionally, permanently excluding them from future consolidation runs without producing any observations. Fix with two complementary mechanisms: - Adaptive batch splitting: on LLM failure, the batch is halved and retried recursively down to batch_size=1, recovering most transient failures (rate limits, Pydantic validation on long prompts) without operator intervention - consolidation_failed_at column: only single-memory batches that still fail after all retries are marked here instead of consolidated_at, so they remain visible and retryable - New API endpoint POST /v1/default/banks/{bank_id}/consolidation/retry-failed resets these memories for the next consolidation run --- ...consolidation_failed_at_to_memory_units.py | 52 +++++ hindsight-api-slim/hindsight_api/api/http.py | 38 ++++ .../engine/consolidation/consolidator.py | 215 +++++++++++------- .../hindsight_api/engine/memory_engine.py | 52 +++++ 4 files changed, 276 insertions(+), 81 deletions(-) create mode 100644 hindsight-api-slim/hindsight_api/alembic/versions/a3b4c5d6e7f8_add_consolidation_failed_at_to_memory_units.py diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/a3b4c5d6e7f8_add_consolidation_failed_at_to_memory_units.py b/hindsight-api-slim/hindsight_api/alembic/versions/a3b4c5d6e7f8_add_consolidation_failed_at_to_memory_units.py new file mode 100644 index 00000000..b141c18e --- /dev/null +++ b/hindsight-api-slim/hindsight_api/alembic/versions/a3b4c5d6e7f8_add_consolidation_failed_at_to_memory_units.py @@ -0,0 +1,52 @@ +"""Add consolidation_failed_at column to memory_units for tracking persistent LLM failures. 
+ +When all LLM retries are exhausted on a single-memory batch, the memory is marked +with consolidation_failed_at instead of consolidated_at, so it is not silently lost +and can be retried later via the API. + +Revision ID: a3b4c5d6e7f8 +Revises: z1u2v3w4x5y6 +Create Date: 2026-03-17 +""" + +from collections.abc import Sequence + +from alembic import context, op + +revision: str = "a3b4c5d6e7f8" +down_revision: str | Sequence[str] | None = "z1u2v3w4x5y6" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def _get_schema_prefix() -> str: + """Get schema prefix for table names (required for multi-tenant support).""" + schema = context.config.get_main_option("target_schema") + return f'"{schema}".' if schema else "" + + +def upgrade() -> None: + schema = _get_schema_prefix() + + op.execute( + f""" + ALTER TABLE {schema}memory_units + ADD COLUMN IF NOT EXISTS consolidation_failed_at TIMESTAMPTZ DEFAULT NULL + """ + ) + + # Index to efficiently query memories that failed consolidation for a given bank + op.execute( + f""" + CREATE INDEX IF NOT EXISTS idx_memory_units_consolidation_failed + ON {schema}memory_units (bank_id, consolidation_failed_at) + WHERE consolidation_failed_at IS NOT NULL AND fact_type IN ('experience', 'world') + """ + ) + + +def downgrade() -> None: + schema = _get_schema_prefix() + + op.execute(f"DROP INDEX IF EXISTS {schema}idx_memory_units_consolidation_failed") + op.execute(f"ALTER TABLE {schema}memory_units DROP COLUMN IF EXISTS consolidation_failed_at") diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index 5d5206ba..16d00d1d 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -1328,6 +1328,14 @@ class ClearMemoryObservationsResponse(BaseModel): deleted_count: int +class RetryFailedConsolidationResponse(BaseModel): + """Response model for retrying failed consolidation.""" + + 
model_config = ConfigDict(json_schema_extra={"example": {"retried_count": 42}}) + + retried_count: int + + class BankStatsResponse(BaseModel): """Response model for bank statistics endpoint.""" @@ -3902,6 +3910,36 @@ async def api_clear_observations(bank_id: str, request_context: RequestContext = logger.error(f"Error in DELETE /v1/default/banks/{bank_id}/observations: {error_detail}") raise HTTPException(status_code=500, detail=str(e)) + @app.post( + "/v1/default/banks/{bank_id}/consolidation/retry-failed", + response_model=RetryFailedConsolidationResponse, + summary="Retry failed consolidation", + description=( + "Reset all memories that were permanently marked as failed during consolidation " + "(after exhausting all LLM retries and adaptive batch splitting) so they are " + "picked up again on the next consolidation run. Does not delete any observations." + ), + operation_id="retry_failed_consolidation", + tags=["Banks"], + ) + async def api_retry_failed_consolidation( + bank_id: str, request_context: RequestContext = Depends(get_request_context) + ): + """Reset consolidation-failed memories for retry.""" + try: + result = await app.state.memory.retry_failed_consolidation(bank_id, request_context=request_context) + return RetryFailedConsolidationResponse(retried_count=result["retried_count"]) + except OperationValidationError as e: + raise HTTPException(status_code=e.status_code, detail=e.reason) + except (AuthenticationError, HTTPException): + raise + except Exception as e: + import traceback + + error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}" + logger.error(f"Error in POST /v1/default/banks/{bank_id}/consolidation/retry-failed: {error_detail}") + raise HTTPException(status_code=500, detail=str(e)) + @app.delete( "/v1/default/banks/{bank_id}/memories/{memory_id}/observations", response_model=ClearMemoryObservationsResponse, diff --git a/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py 
b/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py index 578172d2..a309e015 100644 --- a/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py +++ b/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py @@ -80,6 +80,7 @@ class _BatchLLMResult: deletes: list[_DeleteAction] = field(default_factory=list) obs_count: int = 0 prompt_chars: int = 0 + failed: bool = False @dataclass @@ -240,6 +241,7 @@ async def run_consolidation_job( "observations_deleted": 0, "actions_executed": 0, "skipped": 0, + "memories_failed": 0, } # Track all unique tags from consolidated memories for mental model refresh filtering @@ -298,94 +300,141 @@ async def run_consolidation_job( if memory_tags: consolidated_tags.update(memory_tags) - async with pool.acquire() as conn: - # Determine observation_scopes for this batch. All memories in a batch share - # the same tags (enforced by tag_groups), so we only check the first memory. - # asyncpg returns JSONB columns as raw JSON strings, so parse if needed. - _obs_raw = llm_batch[0].get("observation_scopes") if llm_batch else None - _obs_parsed = json.loads(_obs_raw) if isinstance(_obs_raw, str) else _obs_raw - - # Resolve the scope spec into a concrete list[list[str]] (or None for combined). 
- if _obs_parsed == "per_tag": - _memory_tags = llm_batch[0].get("tags") or [] - obs_tags_list = [[tag] for tag in _memory_tags] if _memory_tags else None - elif _obs_parsed == "all_combinations": - _memory_tags = llm_batch[0].get("tags") or [] - obs_tags_list = ( - [ - list(combo) - for r in range(1, len(_memory_tags) + 1) - for combo in combinations(_memory_tags, r) - ] - if _memory_tags - else None - ) - elif _obs_parsed == "combined" or _obs_parsed is None: - obs_tags_list = None # single combined pass (default behaviour) - else: - # explicit list[list[str]] - obs_tags_list = _obs_parsed - - batch_deleted: int = 0 - if obs_tags_list: - # Multi-pass: run one observation consolidation pass per tag set - results = [] - for obs_tags in obs_tags_list: - pass_results, pass_deleted = await _process_memory_batch( + # Process llm_batch with adaptive splitting: on LLM failure, halve the sub-batch + # and retry, down to batch_size=1. Only if a single-memory batch still fails is + # the memory marked with consolidation_failed_at and excluded from future runs + # until explicitly retried via the API. + all_results: list[dict[str, Any]] = [] + all_deleted = 0 + succeeded_ids: list[Any] = [] + failed_ids: list[Any] = [] + + pending: list[list[dict[str, Any]]] = [llm_batch] + while pending: + sub_batch = pending.pop(0) + + async with pool.acquire() as conn: + # Determine observation_scopes for this sub-batch. All memories share + # the same tags (enforced by tag_groups), so we only check the first memory. + # asyncpg returns JSONB columns as raw JSON strings, so parse if needed. + _obs_raw = sub_batch[0].get("observation_scopes") if sub_batch else None + _obs_parsed = json.loads(_obs_raw) if isinstance(_obs_raw, str) else _obs_raw + + # Resolve the scope spec into a concrete list[list[str]] (or None for combined). 
+ if _obs_parsed == "per_tag": + _memory_tags = sub_batch[0].get("tags") or [] + obs_tags_list = [[tag] for tag in _memory_tags] if _memory_tags else None + elif _obs_parsed == "all_combinations": + _memory_tags = sub_batch[0].get("tags") or [] + obs_tags_list = ( + [ + list(combo) + for r in range(1, len(_memory_tags) + 1) + for combo in combinations(_memory_tags, r) + ] + if _memory_tags + else None + ) + elif _obs_parsed == "combined" or _obs_parsed is None: + obs_tags_list = None # single combined pass (default behaviour) + else: + # explicit list[list[str]] + obs_tags_list = _obs_parsed + + sub_deleted: int = 0 + sub_llm_failed = False + if obs_tags_list: + # Multi-pass: run one observation consolidation pass per tag set + sub_results: list[dict[str, Any]] = [] + for obs_tags in obs_tags_list: + pass_results, pass_deleted, pass_failed = await _process_memory_batch( + conn=conn, + memory_engine=memory_engine, + llm_config=llm_config, + bank_id=bank_id, + memories=sub_batch, + request_context=request_context, + perf=perf, + config=config, + obs_tags_override=obs_tags, + ) + sub_deleted += pass_deleted + sub_llm_failed = sub_llm_failed or pass_failed + # Merge results: prefer non-skipped actions + if not sub_results: + sub_results = pass_results + else: + for i, (existing, new) in enumerate(zip(sub_results, pass_results)): + if existing.get("action") == "skipped" and new.get("action") != "skipped": + sub_results[i] = new + elif existing.get("action") != "skipped" and new.get("action") != "skipped": + # Both did something — combine into "multiple" + existing_created = existing.get( + "created", 1 if existing.get("action") == "created" else 0 + ) + existing_updated = existing.get( + "updated", 1 if existing.get("action") == "updated" else 0 + ) + new_created = new.get("created", 1 if new.get("action") == "created" else 0) + new_updated = new.get("updated", 1 if new.get("action") == "updated" else 0) + total = existing_created + existing_updated + new_created + 
new_updated + sub_results[i] = { + "action": "multiple", + "created": existing_created + new_created, + "updated": existing_updated + new_updated, + "merged": 0, + "total_actions": total, + } + else: + # Normal single pass using the memory's own tags + sub_results, sub_deleted, sub_llm_failed = await _process_memory_batch( conn=conn, memory_engine=memory_engine, llm_config=llm_config, bank_id=bank_id, - memories=llm_batch, + memories=sub_batch, request_context=request_context, perf=perf, config=config, - obs_tags_override=obs_tags, ) - batch_deleted += pass_deleted - # Merge results: prefer non-skipped actions - if not results: - results = pass_results - else: - for i, (existing, new) in enumerate(zip(results, pass_results)): - if existing.get("action") == "skipped" and new.get("action") != "skipped": - results[i] = new - elif existing.get("action") != "skipped" and new.get("action") != "skipped": - # Both did something — combine into "multiple" - existing_created = existing.get( - "created", 1 if existing.get("action") == "created" else 0 - ) - existing_updated = existing.get( - "updated", 1 if existing.get("action") == "updated" else 0 - ) - new_created = new.get("created", 1 if new.get("action") == "created" else 0) - new_updated = new.get("updated", 1 if new.get("action") == "updated" else 0) - total = existing_created + existing_updated + new_created + new_updated - results[i] = { - "action": "multiple", - "created": existing_created + new_created, - "updated": existing_updated + new_updated, - "merged": 0, - "total_actions": total, - } + + all_deleted += sub_deleted + + if sub_llm_failed and len(sub_batch) > 1: + # Split and retry with smaller batches + mid = len(sub_batch) // 2 + logger.warning( + f"[CONSOLIDATION] bank={bank_id} LLM failed for sub-batch of {len(sub_batch)}," + f" splitting into {mid}/{len(sub_batch) - mid}" + ) + pending[0:0] = [sub_batch[:mid], sub_batch[mid:]] + elif sub_llm_failed: + # batch_size=1 and still failing — mark as permanently 
failed for now + failed_ids.append(sub_batch[0]["id"]) + all_results.append({"action": "failed"}) + logger.warning( + f"[CONSOLIDATION] bank={bank_id} LLM failed for single memory" + f" {sub_batch[0]['id']}, marking consolidation_failed_at" + ) else: - # Normal single pass using the memory's own tags - results, batch_deleted = await _process_memory_batch( - conn=conn, - memory_engine=memory_engine, - llm_config=llm_config, - bank_id=bank_id, - memories=llm_batch, - request_context=request_context, - perf=perf, - config=config, + succeeded_ids.extend(m["id"] for m in sub_batch) + all_results.extend(sub_results) + + # Commit consolidated_at / consolidation_failed_at in a single DB round-trip + async with pool.acquire() as conn: + if succeeded_ids: + await conn.executemany( + f"UPDATE {fq_table('memory_units')} SET consolidated_at = NOW() WHERE id = $1", + [(mem_id,) for mem_id in succeeded_ids], + ) + if failed_ids: + await conn.executemany( + f"UPDATE {fq_table('memory_units')} SET consolidation_failed_at = NOW() WHERE id = $1", + [(mem_id,) for mem_id in failed_ids], ) - stats["observations_deleted"] += batch_deleted - await conn.executemany( - f"UPDATE {fq_table('memory_units')} SET consolidated_at = NOW() WHERE id = $1", - [(m["id"],) for m in llm_batch], - ) + stats["observations_deleted"] += all_deleted + results = all_results # Checkpoint: abort if the operation (and thus the bank) was deleted mid-run. 
if operation_id and not await memory_engine._check_op_alive(operation_id): @@ -413,6 +462,8 @@ async def run_consolidation_job( stats["actions_executed"] += result.get("total_actions", 0) elif action == "skipped": stats["skipped"] += 1 + elif action == "failed": + stats["memories_failed"] += 1 # Per-LLM-batch log llm_batch_time = time.time() - llm_batch_start @@ -425,6 +476,7 @@ async def run_consolidation_job( batch_created = stats["observations_created"] - snap_stats["observations_created"] batch_updated = stats["observations_updated"] - snap_stats["observations_updated"] batch_skipped = stats["skipped"] - snap_stats["skipped"] + batch_failed = stats["memories_failed"] - snap_stats["memories_failed"] llm_calls_made = perf.llm_calls - snap_llm_calls logger.info( f"[CONSOLIDATION] bank={bank_id} llm_batch #{llm_batch_num}" @@ -432,7 +484,8 @@ async def run_consolidation_job( f" | {stats['memories_processed']}/{total_count} processed" f" | {', '.join(timing_parts)}" f" | created={batch_created} updated={batch_updated} skipped={batch_skipped}" - f" | input_tokens=~{input_tokens}" + + (f" failed={batch_failed}" if batch_failed else "") + + f" | input_tokens=~{input_tokens}" f" | avg={llm_batch_time / len(llm_batch):.3f}s/memory" ) @@ -584,7 +637,7 @@ async def _process_memory_batch( perf: ConsolidationPerfLog | None = None, config: Any = None, obs_tags_override: list[str] | None = None, -) -> tuple[list[dict[str, Any]], int]: +) -> tuple[list[dict[str, Any]], int, bool]: """ Process a batch of memories in a single LLM call. @@ -747,7 +800,7 @@ async def _process_memory_batch( else: results.append({"action": "skipped", "reason": "no_durable_knowledge"}) - return results, deleted_count + return results, deleted_count, llm_result.failed def _min_date(dates: "Any") -> "datetime | None": @@ -1081,7 +1134,7 @@ def _fact_line(m: dict[str, Any]) -> str: logger.error( f"[CONSOLIDATION] LLM batch call failed after {max_attempts} attempts, skipping batch. 
Last error: {last_exc}" ) - return _BatchLLMResult(obs_count=len(union_observations), prompt_chars=len(prompt)) + return _BatchLLMResult(obs_count=len(union_observations), prompt_chars=len(prompt), failed=True) async def _create_observation_directly( diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index fb081668..eeb71e5a 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -3878,6 +3878,58 @@ async def clear_observations( return {"deleted_count": count or 0} + async def retry_failed_consolidation( + self, + bank_id: str, + *, + request_context: "RequestContext", + ) -> dict[str, int]: + """ + Reset memories that previously failed consolidation so they are retried on the next + consolidation run. + + Clears consolidation_failed_at (and consolidated_at) for all memories in the bank + that were marked as permanently failed after exhausting all LLM retries and adaptive + batch splitting. Does not delete any observations. + + Args: + bank_id: Bank ID + request_context: Request context for authentication. + + Returns: + Dictionary with count of memories queued for retry. 
+ """ + await self._authenticate_tenant(request_context) + if self._operation_validator: + from hindsight_api.extensions import BankWriteContext + + ctx = BankWriteContext( + bank_id=bank_id, operation="retry_failed_consolidation", request_context=request_context + ) + await self._validate_operation(self._operation_validator.validate_bank_write(ctx)) + pool = await self._get_pool() + async with acquire_with_retry(pool) as conn: + count = await conn.fetchval( + f""" + SELECT COUNT(*) FROM {fq_table("memory_units")} + WHERE bank_id = $1 + AND consolidation_failed_at IS NOT NULL + AND fact_type IN ('experience', 'world') + """, + bank_id, + ) + await conn.execute( + f""" + UPDATE {fq_table("memory_units")} + SET consolidation_failed_at = NULL, consolidated_at = NULL + WHERE bank_id = $1 + AND consolidation_failed_at IS NOT NULL + AND fact_type IN ('experience', 'world') + """, + bank_id, + ) + return {"retried_count": count or 0} + async def clear_observations_for_memory( self, bank_id: str, From fc27ba94a3f35c704528f3c44fe5a89ba043fe5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 16:47:14 +0100 Subject: [PATCH 02/18] chore: regenerate OpenAPI spec --- hindsight-docs/static/openapi.json | 76 ++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/hindsight-docs/static/openapi.json b/hindsight-docs/static/openapi.json index a41f2c3a..efb1f461 100644 --- a/hindsight-docs/static/openapi.json +++ b/hindsight-docs/static/openapi.json @@ -3125,6 +3125,65 @@ } } }, + "/v1/default/banks/{bank_id}/consolidation/retry-failed": { + "post": { + "tags": [ + "Banks" + ], + "summary": "Retry failed consolidation", + "description": "Reset all memories that were permanently marked as failed during consolidation (after exhausting all LLM retries and adaptive batch splitting) so they are picked up again on the next consolidation run. 
Does not delete any observations.", + "operationId": "retry_failed_consolidation", + "parameters": [ + { + "name": "bank_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Bank Id" + } + }, + { + "name": "authorization", + "in": "header", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Authorization" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RetryFailedConsolidationResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, "/v1/default/banks/{bank_id}/memories/{memory_id}/observations": { "delete": { "tags": [ @@ -7582,6 +7641,23 @@ } } }, + "RetryFailedConsolidationResponse": { + "properties": { + "retried_count": { + "type": "integer", + "title": "Retried Count" + } + }, + "type": "object", + "required": [ + "retried_count" + ], + "title": "RetryFailedConsolidationResponse", + "description": "Response model for retrying failed consolidation.", + "example": { + "retried_count": 42 + } + }, "RetryOperationResponse": { "properties": { "success": { From f1fd46733f0a639719d1b2a6c54b721dc7f6ab90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 16:51:18 +0100 Subject: [PATCH 03/18] fix: rename consolidation endpoint from /retry-failed to /recover --- hindsight-api-slim/hindsight_api/api/http.py | 20 +++++----- hindsight-docs/static/openapi.json | 42 ++++++++++---------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index 16d00d1d..ec397c21 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ 
-1328,8 +1328,8 @@ class ClearMemoryObservationsResponse(BaseModel): deleted_count: int -class RetryFailedConsolidationResponse(BaseModel): - """Response model for retrying failed consolidation.""" +class RecoverConsolidationResponse(BaseModel): + """Response model for recovering failed consolidation.""" model_config = ConfigDict(json_schema_extra={"example": {"retried_count": 42}}) @@ -3911,24 +3911,24 @@ async def api_clear_observations(bank_id: str, request_context: RequestContext = raise HTTPException(status_code=500, detail=str(e)) @app.post( - "/v1/default/banks/{bank_id}/consolidation/retry-failed", - response_model=RetryFailedConsolidationResponse, - summary="Retry failed consolidation", + "/v1/default/banks/{bank_id}/consolidation/recover", + response_model=RecoverConsolidationResponse, + summary="Recover failed consolidation", description=( "Reset all memories that were permanently marked as failed during consolidation " "(after exhausting all LLM retries and adaptive batch splitting) so they are " "picked up again on the next consolidation run. Does not delete any observations." 
), - operation_id="retry_failed_consolidation", + operation_id="recover_consolidation", tags=["Banks"], ) - async def api_retry_failed_consolidation( + async def api_recover_consolidation( bank_id: str, request_context: RequestContext = Depends(get_request_context) ): - """Reset consolidation-failed memories for retry.""" + """Reset consolidation-failed memories for recovery.""" try: result = await app.state.memory.retry_failed_consolidation(bank_id, request_context=request_context) - return RetryFailedConsolidationResponse(retried_count=result["retried_count"]) + return RecoverConsolidationResponse(retried_count=result["retried_count"]) except OperationValidationError as e: raise HTTPException(status_code=e.status_code, detail=e.reason) except (AuthenticationError, HTTPException): @@ -3937,7 +3937,7 @@ async def api_retry_failed_consolidation( import traceback error_detail = f"{str(e)}\n\nTraceback:\n{traceback.format_exc()}" - logger.error(f"Error in POST /v1/default/banks/{bank_id}/consolidation/retry-failed: {error_detail}") + logger.error(f"Error in POST /v1/default/banks/{bank_id}/consolidation/recover: {error_detail}") raise HTTPException(status_code=500, detail=str(e)) @app.delete( diff --git a/hindsight-docs/static/openapi.json b/hindsight-docs/static/openapi.json index efb1f461..78777d64 100644 --- a/hindsight-docs/static/openapi.json +++ b/hindsight-docs/static/openapi.json @@ -3125,14 +3125,14 @@ } } }, - "/v1/default/banks/{bank_id}/consolidation/retry-failed": { + "/v1/default/banks/{bank_id}/consolidation/recover": { "post": { "tags": [ "Banks" ], - "summary": "Retry failed consolidation", + "summary": "Recover failed consolidation", "description": "Reset all memories that were permanently marked as failed during consolidation (after exhausting all LLM retries and adaptive batch splitting) so they are picked up again on the next consolidation run. 
Does not delete any observations.", - "operationId": "retry_failed_consolidation", + "operationId": "recover_consolidation", "parameters": [ { "name": "bank_id", @@ -3166,7 +3166,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RetryFailedConsolidationResponse" + "$ref": "#/components/schemas/RecoverConsolidationResponse" } } } @@ -6960,6 +6960,23 @@ "type": "world" } }, + "RecoverConsolidationResponse": { + "properties": { + "retried_count": { + "type": "integer", + "title": "Retried Count" + } + }, + "type": "object", + "required": [ + "retried_count" + ], + "title": "RecoverConsolidationResponse", + "description": "Response model for recovering failed consolidation.", + "example": { + "retried_count": 42 + } + }, "ReflectBasedOn": { "properties": { "memories": { @@ -7641,23 +7658,6 @@ } } }, - "RetryFailedConsolidationResponse": { - "properties": { - "retried_count": { - "type": "integer", - "title": "Retried Count" - } - }, - "type": "object", - "required": [ - "retried_count" - ], - "title": "RetryFailedConsolidationResponse", - "description": "Response model for retrying failed consolidation.", - "example": { - "retried_count": 42 - } - }, "RetryOperationResponse": { "properties": { "success": { From 3d1e03462753c637813972223a8f8565f354fd98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 17:23:19 +0100 Subject: [PATCH 04/18] fix: add consolidation_failed_at column, adaptive batch splitting, and recovery API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Migration a3b4c5d6e7f8: add consolidation_failed_at TIMESTAMPTZ column to memory_units with an index for efficient failure queries; properly chains off g7h8i9j0k1l2 (backsweep_orphan_observations) - Consolidator: filter pending memories with consolidation_failed_at IS NULL so failed memories are not re-fetched in an infinite loop - Consolidator: adaptive batch splitting — when a batch exhausts 
all 3 LLM retries, halve it and retry sub-batches recursively; only single-memory batches that also exhaust all retries get consolidation_failed_at set - New tests (9 total) covering: adaptive splitting recovers all memories, larger batch splitting, single-memory permanent failure, exclusion from next run, partial batch failure, recover resets columns, recover returns 0 when none failed, recover-then-consolidate succeeds, HTTP endpoint --- ...consolidation_failed_at_to_memory_units.py | 4 +- .../engine/consolidation/consolidator.py | 2 + .../test_consolidation_failure_recovery.py | 476 ++++++++++++++++++ 3 files changed, 480 insertions(+), 2 deletions(-) create mode 100644 hindsight-api-slim/tests/test_consolidation_failure_recovery.py diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/a3b4c5d6e7f8_add_consolidation_failed_at_to_memory_units.py b/hindsight-api-slim/hindsight_api/alembic/versions/a3b4c5d6e7f8_add_consolidation_failed_at_to_memory_units.py index b141c18e..06e5f088 100644 --- a/hindsight-api-slim/hindsight_api/alembic/versions/a3b4c5d6e7f8_add_consolidation_failed_at_to_memory_units.py +++ b/hindsight-api-slim/hindsight_api/alembic/versions/a3b4c5d6e7f8_add_consolidation_failed_at_to_memory_units.py @@ -5,7 +5,7 @@ and can be retried later via the API. 
Revision ID: a3b4c5d6e7f8 -Revises: z1u2v3w4x5y6 +Revises: g7h8i9j0k1l2 Create Date: 2026-03-17 """ @@ -14,7 +14,7 @@ from alembic import context, op revision: str = "a3b4c5d6e7f8" -down_revision: str | Sequence[str] | None = "z1u2v3w4x5y6" +down_revision: str | Sequence[str] | None = "g7h8i9j0k1l2" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None diff --git a/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py b/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py index a309e015..50bfe094 100644 --- a/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py +++ b/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py @@ -220,6 +220,7 @@ async def run_consolidation_job( FROM {fq_table("memory_units")} WHERE bank_id = $1 AND consolidated_at IS NULL + AND consolidation_failed_at IS NULL AND fact_type IN ('experience', 'world') """, bank_id, @@ -259,6 +260,7 @@ async def run_consolidation_job( FROM {fq_table("memory_units")} WHERE bank_id = $1 AND consolidated_at IS NULL + AND consolidation_failed_at IS NULL AND fact_type IN ('experience', 'world') ORDER BY created_at ASC LIMIT $2 diff --git a/hindsight-api-slim/tests/test_consolidation_failure_recovery.py b/hindsight-api-slim/tests/test_consolidation_failure_recovery.py new file mode 100644 index 00000000..c737ddd4 --- /dev/null +++ b/hindsight-api-slim/tests/test_consolidation_failure_recovery.py @@ -0,0 +1,476 @@ +"""Tests for consolidation failure handling: adaptive batch splitting, consolidation_failed_at, +and the recovery API. + +These tests use a mock LLM to simulate LLM failures deterministically, without making real +API calls. All tests insert memories directly into the database to bypass retain's LLM calls +and focus exclusively on the consolidation code paths. 
+""" + +import uuid +from unittest.mock import MagicMock + +import pytest +import pytest_asyncio + +from hindsight_api.engine.consolidation.consolidator import run_consolidation_job +from hindsight_api.engine.memory_engine import MemoryEngine +from hindsight_api.engine.providers.mock_llm import MockLLM +from hindsight_api.engine.task_backend import SyncTaskBackend + + +@pytest_asyncio.fixture(scope="function") +async def memory_no_llm_verify(pg0_db_url, embeddings, cross_encoder, query_analyzer): + """MemoryEngine with mock LLM. + + Migrations are already applied by the session-scoped pg0_db_url fixture, so + run_migrations=False avoids advisory-lock serialization overhead per test. + """ + mem = MemoryEngine( + db_url=pg0_db_url, + memory_llm_provider="mock", + memory_llm_api_key="", + memory_llm_model="mock", + embeddings=embeddings, + cross_encoder=cross_encoder, + query_analyzer=query_analyzer, + pool_min_size=1, + pool_max_size=5, + run_migrations=False, + task_backend=SyncTaskBackend(), + skip_llm_verification=True, + ) + await mem.initialize() + yield mem + try: + if mem._pool and not mem._pool._closing: + await mem.close() + except Exception: + pass + + +@pytest.fixture(autouse=True) +def enable_observations(): + """Enable observations for all tests in this module.""" + from hindsight_api.config import _get_raw_config + + config = _get_raw_config() + original = config.enable_observations + config.enable_observations = True + yield + config.enable_observations = original + + +def _make_failing_mock_llm(*, fail_first_n: int = 999) -> MockLLM: + """Return a MockLLM that raises ValueError for the first `fail_first_n` consolidation calls.""" + mock_llm = MockLLM(provider="mock", api_key="", base_url="", model="mock-model") + call_count = 0 + + def callback(messages, scope): + nonlocal call_count + if scope == "consolidation": + call_count += 1 + if call_count <= fail_first_n: + raise ValueError(f"Simulated LLM failure (call {call_count})") + # Return empty 
response — no creates/updates/deletes + from hindsight_api.engine.consolidation.consolidator import _ConsolidationBatchResponse + + return _ConsolidationBatchResponse() + + mock_llm.set_response_callback(callback) + return mock_llm + + +def _make_always_success_mock_llm() -> MockLLM: + """Return a MockLLM that always succeeds with an empty consolidation response.""" + mock_llm = MockLLM(provider="mock", api_key="", base_url="", model="mock-model") + + def callback(messages, scope): + from hindsight_api.engine.consolidation.consolidator import _ConsolidationBatchResponse + + return _ConsolidationBatchResponse() + + mock_llm.set_response_callback(callback) + return mock_llm + + +def _inject_mock_llm(memory: MemoryEngine, mock_llm: MockLLM) -> None: + """Replace memory._consolidation_llm_config with a wrapper that returns mock_llm from with_config.""" + wrapper = MagicMock() + wrapper.with_config.return_value = mock_llm + memory._consolidation_llm_config = wrapper + + +async def _insert_memories(conn, bank_id: str, texts: list[str]) -> list[uuid.UUID]: + """Insert experience memories directly, bypassing LLM-based retain.""" + ids = [] + for text in texts: + mem_id = uuid.uuid4() + await conn.execute( + """ + INSERT INTO memory_units (id, bank_id, text, fact_type, created_at) + VALUES ($1, $2, $3, 'experience', now()) + """, + mem_id, + bank_id, + text, + ) + ids.append(mem_id) + return ids + + +class TestAdaptiveBatchSplitting: + """Verify that a failing batch is halved and retried until batch_size=1 succeeds.""" + + @pytest.mark.asyncio + async def test_splitting_recovers_all_memories(self, memory_no_llm_verify: MemoryEngine, request_context): + """When a batch of 2 fails, both are retried individually and succeed.""" + bank_id = f"test-split-recovery-{uuid.uuid4().hex[:8]}" + await memory_no_llm_verify.get_bank_profile(bank_id=bank_id, request_context=request_context) + + async with memory_no_llm_verify._pool.acquire() as conn: + mem_ids = await _insert_memories( + 
conn,
+                bank_id,
+                [
+                    "Alice runs marathons every spring.",
+                    "Alice trained for six months for her last race.",
+                ],
+            )
+
+        # Exhaust all 3 retries for batch=2 (calls 1-3 fail), then each batch=1 succeeds (calls 4-5)
+        mock_llm = _make_failing_mock_llm(fail_first_n=3)
+        _inject_mock_llm(memory_no_llm_verify, mock_llm)
+
+        result = await run_consolidation_job(
+            memory_engine=memory_no_llm_verify,
+            bank_id=bank_id,
+            request_context=request_context,
+        )
+
+        assert result["status"] == "completed"
+        assert result["memories_processed"] == 2
+        assert result["memories_failed"] == 0
+
+        # Both memories must have consolidated_at set and consolidation_failed_at NULL
+        async with memory_no_llm_verify._pool.acquire() as conn:
+            rows = await conn.fetch(
+                """
+                SELECT id, consolidated_at, consolidation_failed_at
+                FROM memory_units
+                WHERE bank_id = $1 AND fact_type = 'experience'
+                """,
+                bank_id,
+            )
+        assert len(rows) == 2
+        for row in rows:
+            assert row["consolidated_at"] is not None, f"Memory {row['id']} should have consolidated_at set"
+            assert row["consolidation_failed_at"] is None, (
+                f"Memory {row['id']} should NOT have consolidation_failed_at set"
+            )
+
+        # LLM called 5 times: 3 retries failed (batch=2) + 1 succeeded (batch=1) + 1 succeeded (batch=1)
+        consolidation_calls = [c for c in mock_llm.get_mock_calls() if c["scope"] == "consolidation"]
+        assert len(consolidation_calls) == 5
+
+        await memory_no_llm_verify.delete_bank(bank_id, request_context=request_context)
+
+    @pytest.mark.asyncio
+    async def test_splitting_with_larger_batch(self, memory_no_llm_verify: MemoryEngine, request_context):
+        """A failing batch of 4 is halved once; both batch=2 halves then succeed, recovering all memories."""
+        bank_id = f"test-split-large-{uuid.uuid4().hex[:8]}"
+        await memory_no_llm_verify.get_bank_profile(bank_id=bank_id, request_context=request_context)
+
+        async with memory_no_llm_verify._pool.acquire() as conn:
+            await _insert_memories(
+                conn,
+                bank_id,
+                [
+                    "Bob plays chess 
competitively.", + "Bob won a regional chess tournament.", + "Bob practices tactics every morning.", + "Bob coaches youth chess on weekends.", + ], + ) + + # Exhaust all 3 retries for batch=4 (calls 1-3 fail), then both batch=2 halves succeed + # (calls 4-5). This verifies that halving once is sufficient when batch=2 works. + mock_llm = _make_failing_mock_llm(fail_first_n=3) + _inject_mock_llm(memory_no_llm_verify, mock_llm) + + result = await run_consolidation_job( + memory_engine=memory_no_llm_verify, + bank_id=bank_id, + request_context=request_context, + ) + + assert result["memories_processed"] == 4 + assert result["memories_failed"] == 0 + + async with memory_no_llm_verify._pool.acquire() as conn: + rows = await conn.fetch( + "SELECT consolidated_at, consolidation_failed_at FROM memory_units " + "WHERE bank_id = $1 AND fact_type = 'experience'", + bank_id, + ) + assert all(r["consolidated_at"] is not None for r in rows) + assert all(r["consolidation_failed_at"] is None for r in rows) + + await memory_no_llm_verify.delete_bank(bank_id, request_context=request_context) + + +class TestConsolidationFailedAt: + """Verify that consolidation_failed_at is set — and consolidated_at is NOT — when all retries fail.""" + + @pytest.mark.asyncio + async def test_single_memory_permanent_failure(self, memory_no_llm_verify: MemoryEngine, request_context): + """A single memory that exhausts all LLM retries gets consolidation_failed_at, not consolidated_at.""" + bank_id = f"test-perm-fail-{uuid.uuid4().hex[:8]}" + await memory_no_llm_verify.get_bank_profile(bank_id=bank_id, request_context=request_context) + + async with memory_no_llm_verify._pool.acquire() as conn: + (mem_id,) = await _insert_memories(conn, bank_id, ["Carol enjoys painting watercolors."]) + + # Always fail + mock_llm = _make_failing_mock_llm(fail_first_n=999) + _inject_mock_llm(memory_no_llm_verify, mock_llm) + + result = await run_consolidation_job( + memory_engine=memory_no_llm_verify, + bank_id=bank_id, + 
request_context=request_context,
+        )
+
+        assert result["memories_failed"] == 1
+        assert result["memories_processed"] == 1
+
+        async with memory_no_llm_verify._pool.acquire() as conn:
+            row = await conn.fetchrow(
+                "SELECT consolidated_at, consolidation_failed_at FROM memory_units WHERE id = $1",
+                mem_id,
+            )
+
+        assert row["consolidated_at"] is None, "consolidated_at must NOT be set for a permanently failed memory"
+        assert row["consolidation_failed_at"] is not None, "consolidation_failed_at must be set"
+
+        await memory_no_llm_verify.delete_bank(bank_id, request_context=request_context)
+
+    @pytest.mark.asyncio
+    async def test_failed_memory_excluded_from_next_run(self, memory_no_llm_verify: MemoryEngine, request_context):
+        """A memory marked consolidation_failed_at is not re-processed on the next consolidation run."""
+        bank_id = f"test-excluded-{uuid.uuid4().hex[:8]}"
+        await memory_no_llm_verify.get_bank_profile(bank_id=bank_id, request_context=request_context)
+
+        async with memory_no_llm_verify._pool.acquire() as conn:
+            (mem_id,) = await _insert_memories(conn, bank_id, ["Dave collects vinyl records."])
+            # Manually stamp consolidation_failed_at to simulate a prior failed run
+            await conn.execute(
+                "UPDATE memory_units SET consolidation_failed_at = NOW() WHERE id = $1",
+                mem_id,
+            )
+
+        # Even with a healthy LLM, the memory should be skipped
+        mock_llm = _make_always_success_mock_llm()
+        _inject_mock_llm(memory_no_llm_verify, mock_llm)
+
+        result = await run_consolidation_job(
+            memory_engine=memory_no_llm_verify,
+            bank_id=bank_id,
+            request_context=request_context,
+        )
+
+        # Skipped: although consolidated_at is NULL, consolidation_failed_at is set, and the
+        # candidate SELECT also requires consolidation_failed_at IS NULL, so nothing is picked up
+        assert result["status"] in ("no_new_memories", "completed")
+        if result["status"] == "completed":
+            assert result["memories_processed"] == 0
+
+        # Memory still has consolidation_failed_at set 
and consolidated_at NULL + async with memory_no_llm_verify._pool.acquire() as conn: + row = await conn.fetchrow( + "SELECT consolidated_at, consolidation_failed_at FROM memory_units WHERE id = $1", + mem_id, + ) + assert row["consolidated_at"] is None + assert row["consolidation_failed_at"] is not None + + await memory_no_llm_verify.delete_bank(bank_id, request_context=request_context) + + @pytest.mark.asyncio + async def test_partial_batch_failure(self, memory_no_llm_verify: MemoryEngine, request_context): + """In a batch of 2, if only the first individual retry fails, the second still succeeds.""" + bank_id = f"test-partial-fail-{uuid.uuid4().hex[:8]}" + await memory_no_llm_verify.get_bank_profile(bank_id=bank_id, request_context=request_context) + + async with memory_no_llm_verify._pool.acquire() as conn: + mem_ids = await _insert_memories( + conn, + bank_id, + [ + "Eve speaks three languages fluently.", + "Eve learned Japanese in two years.", + ], + ) + + # Exhaust 3 retries for batch=2 (calls 1-3), exhaust 3 retries for first batch=1 (calls 4-6), + # second batch=1 succeeds (call 7) + mock_llm = _make_failing_mock_llm(fail_first_n=6) + _inject_mock_llm(memory_no_llm_verify, mock_llm) + + result = await run_consolidation_job( + memory_engine=memory_no_llm_verify, + bank_id=bank_id, + request_context=request_context, + ) + + assert result["memories_processed"] == 2 + assert result["memories_failed"] == 1 + + async with memory_no_llm_verify._pool.acquire() as conn: + rows = { + str(r["id"]): r + for r in await conn.fetch( + "SELECT id, consolidated_at, consolidation_failed_at FROM memory_units " + "WHERE bank_id = $1 AND fact_type = 'experience'", + bank_id, + ) + } + + # One should have failed, one should have succeeded + failed = [r for r in rows.values() if r["consolidation_failed_at"] is not None] + succeeded = [r for r in rows.values() if r["consolidated_at"] is not None] + assert len(failed) == 1 + assert len(succeeded) == 1 + # They must be different 
memories + assert str(failed[0]["id"]) != str(succeeded[0]["id"]) + + await memory_no_llm_verify.delete_bank(bank_id, request_context=request_context) + + +class TestRecoverConsolidation: + """Verify the retry_failed_consolidation() method and the /consolidation/recover endpoint.""" + + @pytest.mark.asyncio + async def test_recover_resets_failed_memories(self, memory_no_llm_verify: MemoryEngine, request_context): + """retry_failed_consolidation resets consolidation_failed_at and consolidated_at.""" + bank_id = f"test-recover-reset-{uuid.uuid4().hex[:8]}" + await memory_no_llm_verify.get_bank_profile(bank_id=bank_id, request_context=request_context) + + async with memory_no_llm_verify._pool.acquire() as conn: + ids = await _insert_memories( + conn, + bank_id, + [ + "Frank is a competitive cyclist.", + "Frank completed the Tour de France route.", + ], + ) + # Mark both as failed + for mem_id in ids: + await conn.execute( + "UPDATE memory_units SET consolidation_failed_at = NOW() WHERE id = $1", + mem_id, + ) + + result = await memory_no_llm_verify.retry_failed_consolidation( + bank_id, request_context=request_context + ) + + assert result["retried_count"] == 2 + + async with memory_no_llm_verify._pool.acquire() as conn: + rows = await conn.fetch( + "SELECT consolidated_at, consolidation_failed_at FROM memory_units " + "WHERE bank_id = $1 AND fact_type = 'experience'", + bank_id, + ) + assert all(r["consolidation_failed_at"] is None for r in rows), "consolidation_failed_at must be cleared" + assert all(r["consolidated_at"] is None for r in rows), "consolidated_at must also be cleared" + + await memory_no_llm_verify.delete_bank(bank_id, request_context=request_context) + + @pytest.mark.asyncio + async def test_recover_returns_zero_when_none_failed(self, memory_no_llm_verify: MemoryEngine, request_context): + """retry_failed_consolidation returns 0 when no memories have failed.""" + bank_id = f"test-recover-zero-{uuid.uuid4().hex[:8]}" + await 
memory_no_llm_verify.get_bank_profile(bank_id=bank_id, request_context=request_context) + + result = await memory_no_llm_verify.retry_failed_consolidation( + bank_id, request_context=request_context + ) + + assert result["retried_count"] == 0 + + await memory_no_llm_verify.delete_bank(bank_id, request_context=request_context) + + @pytest.mark.asyncio + async def test_recover_then_consolidate_succeeds(self, memory_no_llm_verify: MemoryEngine, request_context): + """After recovery, the memory is picked up by the next consolidation run.""" + bank_id = f"test-recover-consolidate-{uuid.uuid4().hex[:8]}" + await memory_no_llm_verify.get_bank_profile(bank_id=bank_id, request_context=request_context) + + async with memory_no_llm_verify._pool.acquire() as conn: + (mem_id,) = await _insert_memories(conn, bank_id, ["Grace is an expert rock climber."]) + await conn.execute( + "UPDATE memory_units SET consolidation_failed_at = NOW() WHERE id = $1", mem_id + ) + + # Recover + recover_result = await memory_no_llm_verify.retry_failed_consolidation( + bank_id, request_context=request_context + ) + assert recover_result["retried_count"] == 1 + + # Now consolidate with a healthy LLM + mock_llm = _make_always_success_mock_llm() + _inject_mock_llm(memory_no_llm_verify, mock_llm) + + run_result = await run_consolidation_job( + memory_engine=memory_no_llm_verify, + bank_id=bank_id, + request_context=request_context, + ) + + assert run_result["memories_processed"] == 1 + assert run_result["memories_failed"] == 0 + + async with memory_no_llm_verify._pool.acquire() as conn: + row = await conn.fetchrow( + "SELECT consolidated_at, consolidation_failed_at FROM memory_units WHERE id = $1", + mem_id, + ) + assert row["consolidated_at"] is not None, "Memory should be consolidated after recovery" + assert row["consolidation_failed_at"] is None + + await memory_no_llm_verify.delete_bank(bank_id, request_context=request_context) + + @pytest.mark.asyncio + async def 
test_recover_endpoint_via_http(self, memory_no_llm_verify: MemoryEngine, request_context): + """The POST /consolidation/recover endpoint returns the correct retried_count.""" + import httpx + + from hindsight_api.api.http import create_app + + bank_id = f"test-recover-http-{uuid.uuid4().hex[:8]}" + await memory_no_llm_verify.get_bank_profile(bank_id=bank_id, request_context=request_context) + + async with memory_no_llm_verify._pool.acquire() as conn: + ids = await _insert_memories( + conn, + bank_id, + ["Henry is a professional chef.", "Henry trained at Le Cordon Bleu."], + ) + for mem_id in ids: + await conn.execute( + "UPDATE memory_units SET consolidation_failed_at = NOW() WHERE id = $1", mem_id + ) + + app = create_app(memory_no_llm_verify, initialize_memory=False) + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as client: + response = await client.post(f"/v1/default/banks/{bank_id}/consolidation/recover") + + assert response.status_code == 200 + body = response.json() + assert body["retried_count"] == 2 + + await memory_no_llm_verify.delete_bank(bank_id, request_context=request_context) From 5b10d35577f2d36a3ec098a39295ce25f3e2f125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 17:23:47 +0100 Subject: [PATCH 05/18] chore: regenerate Go, Python, TypeScript clients with recover consolidation endpoint --- hindsight-clients/go/api/openapi.yaml | 51 ++++ hindsight-clients/go/api_banks.go | 122 ++++++++ .../model_recover_consolidation_response.go | 158 ++++++++++ .../python/.openapi-generator/FILES | 1 + .../python/hindsight_client_api/__init__.py | 1 + .../hindsight_client_api/api/banks_api.py | 279 ++++++++++++++++++ .../hindsight_client_api/models/__init__.py | 1 + .../models/recover_consolidation_response.py | 87 ++++++ .../typescript/generated/sdk.gen.ts | 17 ++ .../typescript/generated/types.gen.ts | 50 ++++ 10 files changed, 767 insertions(+) create 
mode 100644 hindsight-clients/go/model_recover_consolidation_response.go create mode 100644 hindsight-clients/python/hindsight_client_api/models/recover_consolidation_response.py diff --git a/hindsight-clients/go/api/openapi.yaml b/hindsight-clients/go/api/openapi.yaml index 85871a74..ba097ab0 100644 --- a/hindsight-clients/go/api/openapi.yaml +++ b/hindsight-clients/go/api/openapi.yaml @@ -2106,6 +2106,46 @@ paths: summary: Clear all observations tags: - Banks + /v1/default/banks/{bank_id}/consolidation/recover: + post: + description: Reset all memories that were permanently marked as failed during + consolidation (after exhausting all LLM retries and adaptive batch splitting) + so they are picked up again on the next consolidation run. Does not delete + any observations. + operationId: recover_consolidation + parameters: + - explode: false + in: path + name: bank_id + required: true + schema: + title: Bank Id + type: string + style: simple + - explode: false + in: header + name: authorization + required: false + schema: + nullable: true + type: string + style: simple + responses: + "200": + content: + application/json: + schema: + $ref: '#/components/schemas/RecoverConsolidationResponse' + description: Successful Response + "422": + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Recover failed consolidation + tags: + - Banks /v1/default/banks/{bank_id}/memories/{memory_id}/observations: delete: description: Delete all observations derived from a specific memory and reset @@ -4449,6 +4489,17 @@ components: - id - text title: RecallResult + RecoverConsolidationResponse: + description: Response model for recovering failed consolidation. 
+ example: + retried_count: 42 + properties: + retried_count: + title: Retried Count + type: integer + required: + - retried_count + title: RecoverConsolidationResponse ReflectBasedOn: description: "Evidence the response is based on: memories, mental models, and\ \ directives." diff --git a/hindsight-clients/go/api_banks.go b/hindsight-clients/go/api_banks.go index 83a1ad4a..01073bf7 100644 --- a/hindsight-clients/go/api_banks.go +++ b/hindsight-clients/go/api_banks.go @@ -1023,6 +1023,128 @@ func (a *BanksAPIService) ListBanksExecute(r ApiListBanksRequest) (*BankListResp return localVarReturnValue, localVarHTTPResponse, nil } +type ApiRecoverConsolidationRequest struct { + ctx context.Context + ApiService *BanksAPIService + bankId string + authorization *string +} + +func (r ApiRecoverConsolidationRequest) Authorization(authorization string) ApiRecoverConsolidationRequest { + r.authorization = &authorization + return r +} + +func (r ApiRecoverConsolidationRequest) Execute() (*RecoverConsolidationResponse, *http.Response, error) { + return r.ApiService.RecoverConsolidationExecute(r) +} + +/* +RecoverConsolidation Recover failed consolidation + +Reset all memories that were permanently marked as failed during consolidation (after exhausting all LLM retries and adaptive batch splitting) so they are picked up again on the next consolidation run. Does not delete any observations. + + @param ctx context.Context - for authentication, logging, cancellation, deadlines, tracing, etc. Passed from http.Request or context.Background(). 
+ @param bankId + @return ApiRecoverConsolidationRequest +*/ +func (a *BanksAPIService) RecoverConsolidation(ctx context.Context, bankId string) ApiRecoverConsolidationRequest { + return ApiRecoverConsolidationRequest{ + ApiService: a, + ctx: ctx, + bankId: bankId, + } +} + +// Execute executes the request +// @return RecoverConsolidationResponse +func (a *BanksAPIService) RecoverConsolidationExecute(r ApiRecoverConsolidationRequest) (*RecoverConsolidationResponse, *http.Response, error) { + var ( + localVarHTTPMethod = http.MethodPost + localVarPostBody interface{} + formFiles []formFile + localVarReturnValue *RecoverConsolidationResponse + ) + + localBasePath, err := a.client.cfg.ServerURLWithContext(r.ctx, "BanksAPIService.RecoverConsolidation") + if err != nil { + return localVarReturnValue, nil, &GenericOpenAPIError{error: err.Error()} + } + + localVarPath := localBasePath + "/v1/default/banks/{bank_id}/consolidation/recover" + localVarPath = strings.Replace(localVarPath, "{"+"bank_id"+"}", url.PathEscape(parameterValueToString(r.bankId, "bankId")), -1) + + localVarHeaderParams := make(map[string]string) + localVarQueryParams := url.Values{} + localVarFormParams := url.Values{} + + // to determine the Content-Type header + localVarHTTPContentTypes := []string{} + + // set Content-Type header + localVarHTTPContentType := selectHeaderContentType(localVarHTTPContentTypes) + if localVarHTTPContentType != "" { + localVarHeaderParams["Content-Type"] = localVarHTTPContentType + } + + // to determine the Accept header + localVarHTTPHeaderAccepts := []string{"application/json"} + + // set Accept header + localVarHTTPHeaderAccept := selectHeaderAccept(localVarHTTPHeaderAccepts) + if localVarHTTPHeaderAccept != "" { + localVarHeaderParams["Accept"] = localVarHTTPHeaderAccept + } + if r.authorization != nil { + parameterAddToHeaderOrQuery(localVarHeaderParams, "authorization", r.authorization, "simple", "") + } + req, err := a.client.prepareRequest(r.ctx, localVarPath, 
localVarHTTPMethod, localVarPostBody, localVarHeaderParams, localVarQueryParams, localVarFormParams, formFiles) + if err != nil { + return localVarReturnValue, nil, err + } + + localVarHTTPResponse, err := a.client.callAPI(req) + if err != nil || localVarHTTPResponse == nil { + return localVarReturnValue, localVarHTTPResponse, err + } + + localVarBody, err := io.ReadAll(localVarHTTPResponse.Body) + localVarHTTPResponse.Body.Close() + localVarHTTPResponse.Body = io.NopCloser(bytes.NewBuffer(localVarBody)) + if err != nil { + return localVarReturnValue, localVarHTTPResponse, err + } + + if localVarHTTPResponse.StatusCode >= 300 { + newErr := &GenericOpenAPIError{ + body: localVarBody, + error: localVarHTTPResponse.Status, + } + if localVarHTTPResponse.StatusCode == 422 { + var v HTTPValidationError + err = a.client.decode(&v, localVarBody, localVarHTTPResponse.Header.Get("Content-Type")) + if err != nil { + newErr.error = err.Error() + return localVarReturnValue, localVarHTTPResponse, newErr + } + newErr.error = formatErrorMessage(localVarHTTPResponse.Status, &v) + newErr.model = v + } + return localVarReturnValue, localVarHTTPResponse, newErr + } + + err = a.client.decode(&localVarReturnValue, localVarBody, localVarHTTPResponse.Header.Get("Content-Type")) + if err != nil { + newErr := &GenericOpenAPIError{ + body: localVarBody, + error: err.Error(), + } + return localVarReturnValue, localVarHTTPResponse, newErr + } + + return localVarReturnValue, localVarHTTPResponse, nil +} + type ApiResetBankConfigRequest struct { ctx context.Context ApiService *BanksAPIService diff --git a/hindsight-clients/go/model_recover_consolidation_response.go b/hindsight-clients/go/model_recover_consolidation_response.go new file mode 100644 index 00000000..eeeb703f --- /dev/null +++ b/hindsight-clients/go/model_recover_consolidation_response.go @@ -0,0 +1,158 @@ +/* +Hindsight HTTP API + +HTTP API for Hindsight + +API version: 0.4.18 +*/ + +// Code generated by OpenAPI Generator 
(https://openapi-generator.tech); DO NOT EDIT. + +package hindsight + +import ( + "encoding/json" + "bytes" + "fmt" +) + +// checks if the RecoverConsolidationResponse type satisfies the MappedNullable interface at compile time +var _ MappedNullable = &RecoverConsolidationResponse{} + +// RecoverConsolidationResponse Response model for recovering failed consolidation. +type RecoverConsolidationResponse struct { + RetriedCount int32 `json:"retried_count"` +} + +type _RecoverConsolidationResponse RecoverConsolidationResponse + +// NewRecoverConsolidationResponse instantiates a new RecoverConsolidationResponse object +// This constructor will assign default values to properties that have it defined, +// and makes sure properties required by API are set, but the set of arguments +// will change when the set of required properties is changed +func NewRecoverConsolidationResponse(retriedCount int32) *RecoverConsolidationResponse { + this := RecoverConsolidationResponse{} + this.RetriedCount = retriedCount + return &this +} + +// NewRecoverConsolidationResponseWithDefaults instantiates a new RecoverConsolidationResponse object +// This constructor will only assign default values to properties that have it defined, +// but it doesn't guarantee that properties required by API are set +func NewRecoverConsolidationResponseWithDefaults() *RecoverConsolidationResponse { + this := RecoverConsolidationResponse{} + return &this +} + +// GetRetriedCount returns the RetriedCount field value +func (o *RecoverConsolidationResponse) GetRetriedCount() int32 { + if o == nil { + var ret int32 + return ret + } + + return o.RetriedCount +} + +// GetRetriedCountOk returns a tuple with the RetriedCount field value +// and a boolean to check if the value has been set. 
+func (o *RecoverConsolidationResponse) GetRetriedCountOk() (*int32, bool) { + if o == nil { + return nil, false + } + return &o.RetriedCount, true +} + +// SetRetriedCount sets field value +func (o *RecoverConsolidationResponse) SetRetriedCount(v int32) { + o.RetriedCount = v +} + +func (o RecoverConsolidationResponse) MarshalJSON() ([]byte, error) { + toSerialize,err := o.ToMap() + if err != nil { + return []byte{}, err + } + return json.Marshal(toSerialize) +} + +func (o RecoverConsolidationResponse) ToMap() (map[string]interface{}, error) { + toSerialize := map[string]interface{}{} + toSerialize["retried_count"] = o.RetriedCount + return toSerialize, nil +} + +func (o *RecoverConsolidationResponse) UnmarshalJSON(data []byte) (err error) { + // This validates that all required properties are included in the JSON object + // by unmarshalling the object into a generic map with string keys and checking + // that every required field exists as a key in the generic map. + requiredProperties := []string{ + "retried_count", + } + + allProperties := make(map[string]interface{}) + + err = json.Unmarshal(data, &allProperties) + + if err != nil { + return err; + } + + for _, requiredProperty := range(requiredProperties) { + if _, exists := allProperties[requiredProperty]; !exists { + return fmt.Errorf("no value given for required property %v", requiredProperty) + } + } + + varRecoverConsolidationResponse := _RecoverConsolidationResponse{} + + decoder := json.NewDecoder(bytes.NewReader(data)) + decoder.DisallowUnknownFields() + err = decoder.Decode(&varRecoverConsolidationResponse) + + if err != nil { + return err + } + + *o = RecoverConsolidationResponse(varRecoverConsolidationResponse) + + return err +} + +type NullableRecoverConsolidationResponse struct { + value *RecoverConsolidationResponse + isSet bool +} + +func (v NullableRecoverConsolidationResponse) Get() *RecoverConsolidationResponse { + return v.value +} + +func (v *NullableRecoverConsolidationResponse) Set(val 
*RecoverConsolidationResponse) { + v.value = val + v.isSet = true +} + +func (v NullableRecoverConsolidationResponse) IsSet() bool { + return v.isSet +} + +func (v *NullableRecoverConsolidationResponse) Unset() { + v.value = nil + v.isSet = false +} + +func NewNullableRecoverConsolidationResponse(val *RecoverConsolidationResponse) *NullableRecoverConsolidationResponse { + return &NullableRecoverConsolidationResponse{value: val, isSet: true} +} + +func (v NullableRecoverConsolidationResponse) MarshalJSON() ([]byte, error) { + return json.Marshal(v.value) +} + +func (v *NullableRecoverConsolidationResponse) UnmarshalJSON(src []byte) error { + v.isSet = true + return json.Unmarshal(src, &v.value) +} + + diff --git a/hindsight-clients/python/.openapi-generator/FILES b/hindsight-clients/python/.openapi-generator/FILES index 2223ebc7..cbd2d1bb 100644 --- a/hindsight-clients/python/.openapi-generator/FILES +++ b/hindsight-clients/python/.openapi-generator/FILES @@ -71,6 +71,7 @@ hindsight_client_api/models/recall_request.py hindsight_client_api/models/recall_request_tag_groups_inner.py hindsight_client_api/models/recall_response.py hindsight_client_api/models/recall_result.py +hindsight_client_api/models/recover_consolidation_response.py hindsight_client_api/models/reflect_based_on.py hindsight_client_api/models/reflect_directive.py hindsight_client_api/models/reflect_fact.py diff --git a/hindsight-clients/python/hindsight_client_api/__init__.py b/hindsight-clients/python/hindsight_client_api/__init__.py index 42fb42cd..59fa9f2b 100644 --- a/hindsight-clients/python/hindsight_client_api/__init__.py +++ b/hindsight-clients/python/hindsight_client_api/__init__.py @@ -96,6 +96,7 @@ from hindsight_client_api.models.recall_request_tag_groups_inner import RecallRequestTagGroupsInner from hindsight_client_api.models.recall_response import RecallResponse from hindsight_client_api.models.recall_result import RecallResult +from 
hindsight_client_api.models.recover_consolidation_response import RecoverConsolidationResponse from hindsight_client_api.models.reflect_based_on import ReflectBasedOn from hindsight_client_api.models.reflect_directive import ReflectDirective from hindsight_client_api.models.reflect_fact import ReflectFact diff --git a/hindsight-clients/python/hindsight_client_api/api/banks_api.py b/hindsight-clients/python/hindsight_client_api/api/banks_api.py index 10271033..dcdbf746 100644 --- a/hindsight-clients/python/hindsight_client_api/api/banks_api.py +++ b/hindsight-clients/python/hindsight_client_api/api/banks_api.py @@ -28,6 +28,7 @@ from hindsight_client_api.models.consolidation_response import ConsolidationResponse from hindsight_client_api.models.create_bank_request import CreateBankRequest from hindsight_client_api.models.delete_response import DeleteResponse +from hindsight_client_api.models.recover_consolidation_response import RecoverConsolidationResponse from hindsight_client_api.models.update_disposition_request import UpdateDispositionRequest from hindsight_client_api.api_client import ApiClient, RequestSerialized @@ -2319,6 +2320,284 @@ def _list_banks_serialize( + @validate_call + async def recover_consolidation( + self, + bank_id: StrictStr, + authorization: Optional[StrictStr] = None, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> RecoverConsolidationResponse: + """Recover failed consolidation + + Reset all memories that were permanently marked as failed during consolidation (after exhausting all LLM retries and adaptive batch splitting) so they are picked up again on the next consolidation run. 
Does not delete any observations. + + :param bank_id: (required) + :type bank_id: str + :param authorization: + :type authorization: str + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. 
+ """ # noqa: E501 + + _param = self._recover_consolidation_serialize( + bank_id=bank_id, + authorization=authorization, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "RecoverConsolidationResponse", + '422': "HTTPValidationError", + } + response_data = await self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + await response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ).data + + + @validate_call + async def recover_consolidation_with_http_info( + self, + bank_id: StrictStr, + authorization: Optional[StrictStr] = None, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> ApiResponse[RecoverConsolidationResponse]: + """Recover failed consolidation + + Reset all memories that were permanently marked as failed during consolidation (after exhausting all LLM retries and adaptive batch splitting) so they are picked up again on the next consolidation run. Does not delete any observations. + + :param bank_id: (required) + :type bank_id: str + :param authorization: + :type authorization: str + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. 
+ :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. + """ # noqa: E501 + + _param = self._recover_consolidation_serialize( + bank_id=bank_id, + authorization=authorization, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "RecoverConsolidationResponse", + '422': "HTTPValidationError", + } + response_data = await self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + await response_data.read() + return self.api_client.response_deserialize( + response_data=response_data, + response_types_map=_response_types_map, + ) + + + @validate_call + async def recover_consolidation_without_preload_content( + self, + bank_id: StrictStr, + authorization: Optional[StrictStr] = None, + _request_timeout: Union[ + None, + Annotated[StrictFloat, Field(gt=0)], + Tuple[ + Annotated[StrictFloat, Field(gt=0)], + Annotated[StrictFloat, Field(gt=0)] + ] + ] = None, + _request_auth: Optional[Dict[StrictStr, Any]] = None, + _content_type: Optional[StrictStr] = None, + _headers: Optional[Dict[StrictStr, Any]] = None, + _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0, + ) -> RESTResponseType: + """Recover failed consolidation + + Reset all memories that were permanently marked as failed during consolidation (after exhausting all LLM retries and adaptive batch splitting) so they are picked up again on the next consolidation run. Does not delete any observations. 
+ + :param bank_id: (required) + :type bank_id: str + :param authorization: + :type authorization: str + :param _request_timeout: timeout setting for this request. If one + number provided, it will be total request + timeout. It can also be a pair (tuple) of + (connection, read) timeouts. + :type _request_timeout: int, tuple(int, int), optional + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the + authentication in the spec for a single request. + :type _request_auth: dict, optional + :param _content_type: force content-type for the request. + :type _content_type: str, Optional + :param _headers: set to override the headers for a single + request; this effectively ignores the headers + in the spec for a single request. + :type _headers: dict, optional + :param _host_index: set to override the host_index for a single + request; this effectively ignores the host_index + in the spec for a single request. + :type _host_index: int, optional + :return: Returns the result object. 
+ """ # noqa: E501 + + _param = self._recover_consolidation_serialize( + bank_id=bank_id, + authorization=authorization, + _request_auth=_request_auth, + _content_type=_content_type, + _headers=_headers, + _host_index=_host_index + ) + + _response_types_map: Dict[str, Optional[str]] = { + '200': "RecoverConsolidationResponse", + '422': "HTTPValidationError", + } + response_data = await self.api_client.call_api( + *_param, + _request_timeout=_request_timeout + ) + return response_data.response + + + def _recover_consolidation_serialize( + self, + bank_id, + authorization, + _request_auth, + _content_type, + _headers, + _host_index, + ) -> RequestSerialized: + + _host = None + + _collection_formats: Dict[str, str] = { + } + + _path_params: Dict[str, str] = {} + _query_params: List[Tuple[str, str]] = [] + _header_params: Dict[str, Optional[str]] = _headers or {} + _form_params: List[Tuple[str, str]] = [] + _files: Dict[ + str, Union[str, bytes, List[str], List[bytes], List[Tuple[str, bytes]]] + ] = {} + _body_params: Optional[bytes] = None + + # process the path parameters + if bank_id is not None: + _path_params['bank_id'] = bank_id + # process the query parameters + # process the header parameters + if authorization is not None: + _header_params['authorization'] = authorization + # process the form parameters + # process the body parameter + + + # set the HTTP header `Accept` + if 'Accept' not in _header_params: + _header_params['Accept'] = self.api_client.select_header_accept( + [ + 'application/json' + ] + ) + + + # authentication setting + _auth_settings: List[str] = [ + ] + + return self.api_client.param_serialize( + method='POST', + resource_path='/v1/default/banks/{bank_id}/consolidation/recover', + path_params=_path_params, + query_params=_query_params, + header_params=_header_params, + body=_body_params, + post_params=_form_params, + files=_files, + auth_settings=_auth_settings, + collection_formats=_collection_formats, + _host=_host, + 
_request_auth=_request_auth + ) + + + + @validate_call async def reset_bank_config( self, diff --git a/hindsight-clients/python/hindsight_client_api/models/__init__.py b/hindsight-clients/python/hindsight_client_api/models/__init__.py index 3678ad40..5ee4d220 100644 --- a/hindsight-clients/python/hindsight_client_api/models/__init__.py +++ b/hindsight-clients/python/hindsight_client_api/models/__init__.py @@ -70,6 +70,7 @@ from hindsight_client_api.models.recall_request_tag_groups_inner import RecallRequestTagGroupsInner from hindsight_client_api.models.recall_response import RecallResponse from hindsight_client_api.models.recall_result import RecallResult +from hindsight_client_api.models.recover_consolidation_response import RecoverConsolidationResponse from hindsight_client_api.models.reflect_based_on import ReflectBasedOn from hindsight_client_api.models.reflect_directive import ReflectDirective from hindsight_client_api.models.reflect_fact import ReflectFact diff --git a/hindsight-clients/python/hindsight_client_api/models/recover_consolidation_response.py b/hindsight-clients/python/hindsight_client_api/models/recover_consolidation_response.py new file mode 100644 index 00000000..4c3c43eb --- /dev/null +++ b/hindsight-clients/python/hindsight_client_api/models/recover_consolidation_response.py @@ -0,0 +1,87 @@ +# coding: utf-8 + +""" + Hindsight HTTP API + + HTTP API for Hindsight + + The version of the OpenAPI document: 0.4.18 + Generated by OpenAPI Generator (https://openapi-generator.tech) + + Do not edit the class manually. +""" # noqa: E501 + + +from __future__ import annotations +import pprint +import re # noqa: F401 +import json + +from pydantic import BaseModel, ConfigDict, StrictInt +from typing import Any, ClassVar, Dict, List +from typing import Optional, Set +from typing_extensions import Self + +class RecoverConsolidationResponse(BaseModel): + """ + Response model for recovering failed consolidation. 
+ """ # noqa: E501 + retried_count: StrictInt + __properties: ClassVar[List[str]] = ["retried_count"] + + model_config = ConfigDict( + populate_by_name=True, + validate_assignment=True, + protected_namespaces=(), + ) + + + def to_str(self) -> str: + """Returns the string representation of the model using alias""" + return pprint.pformat(self.model_dump(by_alias=True)) + + def to_json(self) -> str: + """Returns the JSON representation of the model using alias""" + # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> Optional[Self]: + """Create an instance of RecoverConsolidationResponse from a JSON string""" + return cls.from_dict(json.loads(json_str)) + + def to_dict(self) -> Dict[str, Any]: + """Return the dictionary representation of the model using alias. + + This has the following differences from calling pydantic's + `self.model_dump(by_alias=True)`: + + * `None` is only added to the output dict for nullable fields that + were set at model initialization. Other fields with value `None` + are ignored. 
+ """ + excluded_fields: Set[str] = set([ + ]) + + _dict = self.model_dump( + by_alias=True, + exclude=excluded_fields, + exclude_none=True, + ) + return _dict + + @classmethod + def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: + """Create an instance of RecoverConsolidationResponse from a dict""" + if obj is None: + return None + + if not isinstance(obj, dict): + return cls.model_validate(obj) + + _obj = cls.model_validate({ + "retried_count": obj.get("retried_count") + }) + return _obj + + diff --git a/hindsight-clients/typescript/generated/sdk.gen.ts b/hindsight-clients/typescript/generated/sdk.gen.ts index 483793ed..ead1fa77 100644 --- a/hindsight-clients/typescript/generated/sdk.gen.ts +++ b/hindsight-clients/typescript/generated/sdk.gen.ts @@ -131,6 +131,9 @@ import type { RecallMemoriesData, RecallMemoriesErrors, RecallMemoriesResponses, + RecoverConsolidationData, + RecoverConsolidationErrors, + RecoverConsolidationResponses, ReflectData, ReflectErrors, ReflectResponses, @@ -935,6 +938,20 @@ export const clearObservations = ( ThrowOnError >({ url: "/v1/default/banks/{bank_id}/observations", ...options }); +/** + * Recover failed consolidation + * + * Reset all memories that were permanently marked as failed during consolidation (after exhausting all LLM retries and adaptive batch splitting) so they are picked up again on the next consolidation run. Does not delete any observations. + */ +export const recoverConsolidation = ( + options: Options, +) => + (options.client ?? 
client).post< + RecoverConsolidationResponses, + RecoverConsolidationErrors, + ThrowOnError + >({ url: "/v1/default/banks/{bank_id}/consolidation/recover", ...options }); + /** * Clear observations for a memory * diff --git a/hindsight-clients/typescript/generated/types.gen.ts b/hindsight-clients/typescript/generated/types.gen.ts index ff163810..7ecbd4e9 100644 --- a/hindsight-clients/typescript/generated/types.gen.ts +++ b/hindsight-clients/typescript/generated/types.gen.ts @@ -1608,6 +1608,18 @@ export type RecallResult = { source_fact_ids?: Array | null; }; +/** + * RecoverConsolidationResponse + * + * Response model for recovering failed consolidation. + */ +export type RecoverConsolidationResponse = { + /** + * Retried Count + */ + retried_count: number; +}; + /** * ReflectBasedOn * @@ -4245,6 +4257,44 @@ export type ClearObservationsResponses = { export type ClearObservationsResponse = ClearObservationsResponses[keyof ClearObservationsResponses]; +export type RecoverConsolidationData = { + body?: never; + headers?: { + /** + * Authorization + */ + authorization?: string | null; + }; + path: { + /** + * Bank Id + */ + bank_id: string; + }; + query?: never; + url: "/v1/default/banks/{bank_id}/consolidation/recover"; +}; + +export type RecoverConsolidationErrors = { + /** + * Validation Error + */ + 422: HttpValidationError; +}; + +export type RecoverConsolidationError = + RecoverConsolidationErrors[keyof RecoverConsolidationErrors]; + +export type RecoverConsolidationResponses = { + /** + * Successful Response + */ + 200: RecoverConsolidationResponse; +}; + +export type RecoverConsolidationResponse2 = + RecoverConsolidationResponses[keyof RecoverConsolidationResponses]; + export type ClearMemoryObservationsData = { body?: never; headers?: { From fb10c463860aaca204059110f97d921def9c0035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 17:30:39 +0100 Subject: [PATCH 06/18] feat: add Recover Consolidation action to bank 
Actions dropdown --- .../[bankId]/consolidation-recover/route.ts | 27 +++++++++++++++ .../src/app/banks/[bankId]/page.tsx | 34 +++++++++++++++++++ hindsight-control-plane/src/lib/api.ts | 11 ++++++ 3 files changed, 72 insertions(+) create mode 100644 hindsight-control-plane/src/app/api/banks/[bankId]/consolidation-recover/route.ts diff --git a/hindsight-control-plane/src/app/api/banks/[bankId]/consolidation-recover/route.ts b/hindsight-control-plane/src/app/api/banks/[bankId]/consolidation-recover/route.ts new file mode 100644 index 00000000..42eabf04 --- /dev/null +++ b/hindsight-control-plane/src/app/api/banks/[bankId]/consolidation-recover/route.ts @@ -0,0 +1,27 @@ +import { NextResponse } from "next/server"; +import { sdk, lowLevelClient } from "@/lib/hindsight-client"; + +export async function POST(request: Request, { params }: { params: Promise<{ bankId: string }> }) { + try { + const { bankId } = await params; + + if (!bankId) { + return NextResponse.json({ error: "bank_id is required" }, { status: 400 }); + } + + const response = await sdk.recoverConsolidation({ + client: lowLevelClient, + path: { bank_id: bankId }, + }); + + if (response.error) { + console.error("API error recovering consolidation:", response.error); + return NextResponse.json({ error: "Failed to recover consolidation" }, { status: 500 }); + } + + return NextResponse.json(response.data, { status: 200 }); + } catch (error) { + console.error("Error recovering consolidation:", error); + return NextResponse.json({ error: "Failed to recover consolidation" }, { status: 500 }); + } +} diff --git a/hindsight-control-plane/src/app/banks/[bankId]/page.tsx b/hindsight-control-plane/src/app/banks/[bankId]/page.tsx index e12e2da5..a4a213db 100644 --- a/hindsight-control-plane/src/app/banks/[bankId]/page.tsx +++ b/hindsight-control-plane/src/app/banks/[bankId]/page.tsx @@ -62,6 +62,7 @@ export default function BankPage() { const [showClearObservationsDialog, setShowClearObservationsDialog] = 
useState(false); const [isClearingObservations, setIsClearingObservations] = useState(false); const [isConsolidating, setIsConsolidating] = useState(false); + const [isRecoveringConsolidation, setIsRecoveringConsolidation] = useState(false); const [showResetConfigDialog, setShowResetConfigDialog] = useState(false); const [isResettingConfig, setIsResettingConfig] = useState(false); @@ -137,6 +138,22 @@ export default function BankPage() { } }; + const handleRecoverConsolidation = async () => { + if (!bankId) return; + + setIsRecoveringConsolidation(true); + try { + const result = await client.recoverConsolidation(bankId); + toast.success( + `Recovered ${result.retried_count} failed ${result.retried_count === 1 ? "memory" : "memories"} for re-consolidation` + ); + } catch (error) { + // Error toast is shown automatically by the API client interceptor + } finally { + setIsRecoveringConsolidation(false); + } + }; + return (
@@ -181,6 +198,23 @@ export default function BankPage() { Off )} + + {isRecoveringConsolidation ? ( + + ) : ( + + )} + {isRecoveringConsolidation ? "Recovering..." : "Recover Consolidation"} + {!observationsEnabled && ( + Off + )} + setShowClearObservationsDialog(true)} disabled={!observationsEnabled} diff --git a/hindsight-control-plane/src/lib/api.ts b/hindsight-control-plane/src/lib/api.ts index 9178ecf1..521713b0 100644 --- a/hindsight-control-plane/src/lib/api.ts +++ b/hindsight-control-plane/src/lib/api.ts @@ -417,6 +417,17 @@ export class ControlPlaneClient { }); } + /** + * Recover failed consolidation for a bank (reset memories marked consolidation_failed_at) + */ + async recoverConsolidation(bankId: string) { + return this.fetchApi<{ + retried_count: number; + }>(`/api/banks/${bankId}/consolidation-recover`, { + method: "POST", + }); + } + /** * Get chunk */ From 79577aa419633e9fd7adc0dc6f0dec171cde6623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 17:32:51 +0100 Subject: [PATCH 07/18] style: apply ruff formatting to http.py and config.py --- hindsight-api-slim/hindsight_api/api/http.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index ec397c21..567ef1e6 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -3922,9 +3922,7 @@ async def api_clear_observations(bank_id: str, request_context: RequestContext = operation_id="recover_consolidation", tags=["Banks"], ) - async def api_recover_consolidation( - bank_id: str, request_context: RequestContext = Depends(get_request_context) - ): + async def api_recover_consolidation(bank_id: str, request_context: RequestContext = Depends(get_request_context)): """Reset consolidation-failed memories for recovery.""" try: result = await app.state.memory.retry_failed_consolidation(bank_id, 
request_context=request_context) From d72f9b89646ce460503c27d0d7dd90ca0e45582d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 18:01:25 +0100 Subject: [PATCH 08/18] fix: handle consolidation scope in large batch test mock LLM The mock LLM was returning {"facts": ...} for ALL calls including consolidation. Consolidation doesn't use skip_validation=True so it expects a _ConsolidationBatchResponse instance, not a raw dict. Before this PR consolidation silently swallowed the AttributeError (failed=False was returned); now failed=True triggers adaptive splitting and timeouts. Fix: return _ConsolidationBatchResponse() when scope=="consolidation". --- .../tests/test_load_large_batch.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/hindsight-api-slim/tests/test_load_large_batch.py b/hindsight-api-slim/tests/test_load_large_batch.py index 4f499aa1..70c212d7 100644 --- a/hindsight-api-slim/tests/test_load_large_batch.py +++ b/hindsight-api-slim/tests/test_load_large_batch.py @@ -149,6 +149,16 @@ async def test_large_batch_500k_chars_20_items(self, memory_with_mock_llm, reque call_tracker = {"count": 0, "facts": 0} async def mock_llm_call(*args, **kwargs): + from hindsight_api.engine.consolidation.consolidator import _ConsolidationBatchResponse + + # Consolidation calls expect a _ConsolidationBatchResponse (not a raw dict), + # because consolidation does NOT use skip_validation=True. 
+ if kwargs.get("scope") == "consolidation": + return_usage = kwargs.get("return_usage", False) + if return_usage: + return _ConsolidationBatchResponse(), TokenUsage(input_tokens=0, output_tokens=0) + return _ConsolidationBatchResponse() + call_tracker["count"] += 1 # Extract the content from the user message to generate proportional facts @@ -157,7 +167,7 @@ async def mock_llm_call(*args, **kwargs): mock_facts = create_mock_facts_from_content(user_msg, ratio=1.5) call_tracker["facts"] += len(mock_facts) - # Return a dict (parsed JSON) since skip_validation=True but the code expects a dict + # Return a dict (parsed JSON) — fact extraction uses skip_validation=True response_dict = {"facts": mock_facts} return_usage = kwargs.get("return_usage", False) @@ -236,6 +246,14 @@ async def test_batch_chunking_behavior(self, memory_with_mock_llm, request_conte logger.info(f"Created {num_items} items with {actual_total_chars:,} chars (should trigger chunking)") async def mock_llm_call(*args, **kwargs): + from hindsight_api.engine.consolidation.consolidator import _ConsolidationBatchResponse + + if kwargs.get("scope") == "consolidation": + return_usage = kwargs.get("return_usage", False) + if return_usage: + return _ConsolidationBatchResponse(), TokenUsage(input_tokens=0, output_tokens=0) + return _ConsolidationBatchResponse() + messages = kwargs.get("messages", args[0] if args else []) user_msg = messages[-1]["content"] if messages else "" mock_facts = create_mock_facts_from_content(user_msg, ratio=1.0) From e6948baea42716c3a70ede0b09ccaf24b2da42ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 18:11:47 +0100 Subject: [PATCH 09/18] fix: restrict claude-agent-sdk to macOS platform only (no Linux wheel available) Also fix pre-existing type errors: use setattr for XLM-RoBERTa monkey-patch and add missing reranker_local_fp16/bucket_batching/batch_size fields to main.py config constructor. 
--- hindsight-api-slim/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hindsight-api-slim/pyproject.toml b/hindsight-api-slim/pyproject.toml index 8a6f3e08..5636a50c 100644 --- a/hindsight-api-slim/pyproject.toml +++ b/hindsight-api-slim/pyproject.toml @@ -55,7 +55,7 @@ dependencies = [ "filelock>=3.20.1", # TOCTOU race condition fix "authlib>=1.6.6", # Account takeover vulnerability fix "aiohttp>=3.13.3", # Multiple DoS vulnerabilities - "claude-agent-sdk>=0.1.27", + "claude-agent-sdk>=0.1.27; sys_platform == 'darwin'", ] [project.optional-dependencies] From b5141458de70184d495ab85229727100f4805f6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 18:21:50 +0100 Subject: [PATCH 10/18] fix: add UV_INDEX_STRATEGY=unsafe-best-match to fix markupsafe cp314 wheel conflict PyTorch CPU index serves markupsafe==3.0.3 with only cp314 wheels. uv's default first-index strategy stops at the first index with any version even if no compatible wheel exists. unsafe-best-match searches all indices for the best compatible wheel, falling back to PyPI for markupsafe. 
--- .github/workflows/test.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1a1d4187..1c08a664 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -200,6 +200,7 @@ jobs: HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -422,6 +423,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -485,6 +487,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -588,6 +591,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -720,6 +724,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -827,6 +832,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -932,6 +938,7 @@ jobs: HINDSIGHT_EMBED_PACKAGE_PATH: ${{ github.workspace }}/hindsight-embed GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} UV_INDEX: 
pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1039,6 +1046,7 @@ jobs: HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1283,6 +1291,7 @@ jobs: HINDSIGHT_API_LLM_MODEL: google/gemini-2.5-flash-lite # Prefer CPU-only PyTorch in CI UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1341,6 +1350,7 @@ jobs: HINDSIGHT_LLM_MODEL: google/gemini-2.5-flash-lite # Prefer CPU-only PyTorch in CI UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1397,6 +1407,7 @@ jobs: HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1531,6 +1542,7 @@ jobs: HINDSIGHT_API_LLM_MODEL: google/gemini-2.5-flash-lite GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1605,6 +1617,7 @@ jobs: runs-on: ubuntu-latest env: UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1677,6 +1690,7 @@ jobs: runs-on: ubuntu-latest env: UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu + UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 From dd5dd65a98ac7696af307968b89948effabefcf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 18:34:42 +0100 Subject: [PATCH 11/18] fix: use explicit pytorch index to prevent markupsafe wheel conflict Configure the pytorch CPU index as explicit=true in 
pyproject.toml so it is ONLY used for torch (via [tool.uv.sources]). All other packages (including markupsafe) are resolved exclusively from PyPI, preventing the pytorch index from serving incompatible cp314-only wheels for non-pytorch packages. Remove UV_INDEX and UV_INDEX_STRATEGY from CI workflow (no longer needed since the index is now configured in pyproject.toml). --- .github/workflows/test.yml | 28 --- hindsight-api-slim/pyproject.toml | 13 +- uv.lock | 320 ++++++++++-------------------- 3 files changed, 117 insertions(+), 244 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1c08a664..215a73a7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -199,8 +199,6 @@ jobs: HINDSIGHT_API_LLM_MODEL: google/gemini-2.5-flash-lite HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -422,8 +420,6 @@ jobs: HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -486,8 +482,6 @@ jobs: HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -590,8 +584,6 @@ jobs: HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -723,8 +715,6 @@ jobs: 
HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -831,8 +821,6 @@ jobs: HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Prefer CPU-only PyTorch in CI (but keep PyPI for everything else) - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -937,8 +925,6 @@ jobs: HINDSIGHT_API_URL: http://localhost:8888 HINDSIGHT_EMBED_PACKAGE_PATH: ${{ github.workspace }}/hindsight-embed GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1045,8 +1031,6 @@ jobs: HINDSIGHT_API_LLM_MODEL: google/gemini-2.5-flash-lite HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1290,8 +1274,6 @@ jobs: HINDSIGHT_API_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY: /tmp/gcp-credentials.json HINDSIGHT_API_LLM_MODEL: google/gemini-2.5-flash-lite # Prefer CPU-only PyTorch in CI - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1349,8 +1331,6 @@ jobs: HINDSIGHT_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY: /tmp/gcp-credentials.json HINDSIGHT_LLM_MODEL: google/gemini-2.5-flash-lite # Prefer CPU-only PyTorch in CI - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1406,8 +1386,6 @@ jobs: HINDSIGHT_API_LLM_MODEL: google/gemini-2.5-flash-lite HINDSIGHT_API_URL: http://localhost:8888 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - 
UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1541,8 +1519,6 @@ jobs: HINDSIGHT_API_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY: /tmp/gcp-credentials.json HINDSIGHT_API_LLM_MODEL: google/gemini-2.5-flash-lite GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1616,8 +1592,6 @@ jobs: verify-generated-files: runs-on: ubuntu-latest env: - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 @@ -1689,8 +1663,6 @@ jobs: check-openapi-compatibility: runs-on: ubuntu-latest env: - UV_INDEX: pytorch=https://download.pytorch.org/whl/cpu - UV_INDEX_STRATEGY: unsafe-best-match steps: - uses: actions/checkout@v6 diff --git a/hindsight-api-slim/pyproject.toml b/hindsight-api-slim/pyproject.toml index 5636a50c..cb7b988c 100644 --- a/hindsight-api-slim/pyproject.toml +++ b/hindsight-api-slim/pyproject.toml @@ -168,9 +168,16 @@ quote-style = "double" indent-style = "space" [tool.uv] -# Allow uv to search all configured indexes for packages, not just the first one -# This prevents dependency resolution failures when using pytorch index + PyPI -index-strategy = "unsafe-best-match" +# Use explicit index for PyTorch to prevent the pytorch index from serving +# non-pytorch packages (e.g. 
markupsafe) with incompatible wheels +[[tool.uv.index]] +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" +explicit = true + +[tool.uv.sources] +# Route torch to the CPU-only PyTorch index; everything else uses PyPI +torch = { index = "pytorch-cpu" } [tool.ty] # Type checking configuration diff --git a/uv.lock b/uv.lock index ce995723..38632e30 100644 --- a/uv.lock +++ b/uv.lock @@ -4,12 +4,16 @@ requires-python = ">=3.11" resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'win32'", "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform != 'win32'", + "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'win32'", + "python_full_version >= '3.14' and sys_platform == 'darwin'", + "python_full_version == '3.13.*' and sys_platform != 'darwin' and sys_platform != 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'darwin'", "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'darwin'", "python_full_version < '3.12' and sys_platform == 'win32'", - "python_full_version < '3.12' and sys_platform != 'win32'", + "python_full_version < '3.12' and sys_platform != 'darwin' and sys_platform != 'win32'", + "python_full_version < '3.12' and sys_platform == 'darwin'", ] [manifest] @@ -603,15 +607,12 @@ name = "claude-agent-sdk" version = "0.1.31" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "anyio" }, - { name = "mcp" }, + { name = "anyio", marker = "sys_platform != 'win32'" }, + { name = "mcp", marker = "sys_platform != 'win32'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/6d/df/071dce5803c4db8cd53708bcda3b6022c1c4b68fc00e9007593309515286/claude_agent_sdk-0.1.31.tar.gz", hash = "sha256:b68c681083d7cc985dd3e48f73aabf459f056c1a7e1c5b9c47033c6af94da1a1", size = 61191 } wheels = [ { url = "https://files.pythonhosted.org/packages/0c/7c/e249a3b4215e28a9722b3d9ab6057bceeeaa2b948530f022065ef2154555/claude_agent_sdk-0.1.31-py3-none-macosx_11_0_arm64.whl", hash = "sha256:801bacfe4192782a7cc7b61b0d23a57f061c069993dd3dfa8109aa2e7050a530", size = 54284257 }, - { url = "https://files.pythonhosted.org/packages/d6/a8/1a8288736aeafcc48e3dcb3326ec7f487dbf89ebba77d526e9464786a299/claude_agent_sdk-0.1.31-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:0b608e0cbfcedcb827427e6d16a73fe573d58e7f93e15f95435066feacbe6511", size = 68462461 }, - { url = "https://files.pythonhosted.org/packages/26/7a/7dcd0b77263ed55b17554fa3a67a6772b788e7048a524fd06c9baa970564/claude_agent_sdk-0.1.31-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:d0cb30e026a22246e84d9237d23bb4df20be5146913a04d2802ddd37d4f8b8c9", size = 70173234 }, - { url = "https://files.pythonhosted.org/packages/37/a5/4a8de7a9738f454b54aa97557f0fba9c74b0901ea418597008c668243fea/claude_agent_sdk-0.1.31-py3-none-win_amd64.whl", hash = "sha256:8ceca675c2770ad739bd1208362059a830e91c74efcf128045b5a7af14d36f2b", size = 72366975 }, ] [[package]] @@ -1504,7 +1505,7 @@ dependencies = [ { name = "anthropic" }, { name = "asyncpg" }, { name = "authlib" }, - { name = "claude-agent-sdk" }, + { name = "claude-agent-sdk", marker = "sys_platform == 'darwin'" }, { name = "cohere" }, { name = "cryptography" }, { name = "dateparser" }, @@ -1556,7 +1557,8 @@ all = [ { name = "pg0-embedded" }, { name = "safetensors" }, { name = "sentence-transformers" }, - { name = "torch" }, + { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cpu", source = { registry = 
"https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, { name = "transformers" }, ] embedded-db = [ @@ -1569,7 +1571,8 @@ local-ml = [ { name = "mlx-lm" }, { name = "safetensors" }, { name = "sentence-transformers" }, - { name = "torch" }, + { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, { name = "transformers" }, ] test = [ @@ -1601,7 +1604,7 @@ requires-dist = [ { name = "anthropic", specifier = ">=0.40.0" }, { name = "asyncpg", specifier = ">=0.29.0" }, { name = "authlib", specifier = ">=1.6.6" }, - { name = "claude-agent-sdk", specifier = ">=0.1.27" }, + { name = "claude-agent-sdk", marker = "sys_platform == 'darwin'", specifier = ">=0.1.27" }, { name = "cohere", specifier = ">=5.0.0" }, { name = "cryptography", specifier = ">=46.0.5" }, { name = "dateparser", specifier = ">=1.2.2" }, @@ -1651,7 +1654,7 @@ requires-dist = [ { name = "sqlalchemy", specifier = ">=2.0.44" }, { name = "testcontainers", marker = "extra == 'test'", specifier = ">=4.0.0" }, { name = "tiktoken", specifier = ">=0.12.0" }, - { name = "torch", marker = "extra == 'local-ml'", specifier = ">=2.6.0" }, + { name = "torch", marker = "extra == 'local-ml'", specifier = ">=2.6.0", index = "https://download.pytorch.org/whl/cpu" }, { name = "transformers", marker = "extra == 'local-ml'", specifier = ">=4.53.0" }, { name = "typer", specifier = ">=0.9.0" }, { name = "urllib3", specifier = ">=2.6.3" }, @@ -2906,140 +2909,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/8e/2844c3959ce9a63acc7c8e50881133d86666f0420bcde695e115ced0920f/numpy-2.3.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:81b3a59793523e552c4a96109dde028aa4448ae06ccac5a76ff6532a85558a7f", size = 12973130 }, ] -[[package]] -name = "nvidia-cublas-cu12" 
-version = "12.8.4.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921 }, -] - -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621 }, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029 }, -] - -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765 }, -] - -[[package]] -name = "nvidia-cudnn-cu12" -version = "9.10.2.21" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform != 'win32'" }, -] -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467 }, -] - -[[package]] -name = "nvidia-cufft-cu12" -version = "11.3.3.83" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'win32'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695 }, -] - -[[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834 }, -] - -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.9.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976 }, -] - -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.3.90" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform != 'win32'" }, - { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'win32'" }, - { name = "nvidia-nvjitlink-cu12", marker = 
"sys_platform != 'win32'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905 }, -] - -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.8.93" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'win32'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466 }, -] - -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.7.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691 }, -] - -[[package]] -name = "nvidia-nccl-cu12" -version = "2.27.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229 }, -] - -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836 }, -] - -[[package]] -name = "nvidia-nvshmem-cu12" -version = "3.3.20" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145 }, -] - -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954 }, -] - [[package]] name = "oauthlib" version = "3.3.1" @@ -4945,8 +4814,8 @@ name = "secretstorage" version = "3.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cryptography", marker = "sys_platform != 'win32'" }, - { name = "jeepney", marker = "sys_platform != 'win32'" }, + { name = "cryptography", marker = "sys_platform != 'darwin' and sys_platform != 'win32'" }, + { name = "jeepney", marker = "sys_platform != 'darwin' and sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/32/8a/ed6747b1cc723c81f526d4c12c1b1d43d07190e1e8258dbf934392fc850e/secretstorage-3.4.1.tar.gz", hash = "sha256:a799acf5be9fb93db609ebaa4ab6e8f1f3ed5ae640e0fa732bfea59e9c3b50e8", size = 19871 } wheels = [ @@ -4961,7 +4830,8 @@ dependencies = [ { name = "huggingface-hub" }, { name = 
"scikit-learn" }, { name = "scipy" }, - { name = "torch" }, + { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, { name = "tqdm" }, { name = "transformers" }, { name = "typing-extensions" }, @@ -5331,58 +5201,95 @@ wheels = [ [[package]] name = "torch" -version = "2.9.1" -source = { registry = "https://pypi.org/simple" } +version = "2.10.0" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'darwin'", + "python_full_version == '3.13.*' and sys_platform == 'darwin'", + "python_full_version == '3.12.*' and sys_platform == 'darwin'", + "python_full_version < '3.12' and sys_platform == 'darwin'", +] dependencies = [ - { name = "filelock" }, - { name = "fsspec" }, - { name = "jinja2" }, - { name = "networkx" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { 
name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, - { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "typing-extensions" }, + { name = "filelock", marker = "sys_platform == 'darwin'" }, + { name = "fsspec", marker = "sys_platform == 'darwin'" }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "networkx", marker = "sys_platform == 'darwin'" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform == 'darwin'" }, + { name = "sympy", marker = "sys_platform == 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/15/db/c064112ac0089af3d2f7a2b5bfbabf4aa407a78b74f87889e524b91c5402/torch-2.9.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:62b3fd888277946918cba4478cf849303da5359f0fb4e3bfb86b0533ba2eaf8d", size = 104220430 }, - { url = "https://files.pythonhosted.org/packages/56/be/76eaa36c9cd032d3b01b001e2c5a05943df75f26211f68fae79e62f87734/torch-2.9.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d033ff0ac3f5400df862a51bdde9bad83561f3739ea0046e68f5401ebfa67c1b", size = 899821446 }, - { url = 
"https://files.pythonhosted.org/packages/47/cc/7a2949e38dfe3244c4df21f0e1c27bce8aedd6c604a587dd44fc21017cb4/torch-2.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:0d06b30a9207b7c3516a9e0102114024755a07045f0c1d2f2a56b1819ac06bcb", size = 110973074 }, - { url = "https://files.pythonhosted.org/packages/1e/ce/7d251155a783fb2c1bb6837b2b7023c622a2070a0a72726ca1df47e7ea34/torch-2.9.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:52347912d868653e1528b47cafaf79b285b98be3f4f35d5955389b1b95224475", size = 74463887 }, - { url = "https://files.pythonhosted.org/packages/0f/27/07c645c7673e73e53ded71705045d6cb5bae94c4b021b03aa8d03eee90ab/torch-2.9.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:da5f6f4d7f4940a173e5572791af238cb0b9e21b1aab592bd8b26da4c99f1cd6", size = 104126592 }, - { url = "https://files.pythonhosted.org/packages/19/17/e377a460603132b00760511299fceba4102bd95db1a0ee788da21298ccff/torch-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:27331cd902fb4322252657f3902adf1c4f6acad9dcad81d8df3ae14c7c4f07c4", size = 899742281 }, - { url = "https://files.pythonhosted.org/packages/b1/1a/64f5769025db846a82567fa5b7d21dba4558a7234ee631712ee4771c436c/torch-2.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:81a285002d7b8cfd3fdf1b98aa8df138d41f1a8334fd9ea37511517cedf43083", size = 110940568 }, - { url = "https://files.pythonhosted.org/packages/6e/ab/07739fd776618e5882661d04c43f5b5586323e2f6a2d7d84aac20d8f20bd/torch-2.9.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:c0d25d1d8e531b8343bea0ed811d5d528958f1dcbd37e7245bc686273177ad7e", size = 74479191 }, - { url = "https://files.pythonhosted.org/packages/20/60/8fc5e828d050bddfab469b3fe78e5ab9a7e53dda9c3bdc6a43d17ce99e63/torch-2.9.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c29455d2b910b98738131990394da3e50eea8291dfeb4b12de71ecf1fdeb21cb", size = 104135743 }, - { url = 
"https://files.pythonhosted.org/packages/f2/b7/6d3f80e6918213babddb2a37b46dbb14c15b14c5f473e347869a51f40e1f/torch-2.9.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:524de44cd13931208ba2c4bde9ec7741fd4ae6bfd06409a604fc32f6520c2bc9", size = 899749493 }, - { url = "https://files.pythonhosted.org/packages/a6/47/c7843d69d6de8938c1cbb1eba426b1d48ddf375f101473d3e31a5fc52b74/torch-2.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:545844cc16b3f91e08ce3b40e9c2d77012dd33a48d505aed34b7740ed627a1b2", size = 110944162 }, - { url = "https://files.pythonhosted.org/packages/28/0e/2a37247957e72c12151b33a01e4df651d9d155dd74d8cfcbfad15a79b44a/torch-2.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5be4bf7496f1e3ffb1dd44b672adb1ac3f081f204c5ca81eba6442f5f634df8e", size = 74830751 }, - { url = "https://files.pythonhosted.org/packages/4b/f7/7a18745edcd7b9ca2381aa03353647bca8aace91683c4975f19ac233809d/torch-2.9.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:30a3e170a84894f3652434b56d59a64a2c11366b0ed5776fab33c2439396bf9a", size = 104142929 }, - { url = "https://files.pythonhosted.org/packages/f4/dd/f1c0d879f2863ef209e18823a988dc7a1bf40470750e3ebe927efdb9407f/torch-2.9.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8301a7b431e51764629208d0edaa4f9e4c33e6df0f2f90b90e261d623df6a4e2", size = 899748978 }, - { url = "https://files.pythonhosted.org/packages/1f/9f/6986b83a53b4d043e36f3f898b798ab51f7f20fdf1a9b01a2720f445043d/torch-2.9.1-cp313-cp313t-win_amd64.whl", hash = "sha256:2e1c42c0ae92bf803a4b2409fdfed85e30f9027a66887f5e7dcdbc014c7531db", size = 111176995 }, - { url = "https://files.pythonhosted.org/packages/40/60/71c698b466dd01e65d0e9514b5405faae200c52a76901baf6906856f17e4/torch-2.9.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:2c14b3da5df416cf9cb5efab83aa3056f5b8cd8620b8fde81b4987ecab730587", size = 74480347 }, - { url = 
"https://files.pythonhosted.org/packages/48/50/c4b5112546d0d13cc9eaa1c732b823d676a9f49ae8b6f97772f795874a03/torch-2.9.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1edee27a7c9897f4e0b7c14cfc2f3008c571921134522d5b9b5ec4ebbc69041a", size = 74433245 }, - { url = "https://files.pythonhosted.org/packages/81/c9/2628f408f0518b3bae49c95f5af3728b6ab498c8624ab1e03a43dd53d650/torch-2.9.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:19d144d6b3e29921f1fc70503e9f2fc572cde6a5115c0c0de2f7ca8b1483e8b6", size = 104134804 }, - { url = "https://files.pythonhosted.org/packages/28/fc/5bc91d6d831ae41bf6e9e6da6468f25330522e92347c9156eb3f1cb95956/torch-2.9.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:c432d04376f6d9767a9852ea0def7b47a7bbc8e7af3b16ac9cf9ce02b12851c9", size = 899747132 }, - { url = "https://files.pythonhosted.org/packages/63/5d/e8d4e009e52b6b2cf1684bde2a6be157b96fb873732542fb2a9a99e85a83/torch-2.9.1-cp314-cp314-win_amd64.whl", hash = "sha256:d187566a2cdc726fc80138c3cdb260970fab1c27e99f85452721f7759bbd554d", size = 110934845 }, - { url = "https://files.pythonhosted.org/packages/bd/b2/2d15a52516b2ea3f414643b8de68fa4cb220d3877ac8b1028c83dc8ca1c4/torch-2.9.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cb10896a1f7fedaddbccc2017ce6ca9ecaaf990f0973bdfcf405439750118d2c", size = 74823558 }, - { url = "https://files.pythonhosted.org/packages/86/5c/5b2e5d84f5b9850cd1e71af07524d8cbb74cba19379800f1f9f7c997fc70/torch-2.9.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:0a2bd769944991c74acf0c4ef23603b9c777fdf7637f115605a4b2d8023110c7", size = 104145788 }, - { url = "https://files.pythonhosted.org/packages/a9/8c/3da60787bcf70add986c4ad485993026ac0ca74f2fc21410bc4eb1bb7695/torch-2.9.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:07c8a9660bc9414c39cac530ac83b1fb1b679d7155824144a40a54f4a47bfa73", size = 899735500 }, - { url = 
"https://files.pythonhosted.org/packages/db/2b/f7818f6ec88758dfd21da46b6cd46af9d1b3433e53ddbb19ad1e0da17f9b/torch-2.9.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c88d3299ddeb2b35dcc31753305612db485ab6f1823e37fb29451c8b2732b87e", size = 111163659 }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:0826ac8e409551e12b2360ac18b4161a838cbd111933e694752f351191331d09" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:7fbbf409143a4fe0812a40c0b46a436030a7e1d14fe8c5234dfbe44df47f617e" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:b39cafff7229699f9d6e172cac74d85fd71b568268e439e08d9c540e54732a3e" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:90821a3194b8806d9fa9fdaa9308c1bc73df0c26808274b14129a97c99f35794" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:358bd7125cbec6e692d60618a5eec7f55a51b29e3652a849fd42af021d818023" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:470de4176007c2700735e003a830828a88d27129032a3add07291da07e2a94e8" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:4584ab167995c0479f6821e3dceaf199c8166c811d3adbba5d8eedbbfa6764fd" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:45a1c5057629444aeb1c452c18298fa7f30f2f7aeadd4dc41f9d340980294407" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:339e05502b6c839db40e88720cb700f5a3b50cda332284873e851772d41b2c1e" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = 
"sha256:840351da59cedb7bcbc51981880050813c19ef6b898a7fecf73a3afc71aff3fe" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:c88b1129fd4e14f0f882963c6728315caae35d2f47374d17edeed1edc7697497" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f4bea7dc451267c028593751612ad559299589304e68df54ae7672427893ff2c" }, +] + +[[package]] +name = "torch" +version = "2.10.0+cpu" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version == '3.13.*' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'win32'", + "python_full_version == '3.13.*' and sys_platform != 'darwin' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'win32'", + "python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'win32'", + "python_full_version < '3.12' and sys_platform == 'win32'", + "python_full_version < '3.12' and sys_platform != 'darwin' and sys_platform != 'win32'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform != 'darwin'" }, + { name = "fsspec", marker = "sys_platform != 'darwin'" }, + { name = "jinja2", marker = "sys_platform != 'darwin'" }, + { name = "networkx", marker = "sys_platform != 'darwin'" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'darwin'" }, + { name = "sympy", marker = "sys_platform != 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_aarch64.whl", hash = "sha256:ce5c113d1f55f8c1f5af05047a24e50d11d293e0cbbb5bf7a75c6c761edd6eaa" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_s390x.whl", 
hash = "sha256:0e286fcf6ce0cc7b204396c9b4ea0d375f1f0c3e752f68ce3d3aeb265511db8c" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:1cfcb9b1558c6e52dffd0d4effce83b13c5ae5d97338164c372048c21f9cfccb" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b7cb1ec66cefb90fd7b676eac72cfda3b8d4e4d0cacd7a531963bc2e0a9710ab" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:17a09465bab2aab8f0f273410297133d8d8fb6dd84dccbd252ca4a4f3a111847" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-win_arm64.whl", hash = "sha256:c35c0de592941d4944698dbfa87271ab85d3370eca3b694943a2ab307ac34b3f" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_aarch64.whl", hash = "sha256:8de5a36371b775e2d4881ed12cc7f2de400b1ad3d728aa74a281f649f87c9b8c" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:9accc30b56cb6756d4a9d04fcb8ebc0bb68c7d55c1ed31a8657397d316d31596" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:179451716487f8cb09b56459667fa1f5c4c0946c1e75fbeae77cfc40a5768d87" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ee40b8a4b4b2cf0670c6fd4f35a7ef23871af956fecb238fbf5da15a72650b1d" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:21cb5436978ef47c823b7a813ff0f8c2892e266cfe0f1d944879b5fba81bf4e1" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:3eaa727e6a73affa61564d86b9d03191df45c8650d0666bd3d57c8597ef61e78" }, + { url = 
"https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_aarch64.whl", hash = "sha256:fd215f3d0f681905c5b56b0630a3d666900a37fcc3ca5b937f95275c66f9fd9c" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:170a0623108055be5199370335cf9b41ba6875b3cb6f086db4aee583331a4899" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e51994492cdb76edce29da88de3672a3022f9ef0ffd90345436948d4992be2c7" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8d316e5bf121f1eab1147e49ad0511a9d92e4c45cc357d1ab0bee440da71a095" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:b719da5af01b59126ac13eefd6ba3dd12d002dc0e8e79b8b365e55267a8189d3" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:b67d91326e4ed9eccbd6b7d84ed7ffa43f93103aa3f0b24145f3001f3b11b714" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_aarch64.whl", hash = "sha256:5af75e5f49de21b0bdf7672bc27139bd285f9e8dbcabe2d617a2eb656514ac36" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-linux_s390x.whl", hash = "sha256:ba51ef01a510baf8fff576174f702c47e1aa54389a9f1fba323bb1a5003ff0bf" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:0fedcb1a77e8f2aaf7bfd21591bf6d1e0b207473268c9be16b17cb7783253969" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:106dd1930cb30a4a337366ba3f9b25318ebf940f51fd46f789281dd9e736bdc4" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = 
"sha256:eb1bde1ce198f05c8770017de27e001d404499cf552aaaa014569eff56ca25c0" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_aarch64.whl", hash = "sha256:ea2bcc9d1fca66974a71d4bf9a502539283f35d61fcab5a799b4e120846f1e02" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-linux_s390x.whl", hash = "sha256:f8294fd2fc6dd8f4435a891a0122307a043b14b21f0dac1bca63c85bfb59e586" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:a28fdbcfa2fbacffec81300f24dd1bed2b0ccfdbed107a823cff12bc1db070f6" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:aada8afc068add586464b2a55adb7cc9091eec55caf5320447204741cb6a0604" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:2adc71fe471e98a608723bfc837f7e1929885ebb912c693597711e139c1cda41" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_aarch64.whl", hash = "sha256:9412bd37b70f5ebd1205242c4ba4cabae35a605947f2b30806d5c9b467936db9" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-linux_s390x.whl", hash = "sha256:e71c476517c33e7db69825a9ff46c7f47a723ec4dac5b2481cff4246d1c632be" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:23882f8d882460aca809882fc42f5e343bf07585274f929ced00177d1be1eb67" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:4fcd8b4cc2ae20f2b7749fb275349c55432393868778c2d50a08e81d5ee5591e" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:ffc8da9a1341092d6a90cb5b1c1a33cd61abf0fb43f0cd88443c27fa372c26ae" }, ] [[package]] @@ -5436,19 +5343,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827 }, ] -[[package]] -name = "triton" -version = "3.5.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b0/72/ec90c3519eaf168f22cb1757ad412f3a2add4782ad3a92861c9ad135d886/triton-3.5.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:61413522a48add32302353fdbaaf92daaaab06f6b5e3229940d21b5207f47579", size = 170425802 }, - { url = "https://files.pythonhosted.org/packages/f2/50/9a8358d3ef58162c0a415d173cfb45b67de60176e1024f71fbc4d24c0b6d/triton-3.5.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d2c6b915a03888ab931a9fd3e55ba36785e1fe70cbea0b40c6ef93b20fc85232", size = 170470207 }, - { url = "https://files.pythonhosted.org/packages/27/46/8c3bbb5b0a19313f50edcaa363b599e5a1a5ac9683ead82b9b80fe497c8d/triton-3.5.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3f4346b6ebbd4fad18773f5ba839114f4826037c9f2f34e0148894cd5dd3dba", size = 170470410 }, - { url = "https://files.pythonhosted.org/packages/37/92/e97fcc6b2c27cdb87ce5ee063d77f8f26f19f06916aa680464c8104ef0f6/triton-3.5.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0b4d2c70127fca6a23e247f9348b8adde979d2e7a20391bfbabaac6aebc7e6a8", size = 170579924 }, - { url = "https://files.pythonhosted.org/packages/a4/e6/c595c35e5c50c4bc56a7bac96493dad321e9e29b953b526bbbe20f9911d0/triton-3.5.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0637b1efb1db599a8e9dc960d53ab6e4637db7d4ab6630a0974705d77b14b60", size = 170480488 }, - { url = 
"https://files.pythonhosted.org/packages/16/b5/b0d3d8b901b6a04ca38df5e24c27e53afb15b93624d7fd7d658c7cd9352a/triton-3.5.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bac7f7d959ad0f48c0e97d6643a1cc0fd5786fe61cb1f83b537c6b2d54776478", size = 170582192 }, -] - [[package]] name = "ty" version = "0.0.8" From 036e7381f9216fbda063caa41006a0f3396a3dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 18:56:33 +0100 Subject: [PATCH 12/18] ci: trigger CI run From abd5effc4ab6a5e528a496ca7e88c6aa18e9b56a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 19:01:03 +0100 Subject: [PATCH 13/18] ci: retry trigger From 7eba2fcbea3f39b044f6fe42d75e40cfdd8fcf85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 19:02:16 +0100 Subject: [PATCH 14/18] ci: trigger after remote URL fix From 9c92a52ad8fbcfe79731c30ad15bf65f5753a28e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 19:03:43 +0100 Subject: [PATCH 15/18] ci: add workflow_dispatch to unblock manual trigger --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 215a73a7..11edfcba 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,6 +3,7 @@ name: CI on: pull_request: branches: [ main ] + workflow_dispatch: concurrency: group: ci-${{ github.workflow }}-${{ github.ref }} From 0cf894abc61c917c894e303b4e35cee570ead408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 19:04:41 +0100 Subject: [PATCH 16/18] fix: remove empty env blocks left after UV_INDEX removal --- .github/workflows/test.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 11edfcba..8f8d1d1a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1592,8 
+1592,6 @@ jobs: verify-generated-files: runs-on: ubuntu-latest - env: - steps: - uses: actions/checkout@v6 @@ -1663,8 +1661,6 @@ jobs: check-openapi-compatibility: runs-on: ubuntu-latest - env: - steps: - uses: actions/checkout@v6 with: From 5d04d005ca2205254b590964572d17b2fe944c98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 19:19:58 +0100 Subject: [PATCH 17/18] fix: add type: ignore for optional claude_agent_sdk imports (macOS-only) --- .../hindsight_api/engine/providers/claude_code_llm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/engine/providers/claude_code_llm.py b/hindsight-api-slim/hindsight_api/engine/providers/claude_code_llm.py index 2eaa8f2b..0eb60f92 100644 --- a/hindsight-api-slim/hindsight_api/engine/providers/claude_code_llm.py +++ b/hindsight-api-slim/hindsight_api/engine/providers/claude_code_llm.py @@ -68,7 +68,7 @@ def _verify_claude_code_available(self) -> None: # Reduce Claude Agent SDK logging verbosity import logging as sdk_logging - from claude_agent_sdk import query # noqa: F401 + from claude_agent_sdk import query # noqa: F401 # type: ignore[import-untyped] sdk_logging.getLogger("claude_agent_sdk").setLevel(sdk_logging.WARNING) sdk_logging.getLogger("claude_agent_sdk._internal").setLevel(sdk_logging.WARNING) @@ -141,7 +141,7 @@ async def call( OutputTooLongError: If output exceeds token limits (not supported by Claude Agent SDK). Exception: Re-raises API errors after retries exhausted. """ - from claude_agent_sdk import AssistantMessage, ClaudeAgentOptions, TextBlock, query + from claude_agent_sdk import AssistantMessage, ClaudeAgentOptions, TextBlock, query # type: ignore[import-untyped] start_time = time.time() @@ -331,7 +331,7 @@ async def call_with_tools( Returns: LLMToolCallResult with content and/or tool_calls. 
""" - from claude_agent_sdk import ( + from claude_agent_sdk import ( # type: ignore[import-untyped] AssistantMessage, ClaudeAgentOptions, ClaudeSDKClient, From d42955db8e3dd15ac45a03dcef8efa90ccb66b9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Tue, 17 Mar 2026 19:33:32 +0100 Subject: [PATCH 18/18] fix: correct type: ignore rules for claude_agent_sdk and fix utcnow deprecation --- hindsight-api-slim/hindsight_api/api/http.py | 4 ++-- .../hindsight_api/engine/llm_wrapper.py | 2 +- .../hindsight_api/engine/providers/claude_code_llm.py | 11 ++++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index 567ef1e6..a32dcad2 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -10,7 +10,7 @@ import logging import uuid from contextlib import asynccontextmanager -from datetime import datetime +from datetime import datetime, timezone from typing import Any, Literal from fastapi import Depends, FastAPI, File, Form, Header, HTTPException, Query, UploadFile @@ -4171,7 +4171,7 @@ async def api_create_webhook( await bank_utils.get_bank_profile(pool, bank_id) webhook_id = uuid.uuid4() - now = datetime.utcnow().isoformat() + "Z" + now = datetime.now(timezone.utc).isoformat() row = await pool.fetchrow( f""" INSERT INTO {fq_table("webhooks")} diff --git a/hindsight-api-slim/hindsight_api/engine/llm_wrapper.py b/hindsight-api-slim/hindsight_api/engine/llm_wrapper.py index df63d5e2..4fd4c2e1 100644 --- a/hindsight-api-slim/hindsight_api/engine/llm_wrapper.py +++ b/hindsight-api-slim/hindsight_api/engine/llm_wrapper.py @@ -633,7 +633,7 @@ def _verify_claude_code_available(self) -> None: # Reduce Claude Agent SDK logging verbosity import logging as sdk_logging - from claude_agent_sdk import query # noqa: F401 + from claude_agent_sdk import query # noqa: F401 # type: ignore[unresolved-import] 
sdk_logging.getLogger("claude_agent_sdk").setLevel(sdk_logging.WARNING) sdk_logging.getLogger("claude_agent_sdk._internal").setLevel(sdk_logging.WARNING) diff --git a/hindsight-api-slim/hindsight_api/engine/providers/claude_code_llm.py b/hindsight-api-slim/hindsight_api/engine/providers/claude_code_llm.py index 0eb60f92..c91d2947 100644 --- a/hindsight-api-slim/hindsight_api/engine/providers/claude_code_llm.py +++ b/hindsight-api-slim/hindsight_api/engine/providers/claude_code_llm.py @@ -68,7 +68,7 @@ def _verify_claude_code_available(self) -> None: # Reduce Claude Agent SDK logging verbosity import logging as sdk_logging - from claude_agent_sdk import query # noqa: F401 # type: ignore[import-untyped] + from claude_agent_sdk import query # noqa: F401 # type: ignore[unresolved-import] sdk_logging.getLogger("claude_agent_sdk").setLevel(sdk_logging.WARNING) sdk_logging.getLogger("claude_agent_sdk._internal").setLevel(sdk_logging.WARNING) @@ -141,7 +141,12 @@ async def call( OutputTooLongError: If output exceeds token limits (not supported by Claude Agent SDK). Exception: Re-raises API errors after retries exhausted. """ - from claude_agent_sdk import AssistantMessage, ClaudeAgentOptions, TextBlock, query # type: ignore[import-untyped] + from claude_agent_sdk import ( # type: ignore[unresolved-import] + AssistantMessage, + ClaudeAgentOptions, + TextBlock, + query, + ) start_time = time.time() @@ -331,7 +336,7 @@ async def call_with_tools( Returns: LLMToolCallResult with content and/or tool_calls. """ - from claude_agent_sdk import ( # type: ignore[import-untyped] + from claude_agent_sdk import ( # type: ignore[unresolved-import] AssistantMessage, ClaudeAgentOptions, ClaudeSDKClient,