Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/TOOLS.md
Original file line number Diff line number Diff line change
Expand Up @@ -432,9 +432,9 @@ Start high, go lower:

## Limitations

1. **Oversized notes:** Notes >32k tokens (~120k chars) currently skipped
- Planned: Automatic chunking for these notes
- Workaround: Manually split large notes
1. **Oversized notes:** Notes >32k tokens (~120k chars) are automatically chunked
- Split into 2000-char chunks with context preserved via voyage-context-3
- Dynamic batch sizing adapts to chunk density

2. **Empty notes:** Skipped during indexing
- Warning logged with file path
Expand Down
78 changes: 62 additions & 16 deletions src/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ def _redact_sensitive(message: str) -> str:
return message


def _is_token_limit_error(e: Exception) -> bool:
"""Check if an exception is a deterministic token limit error (not retryable)."""
error_lower = str(e).lower()
return ("token" in error_lower and "context window" in error_lower) or (
"too many tokens" in error_lower
)


class VoyageEmbedder:
"""
Voyage Context-3 embedding client with caching and rate limiting.
Expand Down Expand Up @@ -165,37 +173,67 @@ def embed_with_chunks(

# If under limit, embed whole
if estimated_tokens < 30000:
embedding = self.embed(text, input_type=input_type)
return ([embedding], 1)
try:
embedding = self.embed(text, input_type=input_type)
return ([embedding], 1)
except EmbeddingError as e:
if _is_token_limit_error(e):
logger.warning(
f"Whole-note embed failed (est. {estimated_tokens:.0f} tokens), "
f"falling back to chunked embedding"
)
# Fall through to chunking below
else:
raise

# Split into chunks
chunks = self.chunk_text(text, chunk_size=chunk_size, overlap=0)
logger.info(f"Large note: splitting into {len(chunks)} chunks")

# Embed chunks in batches (Voyage limit: ~60 chunks = 30k tokens per contextualized call)
# Calculate safe batch size based on actual chunk sizes
total_chars = sum(len(c) for c in chunks)
avg_chars_per_chunk = total_chars / len(chunks)
# Conservative: assume 3 chars/token for dense content safety margin
estimated_tokens_per_chunk = avg_chars_per_chunk / 3
batch_size = max(1, int(28000 / estimated_tokens_per_chunk))
logger.info(f"Dynamic batch size: {batch_size} (avg {avg_chars_per_chunk:.0f} chars/chunk)")

# Embed chunks in batches
all_embeddings = []
batch_size = 60 # ~30k tokens per batch

try:
for i in range(0, len(chunks), batch_size):
i = 0
while i < len(chunks):
chunk_batch = chunks[i : i + batch_size]

# Rate limit
self._rate_limit_sync()

# Embed this batch of chunks with context (with retry)
result = self._call_api_with_retry(
self.client.contextualized_embed,
inputs=[chunk_batch], # One document's chunks
model=self.model,
input_type=input_type,
)
try:
# Embed this batch of chunks with context
result = self._call_api_with_retry(
self.client.contextualized_embed,
inputs=[chunk_batch], # One document's chunks
model=self.model,
input_type=input_type,
)

# Extract embeddings
batch_embeddings = result.results[0].embeddings
all_embeddings.extend(batch_embeddings)

# Extract embeddings
batch_embeddings = result.results[0].embeddings
all_embeddings.extend(batch_embeddings)
logger.debug(f"Embedded chunks {i + 1}-{i + len(chunk_batch)} of {len(chunks)}")
i += batch_size

logger.debug(f"Embedded chunks {i + 1}-{i + len(chunk_batch)} of {len(chunks)}")
except EmbeddingError as e:
if _is_token_limit_error(e) and batch_size > 1:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Batch retry checks batch_size > 1 instead of len(chunk_batch) > 1, causing wasteful identical API calls

When the remaining chunks at position i are fewer than batch_size, reducing batch_size does not change the actual chunk_batch sent to the API. The loop at src/embedder.py:229 checks batch_size > 1 to decide whether to retry with a smaller batch, but the actual batch sent is chunks[i : i + batch_size] (src/embedder.py:207), which is bounded by the remaining chunks. This means the code can make multiple identical failing API calls (each with a rate-limit sleep at src/embedder.py:210) before batch_size shrinks below the actual remaining chunk count.

Example scenario: 5 remaining chunks with batch_size=42
  • batch_size=42 → chunk_batch = 5 chunks → API fails (token limit)
  • batch_size=21 → chunk_batch = 5 chunks → same request, fails again
  • batch_size=10 → chunk_batch = 5 chunks → same request, fails again
  • batch_size=5 → chunk_batch = 5 chunks → same request, fails again
  • batch_size=2 → chunk_batch = 2 chunks → different request, may succeed

That's 4 wasted API calls with rate-limit delays.

Suggested change
if _is_token_limit_error(e) and batch_size > 1:
if _is_token_limit_error(e) and len(chunk_batch) > 1:
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

# Halve batch size and retry this batch
batch_size = max(1, batch_size // 2)
logger.warning(
f"Batch too large for token limit, reducing to {batch_size} chunks"
)
continue # Retry same position with smaller batch
raise

logger.success(f"Embedded {len(all_embeddings)} chunks with context preserved")
return (all_embeddings, len(chunks))
Expand Down Expand Up @@ -264,6 +302,14 @@ def _call_api_with_retry(self, api_func, *args, **kwargs):
last_error = e
error_msg = _redact_sensitive(str(e))

# Token limit errors are deterministic, don't retry
if _is_token_limit_error(e):
logger.error(f"Token limit error (not retryable): {error_msg}")
raise EmbeddingError(
f"Token limit exceeded: {error_msg}",
cause=e,
) from e

# Check if it's a rate limit error (429)
if "429" in str(e) or "rate" in str(e).lower():
# Exponential backoff: 2^attempt seconds (1, 2, 4, ...)
Expand Down
2 changes: 1 addition & 1 deletion src/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
PostgreSQL+pgvector Vector Store for Obsidian Graph MCP Server.

Adapted from oachatbot's PostgreSQL store, simplified for Obsidian notes:
- Stores whole notes (not chunked documents)
- Stores whole notes and chunked documents (automatic chunking for large notes)
- Uses 'path' as identifier (not document_id)
- No site_id or publish_date (Obsidian-specific)
- Adds connection_count materialization for graph queries
Expand Down
Loading