drewburchfield · drewburchfield · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/src/graph_builder.py b/src/graph_builder.py
@@ -146,7 +146,8 @@ async def _get_note_info(self, note_path: str) -> dict | None:
         try:
             async with self.store.pool.acquire() as conn:
                 result = await conn.fetchrow(
-                    "SELECT path, title FROM notes WHERE path = $1", note_path
+                    "SELECT path, title FROM notes WHERE path = $1 ORDER BY chunk_index LIMIT 1",
+                    note_path,
                 )
 
                 if result:

diff --git a/src/hub_analyzer.py b/src/hub_analyzer.py
@@ -9,6 +9,7 @@
 
 from loguru import logger
 
+from .exceptions import DatabaseError
 from .vector_store import PostgreSQLVectorStore
 
 
@@ -54,7 +55,7 @@ async def get_hub_notes(
             List of {path, title, connection_count}
         """
         if not self.store.pool:
-            raise ValueError("Store not initialized")
+            raise DatabaseError("Store not initialized - call initialize() first")
 
         try:
             # Check if connection_count needs refresh
@@ -101,7 +102,7 @@ async def get_orphaned_notes(
             List of {path, title, connection_count, modified_at}
         """
         if not self.store.pool:
-            raise ValueError("Store not initialized")
+            raise DatabaseError("Store not initialized - call initialize() first")
 
         try:
             # Check if connection_count needs refresh
@@ -168,16 +169,6 @@ async def _ensure_fresh_counts(self, threshold: float):
         except Exception as e:
             logger.warning(f"Failed to check count freshness: {e}")
 
-    async def _refresh_all_counts(self, threshold: float):
-        """
-        Refresh connection_count for all notes (acquires lock).
-
-        Convenience wrapper that acquires _refresh_lock before refreshing.
-        Prefer _do_refresh() when caller already holds the lock.
-        """
-        async with self._refresh_lock:
-            await self._do_refresh(threshold)
-
     async def _do_refresh(self, threshold: float):
         """
         Refresh connection_count for all notes (caller must hold _refresh_lock).

diff --git a/src/indexer.py b/src/indexer.py
@@ -143,26 +143,16 @@ async def index_vault(vault_path: str, batch_size: int = 100):
                 except Exception as e:
                     logger.error(f"Error reading {file_path}: {e}")
 
-            # Filter out notes with empty content
-            # (Large notes will be auto-chunked by embed_with_chunks)
-            valid_notes = []
-            for note in notes_data:
-                if not note["content"] or len(note["content"].strip()) == 0:
-                    logger.warning(f"Skipping empty note: {note['path']}")
-                    continue
-
-                valid_notes.append(note)
-
-            if not valid_notes:
+            if not notes_data:
                 logger.warning(f"No valid notes in batch {i // batch_size + 1}")
                 continue
 
-            logger.info(f"Batch: {len(valid_notes)} valid notes")
+            logger.info(f"Batch: {len(notes_data)} valid notes")
 
             # Process each note with automatic chunking
             notes = []
             batch_failed_notes = []
-            for note_data in valid_notes:
+            for note_data in notes_data:
                 # embed_with_chunks handles both small (whole) and large (chunked) notes
                 try:
                     embeddings_list, total_chunks = embedder.embed_with_chunks(

diff --git a/src/schema.sql b/src/schema.sql
@@ -1,11 +1,15 @@
--- Obsidian Graph MCP Server - PostgreSQL Schema
+-- Obsidian Graph - PostgreSQL Schema
 --
--- This schema is designed for storing whole Obsidian notes (not chunked documents)
--- with vector embeddings for semantic search and graph analysis.
+-- Stores notes (whole or chunked) with vector embeddings for
+-- semantic search, graph analysis, and hub/orphan detection.
 
 -- Enable pgvector extension
 CREATE EXTENSION IF NOT EXISTS vector;
 
+-- Migration: remove trigger that overwrites file mtime with DB timestamp
+DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes;
+DROP FUNCTION IF EXISTS update_modified_at();
+
 -- Main notes table with vector embeddings
 -- Supports chunking for large notes (voyage-context-3 pattern)
 CREATE TABLE IF NOT EXISTS notes (
@@ -42,18 +46,3 @@ CREATE INDEX IF NOT EXISTS idx_notes_modified_at ON notes(modified_at);
 CREATE INDEX IF NOT EXISTS idx_notes_connection_count ON notes(connection_count DESC);
 CREATE INDEX IF NOT EXISTS idx_notes_last_indexed_at ON notes(last_indexed_at);
 CREATE INDEX IF NOT EXISTS idx_notes_chunk_index ON notes(chunk_index);
-
--- Function to update modified_at timestamp automatically
-CREATE OR REPLACE FUNCTION update_modified_at()
-RETURNS TRIGGER AS $$
-BEGIN
-    NEW.modified_at = CURRENT_TIMESTAMP;
-    RETURN NEW;
-END;
-$$ LANGUAGE plpgsql;
-
--- Trigger to auto-update modified_at on note updates
-CREATE TRIGGER trigger_update_notes_modified_at
-    BEFORE UPDATE ON notes
-    FOR EACH ROW
-    EXECUTE FUNCTION update_modified_at();
diff --git a/src/validation.py b/src/validation.py
@@ -72,7 +72,7 @@ def validate_int_range(
         max_val: Maximum allowed value (inclusive)
 
     Returns:
-        Validated integer (clamped to range)
+        The validated integer; out-of-range values are rejected, not clamped.
 
     Raises:
         ValidationError: If value is out of range
@@ -114,7 +114,7 @@ def validate_float_range(
         max_val: Maximum allowed value (inclusive)
 
     Returns:
-        Validated float (clamped to range)
+        The validated float; out-of-range values are rejected, not clamped.
 
     Raises:
         ValidationError: If value is out of range

diff --git a/src/vector_store.py b/src/vector_store.py
@@ -125,6 +125,12 @@ async def initialize(self) -> None:
                 if not table_exists:
                     logger.warning("Notes table does not exist yet (will be created by schema.sql)")
 
+                # Migration: remove trigger that overwrites file mtime (existing databases)
+                await conn.execute(
+                    "DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes"
+                )
+                await conn.execute("DROP FUNCTION IF EXISTS update_modified_at()")
-                # Migration: remove trigger that overwrites file mtime (existing databases)
-                await conn.execute(
-                    "DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes"
-                )
-                await conn.execute("DROP FUNCTION IF EXISTS update_modified_at()")
+                # Migration: remove trigger that overwrites file mtime (existing databases)
+                if table_exists:
+                    await conn.execute(
+                        "DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes"
+                    )
+                    await conn.execute("DROP FUNCTION IF EXISTS update_modified_at()")
-                # Migration: remove trigger that overwrites file mtime (existing databases)
-                await conn.execute(
-                    "DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes"
-                )
-                await conn.execute("DROP FUNCTION IF EXISTS update_modified_at()")
+                # Migration: remove trigger that overwrites file mtime (existing databases)
+                if table_exists:
+                    await conn.execute(
+                        "DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes"
+                    )
+                    await conn.execute("DROP FUNCTION IF EXISTS update_modified_at()")
+
             logger.info(f"PostgreSQL connected: {self.max_connections} max connections")
 
         except asyncpg.PostgresError as e:

diff --git a/tests/test_error_paths.py b/tests/test_error_paths.py
@@ -70,7 +70,8 @@ async def timeout_side_effect(*args, **kwargs):
         assert len(result) == 1
         assert "Error" in result[0]["text"]
         # Should contain timeout/pool information
-        assert "pool" in result[0]["text"].lower() or "timeout" in result[0]["text"].lower()
+        text = result[0]["text"].lower()
+        assert "pool" in text or "timeout" in text
 
     @pytest.mark.asyncio
     async def test_get_similar_notes_note_not_found(self, server_context):

diff --git a/tests/test_hub_analyzer.py b/tests/test_hub_analyzer.py
@@ -125,15 +125,15 @@ async def __aexit__(self, *args):
     refresh_count = 0
     count_lock = asyncio.Lock()
 
-    original_refresh = analyzer._refresh_all_counts
+    original_refresh = analyzer._do_refresh
 
     async def tracked_refresh(threshold):
         nonlocal refresh_count
         async with count_lock:
             refresh_count += 1
         await original_refresh(threshold)
 
-    analyzer._refresh_all_counts = tracked_refresh
+    analyzer._do_refresh = tracked_refresh
 
     # Start 5 concurrent refreshes
     tasks = [asyncio.create_task(tracked_refresh(0.5)) for _ in range(5)]
@@ -222,7 +222,7 @@ async def __aexit__(self, *args):
     mock_store.pool.acquire = MagicMock(return_value=MockAcquire())
 
     # Run refresh
-    await analyzer._refresh_all_counts(threshold=0.5)
+    await analyzer._do_refresh(threshold=0.5)
 
     # Batched approach: 3 notes fit in 1 batch (batch_size=100), so 1 execute call
     assert mock_conn.execute.call_count == 1