Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ async def _get_note_info(self, note_path: str) -> dict | None:
try:
async with self.store.pool.acquire() as conn:
result = await conn.fetchrow(
"SELECT path, title FROM notes WHERE path = $1", note_path
"SELECT path, title FROM notes WHERE path = $1 ORDER BY chunk_index LIMIT 1",
note_path,
)

if result:
Expand Down
15 changes: 3 additions & 12 deletions src/hub_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from loguru import logger

from .exceptions import DatabaseError
from .vector_store import PostgreSQLVectorStore


Expand Down Expand Up @@ -54,7 +55,7 @@ async def get_hub_notes(
List of {path, title, connection_count}
"""
if not self.store.pool:
raise ValueError("Store not initialized")
raise DatabaseError("Store not initialized - call initialize() first")

try:
# Check if connection_count needs refresh
Expand Down Expand Up @@ -101,7 +102,7 @@ async def get_orphaned_notes(
List of {path, title, connection_count, modified_at}
"""
if not self.store.pool:
raise ValueError("Store not initialized")
raise DatabaseError("Store not initialized - call initialize() first")

try:
# Check if connection_count needs refresh
Expand Down Expand Up @@ -168,16 +169,6 @@ async def _ensure_fresh_counts(self, threshold: float):
except Exception as e:
logger.warning(f"Failed to check count freshness: {e}")

async def _refresh_all_counts(self, threshold: float):
"""
Refresh connection_count for all notes (acquires lock).

Convenience wrapper that acquires _refresh_lock before refreshing.
Prefer _do_refresh() when caller already holds the lock.
"""
async with self._refresh_lock:
await self._do_refresh(threshold)

async def _do_refresh(self, threshold: float):
"""
Refresh connection_count for all notes (caller must hold _refresh_lock).
Expand Down
16 changes: 3 additions & 13 deletions src/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,26 +143,16 @@ async def index_vault(vault_path: str, batch_size: int = 100):
except Exception as e:
logger.error(f"Error reading {file_path}: {e}")

# Filter out notes with empty content
# (Large notes will be auto-chunked by embed_with_chunks)
valid_notes = []
for note in notes_data:
if not note["content"] or len(note["content"].strip()) == 0:
logger.warning(f"Skipping empty note: {note['path']}")
continue

valid_notes.append(note)

if not valid_notes:
if not notes_data:
logger.warning(f"No valid notes in batch {i // batch_size + 1}")
continue

logger.info(f"Batch: {len(valid_notes)} valid notes")
logger.info(f"Batch: {len(notes_data)} valid notes")

# Process each note with automatic chunking
notes = []
batch_failed_notes = []
for note_data in valid_notes:
for note_data in notes_data:
# embed_with_chunks handles both small (whole) and large (chunked) notes
try:
embeddings_list, total_chunks = embedder.embed_with_chunks(
Expand Down
25 changes: 7 additions & 18 deletions src/schema.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
-- Obsidian Graph MCP Server - PostgreSQL Schema
-- Obsidian Graph - PostgreSQL Schema
--
-- This schema is designed for storing whole Obsidian notes (not chunked documents)
-- with vector embeddings for semantic search and graph analysis.
-- Stores notes (whole or chunked) with vector embeddings for
-- semantic search, graph analysis, and hub/orphan detection.

-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;

-- Migration: remove trigger that overwrites file mtime with DB timestamp
DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes;
DROP FUNCTION IF EXISTS update_modified_at();
Comment on lines +9 to +11
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Schema migration DROP TRIGGER is unreachable for existing databases

The DROP TRIGGER IF EXISTS migration at src/schema.sql:10-11 is placed inside the init schema file, which is mounted at docker-entrypoint-initdb.d/schema.sql (docker-compose.yml:54). Docker's PostgreSQL entrypoint only executes init scripts on first database creation (when the data volume is empty). For existing databases that already have the trigger, this migration never runs.

This means existing deployments will retain the old trigger_update_notes_modified_at trigger, which overwrites modified_at with CURRENT_TIMESTAMP on every UPDATE — including the _do_refresh batch UPDATE in src/hub_analyzer.py:225-244. This causes all notes' modified_at to be silently reset to the DB timestamp whenever connection counts are refreshed, making the orphaned notes' modified_at display meaningless.

Prompt for agents
The DROP TRIGGER/FUNCTION migration at schema.sql:10-11 is placed in a Docker init script that only runs on fresh database creation. Existing databases that have the trigger will never get it removed.

To fix this, consider one of:
1. Add a separate migration script (e.g., migrations/001_drop_modified_at_trigger.sql) that can be run manually or via a startup migration check in the Python code (e.g., in vector_store.py initialize() method).
2. Add the DROP TRIGGER logic to the Python store initialization code (PostgreSQLVectorStore.initialize), which runs on every server start and can check if the trigger exists before dropping it.
3. Document in the README/CHANGELOG that existing users must manually run: DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes; DROP FUNCTION IF EXISTS update_modified_at();
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

Comment on lines +10 to +11
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 schema.sql DROP TRIGGER runs before CREATE TABLE, failing on fresh databases

DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes at line 10 executes before CREATE TABLE IF NOT EXISTS notes at line 15. In PostgreSQL, IF EXISTS on DROP TRIGGER only suppresses errors when the trigger doesn't exist — the referenced table must still exist. On a fresh database, the notes table hasn't been created yet, so this statement fails with ERROR: relation "notes" does not exist. Per docker-compose.yml:54, this file is mounted as /docker-entrypoint-initdb.d/schema.sql, meaning it's specifically designed to run on fresh database initialization — the exact scenario where it breaks.

Prompt for agents
The DROP TRIGGER IF EXISTS and DROP FUNCTION IF EXISTS statements at lines 10-11 of schema.sql run before the CREATE TABLE IF NOT EXISTS at line 15. On a fresh database where the notes table does not yet exist, DROP TRIGGER IF EXISTS ... ON notes will fail because PostgreSQL requires the table to exist (IF EXISTS only applies to the trigger, not the table).

The fix is to move these DROP statements after the CREATE TABLE IF NOT EXISTS notes statement. That way, on a fresh DB the table is created first (no trigger exists, so the DROPs are no-ops), and on an existing DB the CREATE TABLE IF NOT EXISTS is a no-op because the table already exists, after which the DROPs safely remove the old trigger/function.

Alternatively, wrap the DROP TRIGGER in a DO block that checks for table existence first, but simply reordering the statements is the simplest fix.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


-- Main notes table with vector embeddings
-- Supports chunking for large notes (voyage-context-3 pattern)
CREATE TABLE IF NOT EXISTS notes (
Expand Down Expand Up @@ -42,18 +46,3 @@ CREATE INDEX IF NOT EXISTS idx_notes_modified_at ON notes(modified_at);
CREATE INDEX IF NOT EXISTS idx_notes_connection_count ON notes(connection_count DESC);
CREATE INDEX IF NOT EXISTS idx_notes_last_indexed_at ON notes(last_indexed_at);
CREATE INDEX IF NOT EXISTS idx_notes_chunk_index ON notes(chunk_index);

-- Function to update modified_at timestamp automatically
CREATE OR REPLACE FUNCTION update_modified_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.modified_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Trigger to auto-update modified_at on note updates
CREATE TRIGGER trigger_update_notes_modified_at
BEFORE UPDATE ON notes
FOR EACH ROW
EXECUTE FUNCTION update_modified_at();
4 changes: 2 additions & 2 deletions src/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def validate_int_range(
max_val: Maximum allowed value (inclusive)

Returns:
Validated integer (clamped to range)
Validated integer. Raises ValidationError if out of range.

Raises:
ValidationError: If value is out of range
Expand Down Expand Up @@ -114,7 +114,7 @@ def validate_float_range(
max_val: Maximum allowed value (inclusive)

Returns:
Validated float (clamped to range)
Validated float. Raises ValidationError if out of range.

Raises:
ValidationError: If value is out of range
Expand Down
6 changes: 6 additions & 0 deletions src/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ async def initialize(self) -> None:
if not table_exists:
logger.warning("Notes table does not exist yet (will be created by schema.sql)")

# Migration: remove trigger that overwrites file mtime (existing databases)
await conn.execute(
"DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes"
)
await conn.execute("DROP FUNCTION IF EXISTS update_modified_at()")
Comment on lines +128 to +132
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 initialize() runs DROP TRIGGER on potentially non-existent table

The migration code at lines 129-132 runs DROP TRIGGER IF EXISTS ... ON notes unconditionally, even when the table_exists check at src/vector_store.py:122-126 determined the notes table does not exist. PostgreSQL's IF EXISTS on DROP TRIGGER only suppresses errors for a missing trigger — the table itself must exist. When the table doesn't exist, this raises an asyncpg.PostgresError which is caught at line 136 and re-raised as VectorStoreError, causing initialize() to fail and preventing the server from starting on a fresh database.

Suggested change
# Migration: remove trigger that overwrites file mtime (existing databases)
await conn.execute(
"DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes"
)
await conn.execute("DROP FUNCTION IF EXISTS update_modified_at()")
# Migration: remove trigger that overwrites file mtime (existing databases)
if table_exists:
await conn.execute(
"DROP TRIGGER IF EXISTS trigger_update_notes_modified_at ON notes"
)
await conn.execute("DROP FUNCTION IF EXISTS update_modified_at()")
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


logger.info(f"PostgreSQL connected: {self.max_connections} max connections")

except asyncpg.PostgresError as e:
Expand Down
3 changes: 2 additions & 1 deletion tests/test_error_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ async def timeout_side_effect(*args, **kwargs):
assert len(result) == 1
assert "Error" in result[0]["text"]
# Should contain timeout/pool information
assert "pool" in result[0]["text"].lower() or "timeout" in result[0]["text"].lower()
text = result[0]["text"].lower()
assert "pool" in text or "timeout" in text

@pytest.mark.asyncio
async def test_get_similar_notes_note_not_found(self, server_context):
Expand Down
6 changes: 3 additions & 3 deletions tests/test_hub_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,15 @@ async def __aexit__(self, *args):
refresh_count = 0
count_lock = asyncio.Lock()

original_refresh = analyzer._refresh_all_counts
original_refresh = analyzer._do_refresh

async def tracked_refresh(threshold):
nonlocal refresh_count
async with count_lock:
refresh_count += 1
await original_refresh(threshold)

analyzer._refresh_all_counts = tracked_refresh
analyzer._do_refresh = tracked_refresh

# Start 5 concurrent refreshes
tasks = [asyncio.create_task(tracked_refresh(0.5)) for _ in range(5)]
Expand Down Expand Up @@ -222,7 +222,7 @@ async def __aexit__(self, *args):
mock_store.pool.acquire = MagicMock(return_value=MockAcquire())

# Run refresh
await analyzer._refresh_all_counts(threshold=0.5)
await analyzer._do_refresh(threshold=0.5)

# Batched approach: 3 notes fit in 1 batch (batch_size=100), so 1 execute call
assert mock_conn.execute.call_count == 1
Expand Down
Loading