diff --git a/CHANGELOG.md b/CHANGELOG.md index f1c68d1..bb2b6a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `update_intention` no longer echoes `state` or `reason`; now returns `{status, id}`. Verified in code that `state` was a pure pass-through with no coercion or auto-advancement, making it textbook echoed input. - **Breaking for clients that read these fields from write responses.** The data is trivially recoverable (caller already has it). The eight unchanged write tools (`report_status`, `report_alert`, `update_entry`, `suppress_alert`, `add_context`, `delete_entry`, `restore_entry`, `remind`) keep their existing shapes — handles or server-derived fields only. +### Fixed +- **Alembic DSN format handling** — `alembic/env.py` now converts psycopg DSN format (`host=X dbname=Y user=Z password=W`) to SQLAlchemy URL format via `dsn_to_sqlalchemy_url()` helper. Delegates DSN parsing to `psycopg.conninfo.conninfo_to_dict()` for correctness; forwards extra params (sslmode, connect_timeout, etc.) as URL query string. Fixes migration/backfill failures on production where `AWARENESS_DATABASE_URL` uses DSN format. +- **Deploy script** — `scripts/holodeck/deploy.sh` maintenance mode no longer passes `upgrade head` positional args to `mcp-awareness-migrate` (which uses `--flags`, not positional args). +- **README** — fix documented `mcp-awareness-migrate upgrade head` syntax to match actual CLI interface (`mcp-awareness-migrate` with no positional args). +- **Docs** — document that `AWARENESS_DATABASE_URL` accepts both URL and DSN formats, and that DSN values must be quoted in env files to prevent shell space-splitting. Updated in README, data dictionary, `migrate.py` error message, and `alembic/env.py` error message. 
+ ## [0.16.2] - 2026-04-09 ### Added diff --git a/README.md b/README.md index b5d7eed..c7bb5e8 100644 --- a/README.md +++ b/README.md @@ -197,7 +197,7 @@ The server is running on port 8420. Point any MCP client at `http://localhost:84 | `AWARENESS_TRANSPORT` | `stdio` | Transport: `stdio` or `streamable-http` | | `AWARENESS_HOST` | `0.0.0.0` | Bind address (HTTP mode) | | `AWARENESS_PORT` | `8420` | Port (HTTP mode) | -| `AWARENESS_DATABASE_URL` | _(required)_ | PostgreSQL connection string. Example: `postgresql://user:pass@localhost:5432/awareness` | +| `AWARENESS_DATABASE_URL` | _(required)_ | PostgreSQL connection string. Accepts URL format (`postgresql://user:pass@host:5432/db`) or psycopg DSN format (`host=X dbname=Y user=Z password=W`). **If using DSN format in an env file, quote the value** — unquoted spaces cause the shell to split it into separate assignments. Example: `AWARENESS_DATABASE_URL="host=db dbname=awareness user=awareness password=secret"` | | `AWARENESS_MOUNT_PATH` | _(none)_ | Secret path prefix for access control (e.g., `/my-secret`). When set, only `//mcp` is served; all other paths return 404. Use with a Cloudflare WAF rule. | #### Embedding (optional) @@ -408,7 +408,7 @@ For single-user deployments, secret path + WAF is sufficient. For multi-user, en ### Upgrading -When upgrading to a release with hybrid retrieval (Layer 1), running `mcp-awareness-migrate upgrade head` applies two migrations: +When upgrading to a release with hybrid retrieval (Layer 1), running `mcp-awareness-migrate` applies two migrations: 1. **Schema migration** — adds `language` (regconfig) and `tsv` (generated tsvector) columns to the entries table, plus GIN and partial indexes. Fast (DDL only). 2. **Language backfill** — runs lingua-py detection on all existing entries and updates the `language` column where a known language is detected. 
This is a one-time data migration that may take longer than usual on the first deploy: diff --git a/alembic/env.py b/alembic/env.py index 5b230e0..1606bb4 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -41,12 +41,16 @@ if not database_url: raise ValueError( "AWARENESS_DATABASE_URL environment variable is required. " - "Example: postgresql+psycopg://awareness:awareness-dev@localhost:5432/awareness" + 'Example (URL): postgresql+psycopg://user:pass@localhost:5432/awareness ' + 'Example (DSN): "host=localhost dbname=awareness user=user password=pass" ' + "Note: DSN values with spaces must be quoted in env files." ) -# Ensure the URL uses a SQLAlchemy-compatible dialect prefix -if database_url.startswith("postgresql://"): - database_url = database_url.replace("postgresql://", "postgresql+psycopg://", 1) +# Normalise to a SQLAlchemy-compatible URL. Production deployments often +# use psycopg DSN format (key=value pairs); Alembic/SQLAlchemy needs a URL. +from mcp_awareness.helpers import dsn_to_sqlalchemy_url # noqa: E402 + +database_url = dsn_to_sqlalchemy_url(database_url) def run_migrations_offline() -> None: diff --git a/docs/data-dictionary.md b/docs/data-dictionary.md index fd8f305..704d6be 100644 --- a/docs/data-dictionary.md +++ b/docs/data-dictionary.md @@ -335,7 +335,7 @@ Use cases: decision → context, intention → action, note → note ("see also" - **WAL level:** `wal_level=logical` configured for Debezium CDC readiness and logical replication - **Replication slots:** `max_replication_slots=4` for future replication/CDC - **Background cleanup:** Daemon thread with its own psycopg connection, debounced (10s), with alive-check guard to prevent thread accumulation -- **Connection string:** Configured via `AWARENESS_DATABASE_URL` (e.g., `postgresql://user:pass@localhost:5432/awareness`) +- **Connection string:** Configured via `AWARENESS_DATABASE_URL`. 
def dsn_to_sqlalchemy_url(dsn: str) -> str:
    """Convert a database connection string to a SQLAlchemy-compatible URL.

    Accepts either:
    - A psycopg DSN (``host=X dbname=Y user=Z password=W port=P``)
    - A URL (``postgresql://...`` or ``postgresql+psycopg://...``)

    URL input is returned unchanged apart from the dialect prefix, which is
    normalised to ``postgresql+psycopg://``. DSN input is parsed with
    ``psycopg.conninfo.conninfo_to_dict`` (so quoting and escaping follow
    psycopg's rules) and reassembled as a URL. Missing DSN keys fall back to
    ``localhost`` / ``5432`` / ``awareness`` (dbname) / ``awareness`` (user)
    and an empty password. Parameters other than host/port/dbname/user/
    password are forwarded as URL query string parameters.

    Args:
        dsn: Connection string in DSN or URL form. Surrounding whitespace
            is ignored.

    Returns:
        A ``postgresql+psycopg://`` URL.

    Raises:
        ValueError: If the input is empty or whitespace-only, or if a URL's
            network location contains more than one ``@`` (which indicates
            an unencoded ``@`` in the credentials).
        psycopg.ProgrammingError: Propagated from the underlying parser for
            unparseable DSN strings.
    """
    from urllib.parse import quote, urlencode, urlparse

    dsn = dsn.strip()
    if not dsn:
        raise ValueError("Database connection string must not be empty")

    # Already a URL — normalise the dialect prefix, validate credentials.
    if dsn.startswith(("postgresql://", "postgresql+psycopg://")):
        if dsn.startswith("postgresql://"):
            dsn = "postgresql+psycopg://" + dsn[len("postgresql://") :]
        # Detect ambiguous URLs where an unencoded @ in the password makes
        # the netloc unparseable (e.g. "u:p@ss@host" → host looks like "ss").
        if urlparse(dsn).netloc.count("@") > 1:
            raise ValueError(
                "Ambiguous URL: password appears to contain an unencoded '@'. "
                "Percent-encode it as %40, or use DSN format instead."
            )
        return dsn

    # Imported only on the DSN path so URL pass-through does not depend on
    # the DSN parser being importable.
    from psycopg.conninfo import conninfo_to_dict

    # conninfo_to_dict returns dict[str, Any] (values are str or int);
    # coerce to str for URL construction and drop unset/empty entries.
    raw = conninfo_to_dict(dsn)
    parts: dict[str, str] = {k: str(v) for k, v in raw.items() if v is not None and v != ""}

    host = parts.pop("host", "") or "localhost"
    port = parts.pop("port", "") or "5432"
    # NOTE(review): dbname is interpolated without percent-encoding, unlike
    # user/password. SQLAlchemy's URL parser appears not to decode the
    # database component — confirm before adding quoting here.
    dbname = parts.pop("dbname", "") or "awareness"
    user = quote(parts.pop("user", "") or "awareness", safe="")
    password = quote(parts.pop("password", "") or "", safe="")

    if host.startswith("/"):
        # Unix socket: the directory goes in the query string, not the netloc.
        parts["host"] = host
        host = ""
    elif ":" in host and not host.startswith("["):
        # IPv6 literal: bracket it so its colons cannot be confused with the
        # host/port separator.
        host = f"[{host}]"

    base = f"postgresql+psycopg://{user}:{password}@{host}:{port}/{dbname}"

    # Forward remaining DSN params (sslmode, connect_timeout, etc.).
    if parts:
        base += "?" + urlencode(parts)

    return base
class TestDsnToSqlalchemyUrl:
    """Cover ``dsn_to_sqlalchemy_url``: DSN input, URL input, and error cases."""

    def test_plain_dsn(self):
        """A fully specified DSN maps directly onto the URL components."""
        actual = dsn_to_sqlalchemy_url(
            "host=db.local dbname=awareness user=admin password=secret port=5432"
        )
        assert actual == "postgresql+psycopg://admin:secret@db.local:5432/awareness"

    def test_dsn_defaults(self):
        """Keys absent from the DSN are filled in with defaults."""
        expected = "postgresql+psycopg://awareness:@myhost:5432/awareness"
        assert dsn_to_sqlalchemy_url("host=myhost") == expected

    def test_dsn_quoted_password_with_spaces(self):
        """A quoted password containing a space is percent-encoded."""
        actual = dsn_to_sqlalchemy_url("host=localhost dbname=db user=u password='my secret'")
        assert actual == "postgresql+psycopg://u:my%20secret@localhost:5432/db"

    def test_dsn_password_with_at_sign(self):
        """An ``@`` inside the password is percent-encoded."""
        actual = dsn_to_sqlalchemy_url("host=localhost dbname=db user=u password='p@ss'")
        assert actual == "postgresql+psycopg://u:p%40ss@localhost:5432/db"

    def test_dsn_password_with_slash(self):
        """A ``/`` inside the password is percent-encoded."""
        actual = dsn_to_sqlalchemy_url("host=localhost dbname=db user=u password='a/b'")
        assert actual == "postgresql+psycopg://u:a%2Fb@localhost:5432/db"

    def test_dsn_escaped_quote_in_password(self):
        """A backslash-escaped quote in the password is unescaped, then encoded."""
        actual = dsn_to_sqlalchemy_url(r"host=localhost dbname=db user=u password='it\'s'")
        assert actual == "postgresql+psycopg://u:it%27s@localhost:5432/db"

    def test_url_passthrough_postgresql_psycopg(self):
        """A URL already using the psycopg dialect comes back unchanged."""
        original = "postgresql+psycopg://u:p@h:5432/db"
        assert dsn_to_sqlalchemy_url(original) == original

    def test_url_passthrough_postgresql_plain(self):
        """postgresql:// is rewritten to postgresql+psycopg://."""
        actual = dsn_to_sqlalchemy_url("postgresql://u:p@h:5432/db")
        assert actual == "postgresql+psycopg://u:p@h:5432/db"

    def test_url_ambiguous_at_in_password_raises(self):
        """Unencoded @ in password makes URL ambiguous — must raise."""
        ambiguous = "postgresql://u:p@ss@h:5432/db"
        with pytest.raises(ValueError, match="unencoded '@'"):
            dsn_to_sqlalchemy_url(ambiguous)

    def test_url_encoded_at_in_password_ok(self):
        """Properly percent-encoded @ in password passes through."""
        original = "postgresql+psycopg://u:p%40ss@h:5432/db"
        assert dsn_to_sqlalchemy_url(original) == original

    def test_whitespace_stripped(self):
        """Leading and trailing whitespace around the DSN is ignored."""
        actual = dsn_to_sqlalchemy_url(" host=localhost dbname=db ")
        assert actual == "postgresql+psycopg://awareness:@localhost:5432/db"

    def test_unquoted_special_chars_encoded(self):
        """Unquoted password with URL-special chars gets encoded."""
        actual = dsn_to_sqlalchemy_url("host=localhost dbname=db user=u password=p%ss")
        assert actual == "postgresql+psycopg://u:p%25ss@localhost:5432/db"

    def test_extra_params_forwarded(self):
        """sslmode and other extra DSN params become URL query string."""
        actual = dsn_to_sqlalchemy_url(
            "host=db dbname=mydb user=u password=p port=5432 sslmode=require"
        )
        assert actual.startswith("postgresql+psycopg://u:p@db:5432/mydb?")
        assert "sslmode=require" in actual

    def test_multiple_extra_params(self):
        """Every extra DSN parameter shows up in the query string."""
        actual = dsn_to_sqlalchemy_url(
            "host=db dbname=mydb user=u password=p connect_timeout=10 sslmode=verify-full"
        )
        assert "sslmode=verify-full" in actual
        assert "connect_timeout=10" in actual

    def test_unix_socket_host(self):
        """Unix socket path goes in query string, not netloc."""
        actual = dsn_to_sqlalchemy_url("host=/var/run/postgresql dbname=db user=u")
        assert "host=%2Fvar%2Frun%2Fpostgresql" in actual
        # The netloc keeps an empty host in front of the port.
        assert "://u:@:5432/db?" in actual

    def test_empty_raises(self):
        """An empty string is rejected with ``ValueError``."""
        with pytest.raises(ValueError, match="must not be empty"):
            dsn_to_sqlalchemy_url("")

    def test_whitespace_only_raises(self):
        """Whitespace-only input is treated the same as empty input."""
        with pytest.raises(ValueError, match="must not be empty"):
            dsn_to_sqlalchemy_url(" ")

    def test_garbage_raises(self):
        """Input that is neither a URL nor a valid DSN surfaces the parser's error."""
        with pytest.raises(psycopg.ProgrammingError):
            dsn_to_sqlalchemy_url("garbage")