From 15bc833747ded47b84889b24c00ff9e66d719e28 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Sun, 12 Apr 2026 21:37:02 -0500 Subject: [PATCH 1/7] fix: alembic env.py handles psycopg DSN format for AWARENESS_DATABASE_URL Production uses DSN format (host=X dbname=Y user=Z password=W) for psycopg. Alembic needs a SQLAlchemy URL. Auto-convert DSN to URL format when the value doesn't start with postgresql://. Co-Authored-By: Claude Opus 4.6 (1M context) --- alembic/env.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/alembic/env.py b/alembic/env.py index 5b230e08..0f1646c9 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -44,6 +44,20 @@ "Example: postgresql+psycopg://awareness:awareness-dev@localhost:5432/awareness" ) +# Convert psycopg DSN format (key=value pairs) to SQLAlchemy URL format. +# Production deployments use DSN format for psycopg; Alembic needs a URL. +if not database_url.startswith(("postgresql://", "postgresql+psycopg://")): + # Parse DSN key=value pairs: "host=X dbname=Y user=Z password=W port=P" + import re + + dsn_parts = dict(re.findall(r"(\w+)=(\S+)", database_url)) + host = dsn_parts.get("host", "localhost") + port = dsn_parts.get("port", "5432") + dbname = dsn_parts.get("dbname", "awareness") + user = dsn_parts.get("user", "awareness") + password = dsn_parts.get("password", "") + database_url = f"postgresql+psycopg://{user}:{password}@{host}:{port}/{dbname}" + # Ensure the URL uses a SQLAlchemy-compatible dialect prefix if database_url.startswith("postgresql://"): database_url = database_url.replace("postgresql://", "postgresql+psycopg://", 1) From fb3f4ab43faed6b05627b67d6286cc6cac0389d7 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:01:08 -0500 Subject: [PATCH 2/7] fix: robust DSN-to-URL conversion for alembic migrations Extract dsn_to_sqlalchemy_url() into helpers.py with proper handling of single-quoted DSN values, URL-encoding of special characters in user/password, and postgresql:// dialect normalization. Replace the fragile inline regex in alembic/env.py. Fix deploy.sh passing unsupported positional args to mcp-awareness-migrate. 10 new tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 4 +++ alembic/env.py | 22 ++++----------- scripts/holodeck/deploy.sh | 2 +- src/mcp_awareness/helpers.py | 43 ++++++++++++++++++++++++++++ tests/test_helpers.py | 54 ++++++++++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1c68d1e..64b75344 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `update_intention` no longer echoes `state` or `reason`; now returns `{status, id}`. Verified in code that `state` was a pure pass-through with no coercion or auto-advancement, making it textbook echoed input. - **Breaking for clients that read these fields from write responses.** The data is trivially recoverable (caller already has it). The eight unchanged write tools (`report_status`, `report_alert`, `update_entry`, `suppress_alert`, `add_context`, `delete_entry`, `restore_entry`, `remind`) keep their existing shapes — handles or server-derived fields only. +### Fixed +- **Alembic DSN format handling** — `alembic/env.py` now converts psycopg DSN format (`host=X dbname=Y user=Z password=W`) to SQLAlchemy URL format via `dsn_to_sqlalchemy_url()` helper. Handles single-quoted values with spaces, URL-encodes special characters in user/password, and normalises `postgresql://` to `postgresql+psycopg://`. Fixes migration/backfill failures on production where `AWARENESS_DATABASE_URL` uses DSN format. Refs [#276](https://github.com/cmeans/mcp-awareness/pull/276). +- **Deploy script** — `scripts/holodeck/deploy.sh` maintenance mode no longer passes `upgrade head` positional args to `mcp-awareness-migrate` (which uses `--flags`, not positional args). Refs [#276](https://github.com/cmeans/mcp-awareness/pull/276). + ## [0.16.2] - 2026-04-09 ### Added diff --git a/alembic/env.py b/alembic/env.py index 0f1646c9..0e5fae3e 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -44,23 +44,11 @@ "Example: postgresql+psycopg://awareness:awareness-dev@localhost:5432/awareness" ) -# Convert psycopg DSN format (key=value pairs) to SQLAlchemy URL format. -# Production deployments use DSN format for psycopg; Alembic needs a URL. -if not database_url.startswith(("postgresql://", "postgresql+psycopg://")): - # Parse DSN key=value pairs: "host=X dbname=Y user=Z password=W port=P" - import re - - dsn_parts = dict(re.findall(r"(\w+)=(\S+)", database_url)) - host = dsn_parts.get("host", "localhost") - port = dsn_parts.get("port", "5432") - dbname = dsn_parts.get("dbname", "awareness") - user = dsn_parts.get("user", "awareness") - password = dsn_parts.get("password", "") - database_url = f"postgresql+psycopg://{user}:{password}@{host}:{port}/{dbname}" - -# Ensure the URL uses a SQLAlchemy-compatible dialect prefix -if database_url.startswith("postgresql://"): - database_url = database_url.replace("postgresql://", "postgresql+psycopg://", 1) +# Normalise to a SQLAlchemy-compatible URL. Production deployments often +# use psycopg DSN format (key=value pairs); Alembic/SQLAlchemy needs a URL. +from mcp_awareness.helpers import dsn_to_sqlalchemy_url # noqa: E402 + +database_url = dsn_to_sqlalchemy_url(database_url) def run_migrations_offline() -> None: diff --git a/scripts/holodeck/deploy.sh b/scripts/holodeck/deploy.sh index 66ccec63..e2735853 100755 --- a/scripts/holodeck/deploy.sh +++ b/scripts/holodeck/deploy.sh @@ -130,7 +130,7 @@ maintenance_deploy() { local first_ip first_ip=$(node_ip "${APP_NODES[0]}") update_node "$first_ip" - ssh "root@${first_ip}" 'cd /opt/mcp-awareness && sudo -u awareness bash -c "set -a && source /etc/awareness/env && set +a && /opt/mcp-awareness/venv/bin/mcp-awareness-migrate upgrade head"' + ssh "root@${first_ip}" 'cd /opt/mcp-awareness && sudo -u awareness bash -c "set -a && source /etc/awareness/env && set +a && /opt/mcp-awareness/venv/bin/mcp-awareness-migrate"' echo " Migration complete on ${first_ip}" wait_healthy "$first_ip" || echo " WARNING: ${first_ip} not healthy after migration" diff --git a/src/mcp_awareness/helpers.py b/src/mcp_awareness/helpers.py index c43c37e3..00e09fb7 100644 --- a/src/mcp_awareness/helpers.py +++ b/src/mcp_awareness/helpers.py @@ -41,6 +41,49 @@ DEFAULT_QUERY_LIMIT = 100 +def dsn_to_sqlalchemy_url(dsn: str) -> str: + """Convert a database connection string to a SQLAlchemy-compatible URL. + + Accepts either: + - A psycopg DSN (``host=X dbname=Y user=Z password=W port=P``) + - A URL (``postgresql://...`` or ``postgresql+psycopg://...``) + + DSN values may be single-quoted (``password='has spaces'``). + Special characters in user/password are percent-encoded for the URL. + + Always returns a ``postgresql+psycopg://`` URL. + """ + from urllib.parse import quote + + dsn = dsn.strip() + + # Already a URL — just normalise the dialect prefix + if dsn.startswith(("postgresql://", "postgresql+psycopg://")): + if dsn.startswith("postgresql://"): + dsn = "postgresql+psycopg://" + dsn[len("postgresql://") :] + return dsn + + # Parse psycopg key=value DSN. Values may be unquoted or single-quoted. + import re + + parts: dict[str, str] = {} + for m in re.finditer(r"(\w+)\s*=\s*(?:'((?:[^'\\]|\\.)*)'|(\S+))", dsn): + key = m.group(1) + # group(2) is the quoted value, group(3) the unquoted value + val = m.group(2) if m.group(2) is not None else m.group(3) + # Un-escape backslash sequences inside quoted values + if m.group(2) is not None: + val = val.replace("\\'", "'").replace("\\\\", "\\") + parts[key] = val + + host = parts.get("host", "localhost") + port = parts.get("port", "5432") + dbname = parts.get("dbname", "awareness") + user = quote(parts.get("user", "awareness"), safe="") + password = quote(parts.get("password", ""), safe="") + return f"postgresql+psycopg://{user}:{password}@{host}:{port}/{dbname}" + + def canonical_email(email: str) -> str: """Normalize email for uniqueness: strip +tags, dots for gmail, lowercase.""" email = email.lower().strip() diff --git a/tests/test_helpers.py b/tests/test_helpers.py index f3f30ced..180176db 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -27,6 +27,7 @@ _suggest, _validate_enum, _validate_timestamp, + dsn_to_sqlalchemy_url, ) @@ -77,6 +78,59 @@ def test_paginate_empty(): assert result["has_more"] is False +class TestDsnToSqlalchemyUrl: + """Test DSN-to-SQLAlchemy URL conversion used by alembic/env.py.""" + + def test_plain_dsn(self): + dsn = "host=db.local dbname=awareness user=admin password=secret port=5432" + assert dsn_to_sqlalchemy_url(dsn) == ( + "postgresql+psycopg://admin:secret@db.local:5432/awareness" + ) + + def test_dsn_defaults(self): + """Missing keys get sensible defaults.""" + assert dsn_to_sqlalchemy_url("host=myhost") == ( + "postgresql+psycopg://awareness:@myhost:5432/awareness" + ) + + def test_dsn_quoted_password_with_spaces(self): + dsn = "host=localhost dbname=db user=u password='my secret'" + assert dsn_to_sqlalchemy_url(dsn) == ( + "postgresql+psycopg://u:my%20secret@localhost:5432/db" + ) + + def test_dsn_password_with_at_sign(self): + dsn = "host=localhost dbname=db user=u password='p@ss'" + assert dsn_to_sqlalchemy_url(dsn) == ("postgresql+psycopg://u:p%40ss@localhost:5432/db") + + def test_dsn_password_with_slash(self): + dsn = "host=localhost dbname=db user=u password='a/b'" + assert dsn_to_sqlalchemy_url(dsn) == ("postgresql+psycopg://u:a%2Fb@localhost:5432/db") + + def test_dsn_escaped_quote_in_password(self): + dsn = r"host=localhost dbname=db user=u password='it\'s'" + assert dsn_to_sqlalchemy_url(dsn) == ("postgresql+psycopg://u:it%27s@localhost:5432/db") + + def test_url_passthrough_postgresql_psycopg(self): + url = "postgresql+psycopg://u:p@h:5432/db" + assert dsn_to_sqlalchemy_url(url) == url + + def test_url_passthrough_postgresql_plain(self): + """postgresql:// is rewritten to postgresql+psycopg://.""" + url = "postgresql://u:p@h:5432/db" + assert dsn_to_sqlalchemy_url(url) == "postgresql+psycopg://u:p@h:5432/db" + + def test_whitespace_stripped(self): + dsn = " host=localhost dbname=db " + result = dsn_to_sqlalchemy_url(dsn) + assert result.startswith("postgresql+psycopg://") + + def test_unquoted_special_chars_encoded(self): + """Unquoted password with URL-special chars gets encoded.""" + dsn = "host=localhost dbname=db user=u password=p%ss" + assert dsn_to_sqlalchemy_url(dsn) == ("postgresql+psycopg://u:p%25ss@localhost:5432/db") + + class TestLevenshtein: def test_identical_strings(self): assert _levenshtein("note", "note") == 0 From 8a4c82159951f2f58de8e8dcaba713c3e8a69995 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:15:13 -0500 Subject: [PATCH 3/7] =?UTF-8?q?fix:=20address=20QA=20findings=20=E2=80=94?= =?UTF-8?q?=20use=20psycopg.conninfo,=20forward=20extra=20params?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F1: Replace hand-rolled regex with psycopg.conninfo.conninfo_to_dict() for battle-tested DSN parsing. Extra params (sslmode, connect_timeout) forwarded as URL query string parameters. F2: Raise ValueError on empty/whitespace input instead of silently producing defaults. F3: Unix socket paths placed in query string, not netloc. F4: Fix README documenting broken `mcp-awareness-migrate upgrade head`. F6: Strengthen test_whitespace_stripped to assert full URL. F7: Remove self-referential PR refs from CHANGELOG. 864 tests (was 858). Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 5 ++-- README.md | 2 +- src/mcp_awareness/helpers.py | 58 ++++++++++++++++++++++-------------- tests/test_helpers.py | 37 +++++++++++++++++++++-- 4 files changed, 75 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64b75344..f48267ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,8 +34,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Breaking for clients that read these fields from write responses.** The data is trivially recoverable (caller already has it). The eight unchanged write tools (`report_status`, `report_alert`, `update_entry`, `suppress_alert`, `add_context`, `delete_entry`, `restore_entry`, `remind`) keep their existing shapes — handles or server-derived fields only. ### Fixed -- **Alembic DSN format handling** — `alembic/env.py` now converts psycopg DSN format (`host=X dbname=Y user=Z password=W`) to SQLAlchemy URL format via `dsn_to_sqlalchemy_url()` helper. Handles single-quoted values with spaces, URL-encodes special characters in user/password, and normalises `postgresql://` to `postgresql+psycopg://`. Fixes migration/backfill failures on production where `AWARENESS_DATABASE_URL` uses DSN format. Refs [#276](https://github.com/cmeans/mcp-awareness/pull/276). -- **Deploy script** — `scripts/holodeck/deploy.sh` maintenance mode no longer passes `upgrade head` positional args to `mcp-awareness-migrate` (which uses `--flags`, not positional args). Refs [#276](https://github.com/cmeans/mcp-awareness/pull/276). +- **Alembic DSN format handling** — `alembic/env.py` now converts psycopg DSN format (`host=X dbname=Y user=Z password=W`) to SQLAlchemy URL format via `dsn_to_sqlalchemy_url()` helper. Delegates DSN parsing to `psycopg.conninfo.conninfo_to_dict()` for correctness; forwards extra params (sslmode, connect_timeout, etc.) as URL query string. Fixes migration/backfill failures on production where `AWARENESS_DATABASE_URL` uses DSN format. +- **Deploy script** — `scripts/holodeck/deploy.sh` maintenance mode no longer passes `upgrade head` positional args to `mcp-awareness-migrate` (which uses `--flags`, not positional args). +- **README** — fix documented `mcp-awareness-migrate upgrade head` syntax to match actual CLI interface (`mcp-awareness-migrate` with no positional args). ## [0.16.2] - 2026-04-09 diff --git a/README.md b/README.md index b5d7eede..fb3e98cf 100644 --- a/README.md +++ b/README.md @@ -408,7 +408,7 @@ For single-user deployments, secret path + WAF is sufficient. For multi-user, en ### Upgrading -When upgrading to a release with hybrid retrieval (Layer 1), running `mcp-awareness-migrate upgrade head` applies two migrations: +When upgrading to a release with hybrid retrieval (Layer 1), running `mcp-awareness-migrate` applies two migrations: 1. **Schema migration** — adds `language` (regconfig) and `tsv` (generated tsvector) columns to the entries table, plus GIN and partial indexes. Fast (DDL only). 2. **Language backfill** — runs lingua-py detection on all existing entries and updates the `language` column where a known language is detected. This is a one-time data migration that may take longer than usual on the first deploy: diff --git a/src/mcp_awareness/helpers.py b/src/mcp_awareness/helpers.py index 00e09fb7..bf79309e 100644 --- a/src/mcp_awareness/helpers.py +++ b/src/mcp_awareness/helpers.py @@ -48,14 +48,21 @@ def dsn_to_sqlalchemy_url(dsn: str) -> str: - A psycopg DSN (``host=X dbname=Y user=Z password=W port=P``) - A URL (``postgresql://...`` or ``postgresql+psycopg://...``) - DSN values may be single-quoted (``password='has spaces'``). - Special characters in user/password are percent-encoded for the URL. + DSN parsing delegates to ``psycopg.conninfo.conninfo_to_dict`` for + correctness (quoted values, sslmode, socket paths, etc.). Extra + parameters beyond host/port/dbname/user/password are forwarded as + URL query string parameters. + Raises ``ValueError`` on unparseable or empty input. Always returns a ``postgresql+psycopg://`` URL. """ - from urllib.parse import quote + from urllib.parse import quote, urlencode + + from psycopg.conninfo import conninfo_to_dict dsn = dsn.strip() + if not dsn: + raise ValueError("Database connection string must not be empty") # Already a URL — just normalise the dialect prefix if dsn.startswith(("postgresql://", "postgresql+psycopg://")): @@ -63,25 +70,32 @@ def dsn_to_sqlalchemy_url(dsn: str) -> str: dsn = "postgresql+psycopg://" + dsn[len("postgresql://") :] return dsn - # Parse psycopg key=value DSN. Values may be unquoted or single-quoted. - import re - - parts: dict[str, str] = {} - for m in re.finditer(r"(\w+)\s*=\s*(?:'((?:[^'\\]|\\.)*)'|(\S+))", dsn): - key = m.group(1) - # group(2) is the quoted value, group(3) the unquoted value - val = m.group(2) if m.group(2) is not None else m.group(3) - # Un-escape backslash sequences inside quoted values - if m.group(2) is not None: - val = val.replace("\\'", "'").replace("\\\\", "\\") - parts[key] = val - - host = parts.get("host", "localhost") - port = parts.get("port", "5432") - dbname = parts.get("dbname", "awareness") - user = quote(parts.get("user", "awareness"), safe="") - password = quote(parts.get("password", ""), safe="") - return f"postgresql+psycopg://{user}:{password}@{host}:{port}/{dbname}" + # Parse DSN via psycopg's battle-tested parser. + # conninfo_to_dict returns dict[str, Any] (values are str or int); + # coerce to str for URL construction. + raw = conninfo_to_dict(dsn) + if not raw: + raise ValueError(f"No connection parameters found in DSN: {dsn!r}") + parts: dict[str, str] = {k: str(v) for k, v in raw.items() if v is not None and v != ""} + + host = parts.pop("host", "") or "localhost" + port = parts.pop("port", "") or "5432" + dbname = parts.pop("dbname", "") or "awareness" + user = quote(parts.pop("user", "") or "awareness", safe="") + password = quote(parts.pop("password", "") or "", safe="") + + # Unix socket: host goes in query string, not netloc + if host.startswith("/"): + parts["host"] = host + host = "" + + base = f"postgresql+psycopg://{user}:{password}@{host}:{port}/{dbname}" + + # Forward remaining DSN params (sslmode, connect_timeout, etc.) + if parts: + base += "?" + urlencode(parts) + + return base def canonical_email(email: str) -> str: diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 180176db..0a3251f0 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -16,6 +16,7 @@ import json +import psycopg import pytest from mcp.server.fastmcp.exceptions import ToolError @@ -122,14 +123,46 @@ def test_url_passthrough_postgresql_plain(self): def test_whitespace_stripped(self): dsn = " host=localhost dbname=db " - result = dsn_to_sqlalchemy_url(dsn) - assert result.startswith("postgresql+psycopg://") + assert dsn_to_sqlalchemy_url(dsn) == ("postgresql+psycopg://awareness:@localhost:5432/db") def test_unquoted_special_chars_encoded(self): """Unquoted password with URL-special chars gets encoded.""" dsn = "host=localhost dbname=db user=u password=p%ss" assert dsn_to_sqlalchemy_url(dsn) == ("postgresql+psycopg://u:p%25ss@localhost:5432/db") + def test_extra_params_forwarded(self): + """sslmode and other extra DSN params become URL query string.""" + dsn = "host=db dbname=mydb user=u password=p port=5432 sslmode=require" + result = dsn_to_sqlalchemy_url(dsn) + assert result.startswith("postgresql+psycopg://u:p@db:5432/mydb?") + assert "sslmode=require" in result + + def test_multiple_extra_params(self): + dsn = "host=db dbname=mydb user=u password=p connect_timeout=10 sslmode=verify-full" + result = dsn_to_sqlalchemy_url(dsn) + assert "sslmode=verify-full" in result + assert "connect_timeout=10" in result + + def test_unix_socket_host(self): + """Unix socket path goes in query string, not netloc.""" + dsn = "host=/var/run/postgresql dbname=db user=u" + result = dsn_to_sqlalchemy_url(dsn) + assert "host=%2Fvar%2Frun%2Fpostgresql" in result + # netloc should have empty host + assert "://u:@:5432/db?" in result + + def test_empty_raises(self): + with pytest.raises(ValueError, match="must not be empty"): + dsn_to_sqlalchemy_url("") + + def test_whitespace_only_raises(self): + with pytest.raises(ValueError, match="must not be empty"): + dsn_to_sqlalchemy_url(" ") + + def test_garbage_raises(self): + with pytest.raises(psycopg.ProgrammingError): + dsn_to_sqlalchemy_url("garbage") + class TestLevenshtein: def test_identical_strings(self): From 524fd97a69f105368d3628234d5fead09b040dcb Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:19:16 -0500 Subject: [PATCH 4/7] fix: remove unreachable dead code branch for codecov The `if not raw` check after conninfo_to_dict is unreachable: empty input is caught before the call, and non-empty garbage raises ProgrammingError from psycopg. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/helpers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mcp_awareness/helpers.py b/src/mcp_awareness/helpers.py index bf79309e..9e906a52 100644 --- a/src/mcp_awareness/helpers.py +++ b/src/mcp_awareness/helpers.py @@ -74,8 +74,6 @@ def dsn_to_sqlalchemy_url(dsn: str) -> str: # conninfo_to_dict returns dict[str, Any] (values are str or int); # coerce to str for URL construction. raw = conninfo_to_dict(dsn) - if not raw: - raise ValueError(f"No connection parameters found in DSN: {dsn!r}") parts: dict[str, str] = {k: str(v) for k, v in raw.items() if v is not None and v != ""} host = parts.pop("host", "") or "localhost" From 4f9d152da3147a79b3ab42e3c49dd8dc6c2ca946 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:33:41 -0500 Subject: [PATCH 5/7] =?UTF-8?q?docs:=20fix=20dsn=5Fto=5Fsqlalchemy=5Furl?= =?UTF-8?q?=20docstring=20=E2=80=94=20ProgrammingError=20not=20ValueError?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docstring claimed ValueError on unparseable input, but psycopg's conninfo_to_dict raises ProgrammingError. Align docs with behavior. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mcp_awareness/helpers.py b/src/mcp_awareness/helpers.py index 9e906a52..f2c983af 100644 --- a/src/mcp_awareness/helpers.py +++ b/src/mcp_awareness/helpers.py @@ -53,7 +53,8 @@ def dsn_to_sqlalchemy_url(dsn: str) -> str: parameters beyond host/port/dbname/user/password are forwarded as URL query string parameters. - Raises ``ValueError`` on unparseable or empty input. + Raises ``ValueError`` on empty input. Unparseable DSN strings + propagate ``psycopg.ProgrammingError`` from the underlying parser. Always returns a ``postgresql+psycopg://`` URL. """ from urllib.parse import quote, urlencode From ea0736004d4b850d8c3e07f9a69aeaa24fabe90d Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Sun, 12 Apr 2026 22:36:58 -0500 Subject: [PATCH 6/7] fix: detect ambiguous @ in URL password, raise instead of silently misparsing URL passthrough now validates that the netloc doesn't contain multiple @ signs (which indicates an unencoded @ in the password). Raises ValueError with guidance to percent-encode as %40 or use DSN format. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/helpers.py | 13 +++++++++++-- tests/test_helpers.py | 10 ++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/mcp_awareness/helpers.py b/src/mcp_awareness/helpers.py index f2c983af..ce742f30 100644 --- a/src/mcp_awareness/helpers.py +++ b/src/mcp_awareness/helpers.py @@ -57,7 +57,7 @@ def dsn_to_sqlalchemy_url(dsn: str) -> str: propagate ``psycopg.ProgrammingError`` from the underlying parser. Always returns a ``postgresql+psycopg://`` URL. """ - from urllib.parse import quote, urlencode + from urllib.parse import quote, urlencode, urlparse from psycopg.conninfo import conninfo_to_dict @@ -65,10 +65,19 @@ def dsn_to_sqlalchemy_url(dsn: str) -> str: if not dsn: raise ValueError("Database connection string must not be empty") - # Already a URL — just normalise the dialect prefix + # Already a URL — normalise the dialect prefix, validate credentials if dsn.startswith(("postgresql://", "postgresql+psycopg://")): if dsn.startswith("postgresql://"): dsn = "postgresql+psycopg://" + dsn[len("postgresql://") :] + # Detect ambiguous URLs where unencoded @ in password makes the + # netloc unparseable (e.g. "u:p@ss@host" → host looks like "ss"). + parsed = urlparse(dsn) + netloc = parsed.netloc + if netloc.count("@") > 1: + raise ValueError( + "Ambiguous URL: password appears to contain an unencoded '@'. " + "Percent-encode it as %40, or use DSN format instead." + ) return dsn # Parse DSN via psycopg's battle-tested parser. diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 0a3251f0..ce9ff4c7 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -121,6 +121,16 @@ def test_url_passthrough_postgresql_plain(self): url = "postgresql://u:p@h:5432/db" assert dsn_to_sqlalchemy_url(url) == "postgresql+psycopg://u:p@h:5432/db" + def test_url_ambiguous_at_in_password_raises(self): + """Unencoded @ in password makes URL ambiguous — must raise.""" + with pytest.raises(ValueError, match="unencoded '@'"): + dsn_to_sqlalchemy_url("postgresql://u:p@ss@h:5432/db") + + def test_url_encoded_at_in_password_ok(self): + """Properly percent-encoded @ in password passes through.""" + url = "postgresql+psycopg://u:p%40ss@h:5432/db" + assert dsn_to_sqlalchemy_url(url) == url + def test_whitespace_stripped(self): dsn = " host=localhost dbname=db " assert dsn_to_sqlalchemy_url(dsn) == ("postgresql+psycopg://awareness:@localhost:5432/db") From 43adbd2efe2adaf0e41f685acbeade66e6e9ce98 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Sun, 12 Apr 2026 23:05:23 -0500 Subject: [PATCH 7/7] docs: document DSN quoting requirement for env files Shell splits unquoted DSN values on spaces, silently dropping password/user/dbname. Document that DSN format values must be quoted in env files across README, data dictionary, migrate.py error message, and alembic/env.py error message. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 1 + README.md | 2 +- alembic/env.py | 4 +++- docs/data-dictionary.md | 2 +- src/mcp_awareness/migrate.py | 10 +++++++++- 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f48267ea..bb2b6a2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Alembic DSN format handling** — `alembic/env.py` now converts psycopg DSN format (`host=X dbname=Y user=Z password=W`) to SQLAlchemy URL format via `dsn_to_sqlalchemy_url()` helper. Delegates DSN parsing to `psycopg.conninfo.conninfo_to_dict()` for correctness; forwards extra params (sslmode, connect_timeout, etc.) as URL query string. Fixes migration/backfill failures on production where `AWARENESS_DATABASE_URL` uses DSN format. - **Deploy script** — `scripts/holodeck/deploy.sh` maintenance mode no longer passes `upgrade head` positional args to `mcp-awareness-migrate` (which uses `--flags`, not positional args). - **README** — fix documented `mcp-awareness-migrate upgrade head` syntax to match actual CLI interface (`mcp-awareness-migrate` with no positional args). +- **Docs** — document that `AWARENESS_DATABASE_URL` accepts both URL and DSN formats, and that DSN values must be quoted in env files to prevent shell space-splitting. Updated in README, data dictionary, `migrate.py` error message, and `alembic/env.py` error message. ## [0.16.2] - 2026-04-09 diff --git a/README.md b/README.md index fb3e98cf..c7bb5e8b 100644 --- a/README.md +++ b/README.md @@ -197,7 +197,7 @@ The server is running on port 8420. Point any MCP client at `http://localhost:84 | `AWARENESS_TRANSPORT` | `stdio` | Transport: `stdio` or `streamable-http` | | `AWARENESS_HOST` | `0.0.0.0` | Bind address (HTTP mode) | | `AWARENESS_PORT` | `8420` | Port (HTTP mode) | -| `AWARENESS_DATABASE_URL` | _(required)_ | PostgreSQL connection string. Example: `postgresql://user:pass@localhost:5432/awareness` | +| `AWARENESS_DATABASE_URL` | _(required)_ | PostgreSQL connection string. Accepts URL format (`postgresql://user:pass@host:5432/db`) or psycopg DSN format (`host=X dbname=Y user=Z password=W`). **If using DSN format in an env file, quote the value** — unquoted spaces cause the shell to split it into separate assignments. Example: `AWARENESS_DATABASE_URL="host=db dbname=awareness user=awareness password=secret"` | | `AWARENESS_MOUNT_PATH` | _(none)_ | Secret path prefix for access control (e.g., `/my-secret`). When set, only `//mcp` is served; all other paths return 404. Use with a Cloudflare WAF rule. | #### Embedding (optional) diff --git a/alembic/env.py b/alembic/env.py index 0e5fae3e..1606bb4d 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -41,7 +41,9 @@ if not database_url: raise ValueError( "AWARENESS_DATABASE_URL environment variable is required. " - "Example: postgresql+psycopg://awareness:awareness-dev@localhost:5432/awareness" + 'Example (URL): postgresql+psycopg://user:pass@localhost:5432/awareness ' + 'Example (DSN): "host=localhost dbname=awareness user=user password=pass" ' + "Note: DSN values with spaces must be quoted in env files." ) # Normalise to a SQLAlchemy-compatible URL. Production deployments often diff --git a/docs/data-dictionary.md b/docs/data-dictionary.md index fd8f3059..704d6be2 100644 --- a/docs/data-dictionary.md +++ b/docs/data-dictionary.md @@ -335,7 +335,7 @@ Use cases: decision → context, intention → action, note → note ("see also" - **WAL level:** `wal_level=logical` configured for Debezium CDC readiness and logical replication - **Replication slots:** `max_replication_slots=4` for future replication/CDC - **Background cleanup:** Daemon thread with its own psycopg connection, debounced (10s), with alive-check guard to prevent thread accumulation -- **Connection string:** Configured via `AWARENESS_DATABASE_URL` (e.g., `postgresql://user:pass@localhost:5432/awareness`) +- **Connection string:** Configured via `AWARENESS_DATABASE_URL`. Accepts URL format (`postgresql://user:pass@host:5432/db`) or psycopg DSN format (`host=X dbname=Y user=Z password=W`). If using DSN format in an env file sourced by the shell, **the value must be quoted** to prevent space-splitting (e.g., `AWARENESS_DATABASE_URL="host=db dbname=awareness user=u password=p"`) - **Docker image:** `pgvector/pgvector:pg17` (PostgreSQL 17 with pgvector pre-installed) - **Schema migrations:** Managed by Alembic (raw SQL, no ORM). Migration files in `alembic/versions/`. Run `mcp-awareness-migrate` or `alembic upgrade head`. Version tracked in `alembic_version` table. diff --git a/src/mcp_awareness/migrate.py b/src/mcp_awareness/migrate.py index 962d6ec7..26eb3b54 100644 --- a/src/mcp_awareness/migrate.py +++ b/src/mcp_awareness/migrate.py @@ -47,7 +47,15 @@ def main() -> None: if not database_url: print("Error: AWARENESS_DATABASE_URL is required.", file=sys.stderr) print( - "Example: AWARENESS_DATABASE_URL=postgresql://user:pass@localhost:5432/awareness", + "Example (URL): AWARENESS_DATABASE_URL=postgresql://user:pass@localhost:5432/awareness", + file=sys.stderr, + ) + print( + 'Example (DSN): AWARENESS_DATABASE_URL="host=localhost dbname=db user=u password=p"', + file=sys.stderr, + ) + print( + "Note: DSN values with spaces must be quoted in env files.", file=sys.stderr, ) sys.exit(1)