From 885ede743134d7e4adba5367bc9a6fbf691c1db2 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 20:55:36 -0500 Subject: [PATCH 01/28] docs: add schema + record entry types design spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full design for implementing EntryType.SCHEMA and EntryType.RECORD — JSON Schema Draft 2020-12 validation on write, per-owner with _system fallback, absolute schema immutability, record re-validation on update, CLI-only writes to _system owner, structured validation error envelope. Closes design phase for #208. Implementation plan follows. Co-Authored-By: Claude Opus 4.6 (1M context) --- ...-04-13-schema-record-entry-types-design.md | 409 ++++++++++++++++++ 1 file changed, 409 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md diff --git a/docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md b/docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md new file mode 100644 index 0000000..f3ee53a --- /dev/null +++ b/docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md @@ -0,0 +1,409 @@ +# Schema + Record entry types — Design Spec + +**Date:** 2026-04-13 +**Issue:** [#208](https://github.com/cmeans/mcp-awareness/issues/208) +**Related awareness entries:** `design-schema-record-secrets` (`53b378b2`), intention `3117644f` +**Scope:** Implementation steps 1–2 of the awareness-edge prerequisite design. Secrets infrastructure (step 3+) is a separate follow-up. + +## Problem + +mcp-awareness stores arbitrary agent-written entries. Edge providers and the future tag taxonomy layer need *typed data contracts* — schemas that define a shape, and records that conform to those shapes with server-side validation on write. 
Without this, the entire edge config pattern (manifests, provider preferences, target configs) rests on implicit naming conventions with no validation — typos silently fall through to defaults, flagged as the #1 practical pain point in the edge design review. + +This spec defines two new `EntryType` values — `schema` and `record` — with JSON Schema Draft 2020-12 validation on write, plus a system-owner fallback so canonical shared schemas can ship with the server. + +## Goals + +- Agents can register schemas via MCP and write records validated against them. +- Server-side enforcement: invalid schemas never stored; invalid records never stored. +- Canonical schemas (edge-manifest, edge-identity, eventually tag taxonomy) can live in a shared `_system` namespace, with per-user schemas as an override layer. +- Structured error responses listing *all* validation failures in one round trip. +- No new tool surface beyond two type-specific write tools; existing `update_entry` / `delete_entry` absorb the new types. + +## Non-goals + +- Secrets (`x-secret` encryption, one-time token web form, edge decrypt endpoint) — separate follow-up PR. +- Admin-via-MCP authorization (`is_admin` column on users) — deferred until actually needed. +- Cross-schema `$ref` resolution via `referencing.Registry` — deferred until a real use case demands it. +- Validator caching by schema version — deferred until throughput data justifies it. +- Backwards-compatibility shims for the historical "Structure"/"Structured"/"Secret" naming — superseded names; new implementation uses `schema`/`record`/`x-secret`. + +## Design decisions + +### D1. Tool surface: type-specific write tools + +Two new MCP tools — `register_schema` and `create_record` — matching the existing convention of one type-specific tool per writable entry type (`remember` → note, `learn_pattern` → pattern, etc.). 
The MCP Bench audit flagged the 29-tool surface as bloated, but extending `remember` with polymorphic `entry_type` would muddy its semantic. A future PR may unify all write tools behind a generic `create_entry`; that is a separate refactor across existing tools, not scope for this work. + +### D2. Multi-tenancy: per-owner with `_system` fallback + +Schemas are scoped by `owner_id`. A reserved `_system` owner holds shared canonical schemas. Schema lookup queries `WHERE logical_key=? AND owner_id IN (caller, '_system') ORDER BY CASE WHEN owner_id=caller THEN 0 ELSE 1 END LIMIT 1` — caller's own schema wins over `_system` when both exist, giving operators predictable override semantics. + +### D3. `_system` write mechanism: CLI only + +A new console script `mcp-awareness-register-schema --system ...` writes `_system`-owned schemas, bypassing MCP. Operators (DB access + server config) seed built-in schemas at deploy/bootstrap time. No `is_admin` column, no MCP authz plumbing — bootstrap is a deploy-time concern, not agent-accessible. + +### D4. Schema immutability: absolute + +`update_entry` on a `schema` entry always returns `schema_immutable`. To change a schema, register a new version; if the old version has no non-deleted records, soft-delete it. Matches the spec's "new version = new entry" framing and removes state-dependent authoring behavior. + +### D5. Record mutability: re-validated on content change + +`update_entry` on a `record` entry re-resolves the pinned schema and re-validates `content` on update. Updates that fail re-validation are rejected; the record is left unchanged. Non-content field updates (tags, description, source) skip re-validation. `schema_ref` and `schema_version` are immutable on records — records pin to an exact schema version and cannot be re-targeted. + +### D6. 
Validation error reporting: all errors via `iter_errors()` + +The `validation_failed` envelope includes a `validation_errors` list with one entry per `iter_errors()` yield, sorted by `path`. Each entry has `path` (from `ValidationError.json_path`), `message`, `validator` (the failing JSON Schema keyword), and `schema_path`. Truncated at 50 errors with `truncated: true, total_errors: N` if more. + +### D7. `logical_key` derivation: server-side + +For schemas, the caller passes `family` and `version`; the server derives `logical_key = f"{family}:{version}"`. Single source of truth; impossible to end up with a mismatch. Records mirror the derivation on lookup: `resolve_schema` composes the target `logical_key` from the record's `schema_ref` + `schema_version`. + +### D8. Record `content`: any JSON value + +`data.content` on record entries accepts any JSON-serializable value (dict, list, primitive, null) — matches JSON Schema's ability to validate any value, and matches the existing polymorphic `content` parameter on `remember`. Ruling it out now would create a future migration for no real benefit. + +## Architecture + +### New module: `src/mcp_awareness/validation.py` + +Pure functions, no I/O side effects except the store-lookup helper. Keeps `jsonschema` out of the store layer (preserves Store protocol as swappable) and makes validation unit-testable without Postgres. + +| Function | Purpose | +|---|---| +| `validate_schema_body(schema: dict) -> None` | `Draft202012Validator.check_schema(schema)`. Translates `SchemaError` into structured `invalid_schema` error. | +| `resolve_schema(store, owner_id, family, version) -> Entry \| None` | Caller-owner lookup first, `_system` fallback. Excludes soft-deleted. | +| `validate_record_content(schema_body: dict, content: Any) -> list[dict]` | Runs `iter_errors()`, returns sorted list of error dicts. Empty list = valid. 
| +| `compose_schema_logical_key(family: str, version: str) -> str` | Single place the format lives: `f"{family}:{version}"`. | +| `assert_schema_deletable(store, owner_id, logical_key) -> None` | Queries referencing records. Raises `schema_in_use` with blocker list if any. | +| `collect_validation_errors(validator, instance) -> list[dict]` | Internal helper; handles truncation at 50. | + +### Store protocol changes (`src/mcp_awareness/store.py`, `postgres_store.py`) + +Two new methods on the `Store` protocol: + +- `find_schema(owner_id: str, logical_key: str) -> Entry | None` — single-query schema lookup honoring the `_system` fallback and soft-delete exclusion. +- `count_records_referencing(owner_id: str, schema_logical_key: str) -> tuple[int, list[str]]` — supports schema-delete protection. Returns total count and up to N (default 10) referencing record IDs for the error envelope. + +Existing `save_entry` / write paths absorb the new entry types unchanged — the `type` field is a TEXT enum value change, not a structural change. + +### Tool surface changes (`src/mcp_awareness/tools.py`) + +- **New:** `register_schema(source, tags, description, family, version, schema, learned_from="conversation") -> str` +- **New:** `create_record(source, tags, description, logical_key, schema_ref, schema_version, content, learned_from="conversation") -> str` +- **Modified:** `update_entry` branches on `entry.type`: + - `SCHEMA` → always `schema_immutable`. + - `RECORD` with content change → re-resolve schema, re-validate, reject on failure. + - `RECORD` attempting to change `schema_ref`/`schema_version` → `record_schema_pin_immutable`. + - Other types → existing behavior. +- **Modified:** `delete_entry` branches on `entry.type == SCHEMA` to run deletion protection before soft-delete. + +Response payloads trimmed to only what the caller didn't provide: + +- `register_schema` returns `{"status": "ok", "id", "logical_key"}` (`logical_key` is server-derived). 
+- `create_record` returns `{"status": "ok", "id", "action": "created" | "updated"}`. + +### EntryType additions (`src/mcp_awareness/schema.py`) + +```python +class EntryType(str, Enum): + # ... existing eight values ... + SCHEMA = "schema" + RECORD = "record" +``` + +No DB-level CHECK constraint on `entries.type` (there isn't one today); Python-layer `_parse_entry_type` handles invalid input with structured errors. + +### CLI tool: `src/mcp_awareness/cli_register_schema.py` + +New console script `mcp-awareness-register-schema`. Registered in `pyproject.toml` as `[project.scripts]`. + +``` +mcp-awareness-register-schema --system \ + --family schema:edge-manifest \ + --version 1.0.0 \ + --schema-file edge-manifest.json \ + --source awareness-built-in \ + --tags "schema,edge" \ + --description "Edge provider manifest schema" +``` + +Argparse validation, direct `PostgresStore` construction (no MCP / middleware / auth), writes with `owner_id="_system"` and `learned_from="cli-bootstrap"`. Skips embedding submission — CLI bootstrap shouldn't require an embedding provider. + +## Data model + +Both new types reuse the existing `Entry` dataclass and `entries` table. Schema body and record content live in the JSONB `data` column (**not** the `content` string field — avoids the Pydantic JSON-deserialization bug in awareness entry `5bc732c1`). + +### Schema entry + +```python +Entry( + type=EntryType.SCHEMA, + source=source, + tags=tags, + data={ + "family": "schema:edge-manifest", + "version": "1.0.0", + "schema": { ... JSON Schema body as dict ... }, + "description": description, + "learned_from": learned_from, + }, + logical_key="schema:edge-manifest:1.0.0", # server-derived + owner_id=current_owner(), # _system only via CLI + language="english", +) +``` + +### Record entry + +```python +Entry( + type=EntryType.RECORD, + source=source, + tags=tags, + data={ + "schema_ref": "schema:edge-manifest", + "schema_version": "1.0.0", + "content": { ... any JSON value, validated ... 
}, + "description": description, + "learned_from": learned_from, + }, + logical_key=caller_chosen, # supports upsert + owner_id=current_owner(), # records never write to _system + language=resolve_language(...), +) +``` + +### Uniqueness and lookup + +The existing partial unique index `(owner_id, source, logical_key) WHERE logical_key IS NOT NULL AND deleted IS NULL` enforces: + +- Per-(owner, source) uniqueness for both types via `logical_key`. +- Natural upsert path for records via the existing `remember`-style upsert machinery. + +Cross-owner schema lookup issues a single query preferring caller-owned over `_system`: + +```sql +SELECT * FROM entries +WHERE type = 'schema' + AND logical_key = %(logical_key)s + AND owner_id IN (%(caller)s, '_system') + AND deleted IS NULL +ORDER BY CASE WHEN owner_id = %(caller)s THEN 0 ELSE 1 END +LIMIT 1 +``` + +## `jsonschema` integration + +- **Library version:** `jsonschema >= 4.26.0` (current PyPI latest, confirmed 2026-04-13). Added to main deps in `pyproject.toml` (not dev). Pulls `attrs`, `jsonschema-specifications`, `referencing`, `rpds-py` (wheels available for all supported platforms). +- **Meta-schema validation:** `Draft202012Validator.check_schema(schema_body)`. Raises `jsonschema.exceptions.SchemaError` on invalid schema. +- **Record validation:** `validator = Draft202012Validator(schema_body); errors = sorted(validator.iter_errors(content), key=lambda e: e.path)`. +- **Unknown keywords:** ignored by default (jsonschema v0.3+). Our future `x-secret` extension works "for free" without needing `validators.extend()` until we wire the secrets layer. +- **No validator caching** in v1 — construct per-write. Cache by `(owner_id, logical_key)` keyed on schema `id` if throughput demands later (schemas are immutable, so cache invalidation is trivial). +- **No `referencing.Registry`** in v1 — records reference schemas by our own `schema_ref`/`schema_version` pair, not JSON Schema `$ref`. 
+- **Belt-and-suspenders:** wrap both `check_schema()` and `iter_errors()` in try/except for `jsonschema.exceptions.JsonSchemaException` (base class); translate any unhandled exception to a generic `validation_error` structured response so raw tracebacks never reach agents. + +## Data flow + +### `register_schema` (MCP) + +1. Tool handler receives `family, version, schema, source, tags, description, learned_from`. +2. `validation.validate_schema_body(schema)` → structured `invalid_schema` on failure. +3. Compose `logical_key = f"{family}:{version}"`. +4. Build `Entry(type=SCHEMA, ..., owner_id=current_owner())`. +5. `store.save_entry(entry)` → Postgres unique-constraint violation becomes `schema_already_exists`. +6. Submit to embedding pool (existing pattern). +7. Return `{"status": "ok", "id", "logical_key"}`. + +### `create_record` (MCP) + +1. Tool handler receives `logical_key, schema_ref, schema_version, content, source, tags, description, learned_from`. +2. `validation.resolve_schema(store, owner_id, schema_ref, schema_version)` → None if not found or soft-deleted. + - None → `schema_not_found` structured error with `searched_owners: [caller, "_system"]`. +3. Extract `schema_body = resolved.data["schema"]`. +4. `validation.validate_record_content(schema_body, content)` → error list. + - Non-empty → `validation_failed` with full list. +5. Build `Entry(type=RECORD, ...)` with caller-chosen `logical_key`. +6. `store.save_entry(entry)` — existing upsert path handles same-logical_key updates. +7. Return `{"status": "ok", "id", "action": "created" | "updated"}`. + +### Record update (`update_entry`) + +1. Load entry by ID; branch on `entry.type`. +2. `SCHEMA` → `schema_immutable`, always. +3. `RECORD`: + - Update touches `content` → re-resolve schema, re-validate, reject on failure. + - Update touches `schema_ref` or `schema_version` → `record_schema_pin_immutable`. + - Update touches only non-content fields → no re-validation. +4. 
Write + append changelog per existing machinery. + +### Schema delete (`delete_entry`) + +1. Load entry; if `type == SCHEMA`, call `assert_schema_deletable`. +2. `count_records_referencing` → raise `schema_in_use` with blocker list if count > 0. +3. Soft-delete proceeds via existing machinery. + +### CLI bootstrap + +1. Argparse validates required args. +2. Read schema file as JSON. +3. `validation.validate_schema_body()` → stderr structured error + exit 1 on failure. +4. Build Entry with `owner_id="_system"`, `learned_from="cli-bootstrap"`, composed `logical_key`. +5. Construct `PostgresStore` directly (bypasses MCP, middleware, auth). +6. `save_entry()`. Skip embedding submission. +7. Print `{"status": "ok", "id", "logical_key"}` to stdout, exit 0. + +## Error handling + +All errors route through existing `_error_response()` helper (`helpers.py:214`) → structured `ToolError`. No new helper, no new envelope format. + +### New error codes + +| Code | Where | Retryable | Extra fields | +|---|---|---|---| +| `invalid_schema` | `register_schema` meta-schema failure | false | `schema_error_path`, `detail` | +| `invalid_parameter` | `register_schema` malformed `family`/`version` (existing code) | false | `param`, `value`, `valid` | +| `schema_already_exists` | `register_schema` unique-constraint collision | false | `logical_key`, `existing_id` | +| `schema_not_found` | `create_record` / record update | false | `schema_ref`, `schema_version`, `searched_owners` | +| `validation_failed` | record content fails schema | false | `schema_ref`, `schema_version`, `validation_errors`, `truncated?`, `total_errors?` | +| `schema_immutable` | `update_entry` on schema | false | — | +| `record_schema_pin_immutable` | record update tries to change pin fields | false | `param` | +| `schema_in_use` | `delete_entry` on referenced schema | false | `referencing_records`, `total_count?` | + +### Validation error envelope shape + +```json +{ + "error": { + "code": "validation_failed", + 
"retryable": false, + "message": "Record content does not conform to schema edge-manifest:1.0.0 (2 errors)", + "schema_ref": "schema:edge-manifest", + "schema_version": "1.0.0", + "validation_errors": [ + { + "path": "/providers/0/name", + "message": "'name' is a required property", + "validator": "required", + "schema_path": "/properties/providers/items/required" + } + ] + } +} +``` + +- `path` from `ValidationError.json_path` — root is `/`, array indices included. +- `schema_path` is the JSON-Pointer-like path into the *schema* (`"/".join(str(p) for p in e.schema_path)`) — useful when the agent has the schema in hand for self-correction. +- `validator` is the failing JSON Schema keyword (`required`, `type`, `enum`, etc.) — enables keyword-specific remediation. +- List sorted by `path` for stable output. +- Truncated at 50 errors with `truncated: true, total_errors: N`. + +## Deployment + +### Alembic migration + +`m8h9i0j1k2l3_add_system_user_for_schemas.py` (next sequential id; actual id assigned when authoring): + +```sql +INSERT INTO users (id, display_name, created) +VALUES ('_system', 'System-managed schemas', now()) +ON CONFLICT (id) DO NOTHING; +``` + +Single-purpose, idempotent, reversible. No DDL — leverages existing `users` table. + +### Operator deploy sequence + +1. Merge PR → Docker image rebuild on tag push (existing CI). +2. Pull + restart holodeck LXCs (production) **and** the QA instance (`docker-compose.qa.yaml`). +3. Run `mcp-awareness-migrate` in each environment — applies the `_system` user seed. **Not automatic; compose files do not run migrations at container start.** This matches the manual pattern used for all prior migrations (language/tsv backfills, OAuth columns, etc.). +4. Operator runs `mcp-awareness-register-schema --system ...` per built-in schema, gradually as schemas are authored. No requirement to seed all at deploy time. +5. No re-embed needed — existing entries unaffected. 
+ +### Compose files + +All compose files (`docker-compose.yaml`, `docker-compose.qa.yaml`, `docker-compose.oauth.yaml`, `docker-compose.demo.yaml`) must remain coherent. **No changes required for this PR** — no new services, no new env vars, no new volumes, no new migration-at-start behavior. + +### Rollback + +`mcp-awareness-migrate --downgrade REVISION` reverses the `_system` user seed. Any `schema`/`record` entries written during the deployment window remain in the DB as orphaned data on older code (unknown `EntryType` value → `_parse_entry_type` guard returns structured error). Re-rolling forward makes them visible again. + +### Feature flag + +None. The new tools are additive and opt-in. `_system` fallback only kicks in when a caller references a schema they don't own — opt-in by use. + +## Testing strategy + +### Unit tests: `tests/test_validation.py` + +Pure functions, no DB. Covers: + +- `validate_schema_body`: valid Draft 2020-12; invalid type value; non-object schema; empty `{}` (valid). +- `validate_record_content`: valid pass-through; multiple simultaneous failures; non-object content against non-object schema; `additionalProperties: false` behavior; truncation at 50. +- `compose_schema_logical_key`: format is `f"{family}:{version}"`. +- `resolve_schema` (with in-memory store stub): caller-owned present; `_system` fallback; caller wins over `_system`; soft-deleted excluded; neither exists. +- `assert_schema_deletable` (with store stub): passes with zero references; raises with blocker list. + +### Integration tests: `tests/test_tools_schema_record.py` + +Testcontainers Postgres. Covers: + +- `register_schema`: happy path; duplicate; invalid meta-schema; malformed `family`/`version`. +- `create_record`: happy path; against `_system` schema; schema-not-found; validation failure; upsert via same `logical_key`. 
+- `update_entry` on record: valid content update; invalid content update (rejected); non-content update; attempt to change `schema_ref`/`schema_version` (rejected). +- `update_entry` on schema: any update rejected. +- `delete_entry` on schema: zero refs succeeds; with refs rejected with blocker list; after refs soft-deleted succeeds. +- `delete_entry` on record: unchanged behavior. +- Cross-owner isolation: A cannot see B's schemas; both see `_system`; A's records invisible to B. + +### CLI tests: `tests/test_cli_register_schema.py` + +- Happy path: valid file → entry with `owner_id="_system"`, stdout structured response. +- Invalid schema file: stderr structured error, exit 1, no entry written. +- Missing required args: argparse error, exit 2. +- `--source`, `--tags`, `--description` flow through to stored entry. +- `learned_from` hardcoded to `"cli-bootstrap"`. + +### Existing tests to extend + +- `tests/test_schema.py` — add `SCHEMA`/`RECORD` enum coverage. +- `tests/test_postgres_store.py` — add `find_schema` + `count_records_referencing` coverage. +- `tests/test_tools.py` — any parametrized entry-type tests include new values. + +### Coverage discipline + +- Per `feedback_codecov_coverage.md` and `feedback_local_coverage_before_qa.md`: run `pytest --cov` locally before marking Ready for QA. +- All new lines in `validation.py`, `cli_register_schema.py`, and the tool handlers covered. No `pragma: no cover` without explicit approval. + +### Manual QA (PR body) + +Per project convention — MCP-call steps on an alternate-port test instance. Exercises: register schema; write valid record; write invalid record (verify envelope shape); update record content (valid + invalid); attempt schema update (verify immutability); delete schema with records (verify protection); delete schema without records; `_system` fallback via CLI tool. + +## PR conventions checklist + +Per `CLAUDE.md`: + +- [ ] CHANGELOG entry under `[Unreleased]`. 
+- [ ] README update if tool count or implemented-features sections change. +- [ ] Test count updated in README. +- [ ] `## QA` section in PR body with prerequisites + per-test checkboxes calling MCP tools. +- [ ] `QA Approved` label applied after manual QA. +- [ ] `docs/data-dictionary.md` updated with `schema`/`record` entry types and new `data` fields. +- [ ] Commit: AGPL v3 license preamble on every new `.py` file. + +## Open questions for planning phase + +None at design time. Items that will surface during planning: + +- Exact naming of the next Alembic revision id (depends on head at implementation time). +- Whether to split the PR at the CLI tool boundary if the test suite grows unwieldy — design allows it but default is a single PR. +- Whether to add a short `docs/schema-record-guide.md` alongside the implementation for users (can be filed as follow-up). + +## References + +- Awareness design spec: `design-schema-record-secrets` (entry `53b378b2`, 2026-03-28) +- Active intention: `3117644f` +- Historical intention cancelled in this session: `42bb92e5` (superseded) +- GitHub issue: [#208](https://github.com/cmeans/mcp-awareness/issues/208) +- Downstream consumers: Layer A/B/C tag taxonomy design (`design-tag-taxonomy-v2`), awareness-edge runtime +- `jsonschema` Python library: `/python-jsonschema/jsonschema` (context7), docs on `check_schema`, `iter_errors`, `referencing.Registry`, custom keyword extension +- MCP Bench audit: entry `1373dbd5` — tool surface concerns driving the "no generic create_entry refactor in this PR" decision +- Existing structured-error helper: `src/mcp_awareness/helpers.py:214` From 8695f6279d12225e989a5639bb852585c55a3ce7 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:23:16 -0500 Subject: [PATCH 02/28] docs: add schema + record entry types implementation plan Bite-sized TDD plan covering 19 tasks: new validation module, two Store protocol 
methods, two MCP tools, update_entry/delete_entry branching, Alembic _system user seed, CLI tool, docs, pre-push verification, and PR. Full self-review cross-checks every D1-D8 decision and error code against a concrete task. Co-Authored-By: Claude Opus 4.6 (1M context) --- ...26-04-13-schema-record-entry-types-plan.md | 2420 +++++++++++++++++ 1 file changed, 2420 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-13-schema-record-entry-types-plan.md diff --git a/docs/superpowers/plans/2026-04-13-schema-record-entry-types-plan.md b/docs/superpowers/plans/2026-04-13-schema-record-entry-types-plan.md new file mode 100644 index 0000000..b94fee4 --- /dev/null +++ b/docs/superpowers/plans/2026-04-13-schema-record-entry-types-plan.md @@ -0,0 +1,2420 @@ +# Schema + Record Entry Types Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add `EntryType.SCHEMA` and `EntryType.RECORD` with JSON Schema Draft 2020-12 validation on write, per-owner storage with a shared `_system` fallback, schema immutability, record re-validation on update, and a CLI tool for seeding system-owned schemas. Delivers steps 1–2 of the `design-schema-record-secrets` spec; secrets infrastructure is a separate follow-up. + +**Architecture:** New `validation.py` module holds pure validation functions using the `jsonschema` library (centralized, unit-testable without DB, keeps `jsonschema` out of the store layer). Two new MCP write tools — `register_schema` and `create_record` — match the existing one-tool-per-type convention. `update_entry` and `delete_entry` gain type-specific branches for the new entries. Store protocol grows exactly two methods: `find_schema` (with `_system` fallback) and `count_records_referencing` (for deletion protection). 
A new `mcp-awareness-register-schema` console script bypasses MCP for operator bootstrap of `_system`-owned schemas. + +**Tech Stack:** Python 3.11+, FastMCP, psycopg + pgvector-enabled Postgres 17, Alembic, `jsonschema>=4.26.0` (new dep), existing structured-error helper (`_error_response`), testcontainers for integration tests. + +**Spec:** [`docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md`](../specs/2026-04-13-schema-record-entry-types-design.md) — all design decisions D1–D8 and error codes are authoritative there. This plan implements without re-deriving. + +**Branch:** `feat/schema-record-entry-types` (already created with the spec commit). + +--- + +## File Map + +**Files to create:** +- `src/mcp_awareness/validation.py` — pure validation functions +- `src/mcp_awareness/cli_register_schema.py` — `mcp-awareness-register-schema` console script +- `alembic/versions/{revision}_add_system_user_for_schemas.py` — `_system` user seed migration +- `tests/test_validation.py` — unit tests for validation module +- `tests/test_tools_schema_record.py` — integration tests via testcontainers Postgres +- `tests/test_cli_register_schema.py` — CLI tool tests + +**Files to modify:** +- `src/mcp_awareness/schema.py` — add `SCHEMA` and `RECORD` enum values +- `src/mcp_awareness/store.py` — `Store` protocol: add `find_schema`, `count_records_referencing` +- `src/mcp_awareness/postgres_store.py` — implement the two new methods +- `src/mcp_awareness/tools.py` — add `register_schema`, `create_record`; branch `update_entry` / `delete_entry` +- `src/mcp_awareness/instructions.md` — mention new tools in server instructions +- `pyproject.toml` — add `jsonschema>=4.26.0` dep; add `mcp-awareness-register-schema` console script +- `CHANGELOG.md` — entry under `[Unreleased]` +- `README.md` — update tool count and "Implemented" section +- `docs/data-dictionary.md` — document `schema` and `record` entry types +- `tests/test_schema.py` — add enum-value coverage +- 
`tests/test_store.py` — add `find_schema` / `count_records_referencing` coverage + +--- + +## Execution Notes + +- **TDD throughout:** every code task writes the failing test first, verifies it fails, implements minimal code, verifies it passes, commits. No committing of untested code. +- **Commit frequency:** at minimum one commit per task, often mid-task after a green test. +- **Conventional commits:** `feat:`, `fix:`, `docs:`, `test:`, `chore:`, `refactor:` as appropriate. +- **Pre-commit discipline** (per saved feedback): before first push, run `ruff format`, `ruff check`, `mypy src/`, `pytest --cov`, verify coverage of new lines, verify test count in README matches reality. +- **AGPL preamble:** every new `.py` file must start with the AGPL v3 license header (copy from any existing `src/mcp_awareness/*.py` file). +- **Structured errors only:** all new error paths use `_error_response()` from `helpers.py`. No `raise ValueError` in tool-facing paths. +- **No `pragma: no cover`** without explicit approval. + +--- + +## Task 1: Add `SCHEMA` and `RECORD` to `EntryType` enum + +**Files:** +- Modify: `src/mcp_awareness/schema.py` (class `EntryType`, line 30) +- Modify: `tests/test_schema.py` + +- [ ] **Step 1: Write failing test** + +Append to `tests/test_schema.py`: + +```python +def test_entry_type_schema_value(): + assert EntryType.SCHEMA.value == "schema" + assert EntryType("schema") is EntryType.SCHEMA + + +def test_entry_type_record_value(): + assert EntryType.RECORD.value == "record" + assert EntryType("record") is EntryType.RECORD +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `pytest tests/test_schema.py::test_entry_type_schema_value tests/test_schema.py::test_entry_type_record_value -v` +Expected: FAIL — `AttributeError: SCHEMA`. 
+ +- [ ] **Step 3: Add enum values** + +Edit `src/mcp_awareness/schema.py`, inside `class EntryType`: + +```python +class EntryType(str, Enum): + STATUS = "status" + ALERT = "alert" + PATTERN = "pattern" + SUPPRESSION = "suppression" + CONTEXT = "context" + PREFERENCE = "preference" + NOTE = "note" + INTENTION = "intention" + SCHEMA = "schema" + RECORD = "record" +``` + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_schema.py -v` +Expected: all pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/schema.py tests/test_schema.py +git commit -m "feat: add SCHEMA and RECORD to EntryType enum" +``` + +--- + +## Task 2: Add `jsonschema` dependency + +**Files:** +- Modify: `pyproject.toml` + +- [ ] **Step 1: Add to dependencies** + +Edit `pyproject.toml` → `[project] dependencies` array (or equivalent). Add: + +``` +jsonschema>=4.26.0,<5 +``` + +- [ ] **Step 2: Install locally** + +Run: `pip install -e ".[dev]"` +Expected: installs `jsonschema`, `jsonschema-specifications`, `referencing`, `rpds-py`, `attrs`. + +- [ ] **Step 3: Verify importable** + +Run: `python -c "from jsonschema import Draft202012Validator; print(Draft202012Validator.META_SCHEMA['$id'])"` +Expected: prints `https://json-schema.org/draft/2020-12/schema`. + +- [ ] **Step 4: Commit** + +```bash +git add pyproject.toml +git commit -m "chore: add jsonschema>=4.26.0 dependency" +``` + +--- + +## Task 3: Create `validation.py` with `compose_schema_logical_key` + +Start with the smallest pure function to establish the module. 
+ +**Files:** +- Create: `src/mcp_awareness/validation.py` +- Create: `tests/test_validation.py` + +- [ ] **Step 1: Create failing test** + +Create `tests/test_validation.py`: + +```python +# AGPL preamble here — copy from tests/test_schema.py + +"""Tests for src/mcp_awareness/validation.py.""" + +from __future__ import annotations + +import pytest + +from mcp_awareness.validation import compose_schema_logical_key + + +def test_compose_schema_logical_key_basic(): + assert compose_schema_logical_key("schema:edge-manifest", "1.0.0") == "schema:edge-manifest:1.0.0" + + +def test_compose_schema_logical_key_no_prefix(): + assert compose_schema_logical_key("tag-taxonomy", "0.1.0") == "tag-taxonomy:0.1.0" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `pytest tests/test_validation.py -v` +Expected: FAIL — `ModuleNotFoundError: No module named 'mcp_awareness.validation'`. + +- [ ] **Step 3: Create validation module** + +Create `src/mcp_awareness/validation.py`: + +```python +# AGPL preamble here — copy from src/mcp_awareness/schema.py + +"""Validation helpers for Schema and Record entry types. + +Pure functions wrapping jsonschema Draft 2020-12 validation and schema +lookup with _system fallback. Kept out of the store layer so the Store +protocol stays swappable (no jsonschema import in store.py). +""" + +from __future__ import annotations + + +def compose_schema_logical_key(family: str, version: str) -> str: + """Derive the canonical logical_key for a schema entry. + + Single source of truth for the family+version → logical_key format. + Used by register_schema on write and by resolve_schema on lookup. + """ + return f"{family}:{version}" +``` + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_validation.py -v` +Expected: both tests pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/validation.py tests/test_validation.py +git commit -m "feat: add validation module with compose_schema_logical_key" +``` + +--- + +## Task 4: `validation.validate_schema_body` + +**Files:** +- Modify: `src/mcp_awareness/validation.py` +- Modify: `tests/test_validation.py` + +- [ ] **Step 1: Write failing tests** + +Append to `tests/test_validation.py`: + +```python +import jsonschema + +from mcp_awareness.validation import validate_schema_body + + +def test_validate_schema_body_accepts_valid_object_schema(): + schema = { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + } + validate_schema_body(schema) # must not raise + + +def test_validate_schema_body_rejects_bad_type(): + schema = {"type": "strng"} # typo: 'strng' is not a valid JSON Schema type + with pytest.raises(jsonschema.exceptions.SchemaError): + validate_schema_body(schema) + + +def test_validate_schema_body_accepts_empty_object(): + # Empty schema matches anything — valid per spec + validate_schema_body({}) + + +def test_validate_schema_body_rejects_non_dict(): + # Schemas must be objects; bare arrays fail meta-schema + with pytest.raises(jsonschema.exceptions.SchemaError): + validate_schema_body([{"type": "string"}]) # type: ignore[arg-type] +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `pytest tests/test_validation.py -v` +Expected: FAIL — `ImportError: cannot import name 'validate_schema_body'`. + +- [ ] **Step 3: Implement** + +Append to `src/mcp_awareness/validation.py`: + +```python +from typing import Any + +from jsonschema import Draft202012Validator + + +def validate_schema_body(schema: Any) -> None: + """Validate a schema body against the JSON Schema Draft 2020-12 meta-schema. + + Raises jsonschema.exceptions.SchemaError on invalid schema. Callers at + the MCP boundary translate this into a structured 'invalid_schema' error + response; direct callers (CLI) format to stderr. 
+ """ + Draft202012Validator.check_schema(schema) +``` + +- [ ] **Step 4: Run tests, verify pass** + +Run: `pytest tests/test_validation.py -v` +Expected: all pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/validation.py tests/test_validation.py +git commit -m "feat: add validate_schema_body for Draft 2020-12 meta-schema check" +``` + +--- + +## Task 5: `validation.validate_record_content` + +Returns a sorted list of flattened error dicts. Callers decide how to envelope them. + +**Files:** +- Modify: `src/mcp_awareness/validation.py` +- Modify: `tests/test_validation.py` + +- [ ] **Step 1: Write failing tests** + +Append to `tests/test_validation.py`: + +```python +from mcp_awareness.validation import validate_record_content + + +_PERSON_SCHEMA = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0}, + }, + "required": ["name"], +} + + +def test_validate_record_content_valid_returns_empty_list(): + assert validate_record_content(_PERSON_SCHEMA, {"name": "Alice", "age": 30}) == [] + + +def test_validate_record_content_surfaces_missing_required(): + errors = validate_record_content(_PERSON_SCHEMA, {"age": 30}) + assert len(errors) == 1 + assert errors[0]["validator"] == "required" + assert "name" in errors[0]["message"] + + +def test_validate_record_content_surfaces_all_errors(): + # Missing 'name' AND age is wrong type + errors = validate_record_content(_PERSON_SCHEMA, {"age": "thirty"}) + assert len(errors) == 2 + validators = {e["validator"] for e in errors} + assert validators == {"required", "type"} + + +def test_validate_record_content_is_sorted_by_path(): + schema = { + "type": "object", + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"}, + "c": {"type": "integer"}, + }, + } + errors = validate_record_content(schema, {"a": "x", "b": "y", "c": "z"}) + paths = [e["path"] for e in errors] + assert paths == sorted(paths) + + +def 
test_validate_record_content_accepts_primitive_schema(): + schema = {"type": "integer"} + assert validate_record_content(schema, 42) == [] + errors = validate_record_content(schema, "abc") + assert len(errors) == 1 + assert errors[0]["validator"] == "type" + + +def test_validate_record_content_array_schema_with_index_paths(): + schema = {"type": "array", "items": {"type": "integer"}} + errors = validate_record_content(schema, [1, "two", 3, "four"]) + assert len(errors) == 2 + # Array indices should appear in paths + paths = [e["path"] for e in errors] + assert any("1" in p for p in paths) + assert any("3" in p for p in paths) + + +def test_validate_record_content_truncates_at_50(): + schema = { + "type": "array", + "items": {"type": "integer"}, + } + # 60 wrong-type items — all fail + result = validate_record_content(schema, ["x"] * 60) + assert isinstance(result, list) + # Truncation is carried via a special sentinel entry at the end; see impl + assert len(result) == 51 # 50 errors + 1 truncation marker + assert result[-1]["truncated"] is True + assert result[-1]["total_errors"] == 60 +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `pytest tests/test_validation.py -v` +Expected: FAIL on missing import. + +- [ ] **Step 3: Implement** + +Append to `src/mcp_awareness/validation.py`: + +```python +from jsonschema import ValidationError + +_MAX_VALIDATION_ERRORS = 50 + + +def _flatten_error(err: ValidationError) -> dict[str, Any]: + """Flatten a jsonschema ValidationError to a structured dict for the error envelope.""" + return { + "path": err.json_path, + "message": err.message, + "validator": err.validator, + "schema_path": "/" + "/".join(str(p) for p in err.schema_path), + } + + +def validate_record_content(schema_body: dict[str, Any], content: Any) -> list[dict[str, Any]]: + """Validate content against a schema body. Returns list of structured errors. + + Empty list means valid. 
List truncated at _MAX_VALIDATION_ERRORS; when + truncated, final entry is {'truncated': True, 'total_errors': <n>}. + """ + validator = Draft202012Validator(schema_body) + all_errors = sorted(validator.iter_errors(content), key=lambda e: e.path) + if len(all_errors) <= _MAX_VALIDATION_ERRORS: + return [_flatten_error(e) for e in all_errors] + kept = [_flatten_error(e) for e in all_errors[:_MAX_VALIDATION_ERRORS]] + kept.append({"truncated": True, "total_errors": len(all_errors)}) + return kept +``` + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_validation.py -v` +Expected: all pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/validation.py tests/test_validation.py +git commit -m "feat: add validate_record_content with iter_errors and truncation" +``` + +--- + +## Task 6: Add `find_schema` to Store protocol and PostgresStore + +**Files:** +- Modify: `src/mcp_awareness/store.py` (Store protocol) +- Modify: `src/mcp_awareness/postgres_store.py` (implementation) +- Modify: `tests/test_store.py` +- Create (if needed): `src/mcp_awareness/sql/find_schema.sql` + +- [ ] **Step 1: Inspect existing Store protocol** + +Read `src/mcp_awareness/store.py` to see the current Protocol signature style; mirror it. 
+ +- [ ] **Step 2: Write failing integration test** + +Append to `tests/test_store.py`: + +```python +from mcp_awareness.schema import Entry, EntryType, make_id, now_utc + +SYSTEM_OWNER = "_system" + + +def _schema_entry(owner: str, logical_key: str, family: str, version: str, schema_body: dict) -> Entry: + return Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="test", + tags=[], + created=now_utc(), + updated=None, + expires=None, + data={ + "family": family, + "version": version, + "schema": schema_body, + "description": "test schema", + "learned_from": "test", + }, + logical_key=logical_key, + owner_id=owner, + ) + + +def test_find_schema_returns_caller_owned(store): + # Ensure _system user exists so the FK-less owner_id insert is valid + store._conn_pool # ensure pool lazy-init done — or use a helper if provided + # Insert _system user if the test schema doesn't seed it; adjust if fixture changes + store.save_entry(_schema_entry(TEST_OWNER, "s:test:1.0.0", "s:test", "1.0.0", {"type": "object"})) + found = store.find_schema(TEST_OWNER, "s:test:1.0.0") + assert found is not None + assert found.owner_id == TEST_OWNER + assert found.data["family"] == "s:test" + + +def test_find_schema_system_fallback(store): + store.save_entry(_schema_entry(SYSTEM_OWNER, "s:test:1.0.0", "s:test", "1.0.0", {"type": "object"})) + found = store.find_schema(TEST_OWNER, "s:test:1.0.0") + assert found is not None + assert found.owner_id == SYSTEM_OWNER + + +def test_find_schema_caller_wins_over_system(store): + # Seed _system first + store.save_entry(_schema_entry(SYSTEM_OWNER, "s:test:1.0.0", "s:test", "1.0.0", {"type": "object"})) + # Then caller-owned override + store.save_entry(_schema_entry(TEST_OWNER, "s:test:1.0.0", "s:test", "1.0.0", {"type": "string"})) + found = store.find_schema(TEST_OWNER, "s:test:1.0.0") + assert found is not None + assert found.owner_id == TEST_OWNER + # The caller-owned schema overrode the system one + assert found.data["schema"] == {"type": 
"string"} + + +def test_find_schema_returns_none_when_missing(store): + assert store.find_schema(TEST_OWNER, "s:nonexistent:1.0.0") is None + + +def test_find_schema_excludes_soft_deleted(store): + entry = _schema_entry(TEST_OWNER, "s:test:1.0.0", "s:test", "1.0.0", {"type": "object"}) + store.save_entry(entry) + store.delete_entry(TEST_OWNER, entry.id) + assert store.find_schema(TEST_OWNER, "s:test:1.0.0") is None +``` + +Note: the `_system` user FK must exist before inserting entries with that `owner_id`. This is normally handled by the migration in Task 10. During testing, **either** wait for Task 10 to land **or** add a test-only helper that inserts `_system` into `users`. The simplest approach: chain Tasks 6 and 10 together OR do Task 10 before Task 6. **Decision: reorder — do the migration (Task 10) before writing the store integration tests.** + +→ **If following the plan in order, swap Task 6 and Task 10.** Alternative: augment conftest.py's `store` fixture to pre-seed `_system` into `users` (keeps plan order natural). + +Preferred approach: add `_system` to the `store` fixture in `conftest.py`: + +```python +@pytest.fixture +def store(pg_dsn): + """Fresh PostgresStore for each test — tables created, then cleared after.""" + s = PostgresStore(pg_dsn) + # Ensure _system user exists for cross-owner schema tests. + with s._conn_pool.connection() as conn, conn.cursor() as cur: + cur.execute( + "INSERT INTO users (id, display_name) VALUES ('_system', 'System-managed schemas') " + "ON CONFLICT (id) DO NOTHING" + ) + conn.commit() + yield s + s.clear(TEST_OWNER) + s.clear(SYSTEM_OWNER) +``` + +- [ ] **Step 3: Run tests to verify failure** + +Run: `pytest tests/test_store.py -v -k find_schema` +Expected: FAIL — `AttributeError: PostgresStore has no attribute 'find_schema'`. 
+ +- [ ] **Step 4: Add method to Store protocol** + +Edit `src/mcp_awareness/store.py`, add to the `Store` Protocol: + +```python +def find_schema(self, owner_id: str, logical_key: str) -> Entry | None: + """Look up a schema entry by logical_key, preferring caller-owned over _system. + + Returns the schema entry or None if not found or soft-deleted. + """ + ... +``` + +- [ ] **Step 5: Implement in PostgresStore** + +Edit `src/mcp_awareness/postgres_store.py`: + +```python +def find_schema(self, owner_id: str, logical_key: str) -> Entry | None: + """Look up a schema, preferring caller-owned over _system-owned. + + Single query with CASE-based ORDER BY for predictable override + semantics: caller's own version wins, _system is fallback. + """ + query = """ + SELECT id, type, source, tags, created, updated, expires, data, + logical_key, owner_id, language, deleted + FROM entries + WHERE type = 'schema' + AND logical_key = %(logical_key)s + AND owner_id IN (%(caller)s, '_system') + AND deleted IS NULL + ORDER BY CASE WHEN owner_id = %(caller)s THEN 0 ELSE 1 END + LIMIT 1 + """ + with self._conn_pool.connection() as conn, conn.cursor(row_factory=dict_row) as cur: + cur.execute(query, {"logical_key": logical_key, "caller": owner_id}) + row = cur.fetchone() + if row is None: + return None + return _row_to_entry(row) +``` + +Adjust `dict_row` / `_row_to_entry` to match existing patterns in the file (import name and helper function may differ — follow what the rest of `postgres_store.py` uses). + +- [ ] **Step 6: Externalize SQL if project pattern requires** + +If the codebase follows "one SQL file per operation" (check `src/mcp_awareness/sql/`), create `sql/find_schema.sql` with the query text and load it via the existing SQL-loading helper. Otherwise, inline is fine. + +- [ ] **Step 7: Run tests** + +Run: `pytest tests/test_store.py -v -k find_schema` +Expected: all pass. 
+ +- [ ] **Step 8: Commit** + +```bash +git add src/mcp_awareness/store.py src/mcp_awareness/postgres_store.py \ + tests/test_store.py tests/conftest.py src/mcp_awareness/sql/find_schema.sql +git commit -m "feat: add Store.find_schema with _system fallback" +``` + +--- + +## Task 7: Add `count_records_referencing` to Store and PostgresStore + +**Files:** +- Modify: `src/mcp_awareness/store.py` +- Modify: `src/mcp_awareness/postgres_store.py` +- Modify: `tests/test_store.py` +- Create (if project convention): `src/mcp_awareness/sql/count_records_referencing.sql` + +- [ ] **Step 1: Write failing tests** + +Append to `tests/test_store.py`: + +```python +def _record_entry(owner: str, logical_key: str, schema_ref: str, schema_version: str, content) -> Entry: + return Entry( + id=make_id(), + type=EntryType.RECORD, + source="test", + tags=[], + created=now_utc(), + updated=None, + expires=None, + data={ + "schema_ref": schema_ref, + "schema_version": schema_version, + "content": content, + "description": "test record", + "learned_from": "test", + }, + logical_key=logical_key, + owner_id=owner, + ) + + +def test_count_records_referencing_returns_zero_when_none(store): + count, ids = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 0 + assert ids == [] + + +def test_count_records_referencing_counts_matching_records(store): + # Insert 3 records referencing s:test:1.0.0 + for i in range(3): + store.save_entry(_record_entry(TEST_OWNER, f"rec-{i}", "s:test", "1.0.0", {"i": i})) + count, ids = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 3 + assert len(ids) == 3 + + +def test_count_records_referencing_excludes_soft_deleted(store): + e = _record_entry(TEST_OWNER, "rec-1", "s:test", "1.0.0", {}) + store.save_entry(e) + store.delete_entry(TEST_OWNER, e.id) + count, ids = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 0 + assert ids == [] + + +def 
test_count_records_referencing_ignores_other_versions(store): + store.save_entry(_record_entry(TEST_OWNER, "rec-1", "s:test", "1.0.0", {})) + store.save_entry(_record_entry(TEST_OWNER, "rec-2", "s:test", "2.0.0", {})) + count, _ = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 1 + + +def test_count_records_referencing_caps_id_list_at_ten(store): + for i in range(15): + store.save_entry(_record_entry(TEST_OWNER, f"rec-{i}", "s:test", "1.0.0", {"i": i})) + count, ids = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 15 + assert len(ids) == 10 +``` + +- [ ] **Step 2: Verify failure** + +Run: `pytest tests/test_store.py -v -k count_records_referencing` +Expected: FAIL — method not defined. + +- [ ] **Step 3: Add to protocol and implement** + +Edit `src/mcp_awareness/store.py`: + +```python +def count_records_referencing( + self, owner_id: str, schema_logical_key: str +) -> tuple[int, list[str]]: + """Return (total_count, first_N_ids) of non-deleted records referencing a schema. + + The schema_logical_key is composed as f"{schema_ref}:{schema_version}". + Caller uses total_count for the error payload and ids for the blocker list. + """ + ... +``` + +Edit `src/mcp_awareness/postgres_store.py`: + +```python +def count_records_referencing( + self, owner_id: str, schema_logical_key: str +) -> tuple[int, list[str]]: + """Count and sample-id records referencing a schema version. + + Query splits schema_logical_key into schema_ref + version by splitting on + the last ':'. Matches data.schema_ref and data.schema_version in the + record entries' JSONB. + """ + # Parse "schema_ref:schema_version" — schema_ref may itself contain ':' + # (e.g., "schema:edge-manifest:1.0.0"). Split on the LAST ':'. 
+ ref, _, version = schema_logical_key.rpartition(":") + count_query = """ + SELECT COUNT(*) AS cnt + FROM entries + WHERE type = 'record' + AND owner_id = %(owner)s + AND data->>'schema_ref' = %(ref)s + AND data->>'schema_version' = %(version)s + AND deleted IS NULL + """ + ids_query = """ + SELECT id + FROM entries + WHERE type = 'record' + AND owner_id = %(owner)s + AND data->>'schema_ref' = %(ref)s + AND data->>'schema_version' = %(version)s + AND deleted IS NULL + ORDER BY created + LIMIT 10 + """ + params = {"owner": owner_id, "ref": ref, "version": version} + with self._conn_pool.connection() as conn, conn.cursor(row_factory=dict_row) as cur: + cur.execute(count_query, params) + count = cur.fetchone()["cnt"] + if count == 0: + return (0, []) + cur.execute(ids_query, params) + ids = [r["id"] for r in cur.fetchall()] + return (count, ids) +``` + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_store.py -v -k count_records_referencing` +Expected: all pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/store.py src/mcp_awareness/postgres_store.py tests/test_store.py +git commit -m "feat: add Store.count_records_referencing for schema deletion protection" +``` + +--- + +## Task 8: `validation.resolve_schema` + +Uses `store.find_schema()` under the hood but exists in the validation module for a uniform interface to callers. + +**Files:** +- Modify: `src/mcp_awareness/validation.py` +- Modify: `tests/test_validation.py` + +- [ ] **Step 1: Write unit tests with store stub** + +Append to `tests/test_validation.py`: + +```python +from mcp_awareness.validation import resolve_schema + + +class _StubStore: + """Minimal Store-like stub for validation unit tests. + + Records calls to find_schema and returns pre-configured results keyed by + (owner_id, logical_key). Only needs to implement find_schema; other Store + methods are never called by resolve_schema. 
+ """ + + def __init__(self): + self._results: dict[tuple[str, str], object] = {} + self.calls: list[tuple[str, str]] = [] + + def set(self, owner_id: str, logical_key: str, result): + self._results[(owner_id, logical_key)] = result + + def find_schema(self, owner_id, logical_key): + self.calls.append((owner_id, logical_key)) + return self._results.get((owner_id, logical_key)) + + +def test_resolve_schema_returns_caller_owned(): + stub = _StubStore() + stub.set("alice", "s:test:1.0.0", object()) # sentinel + result = resolve_schema(stub, "alice", "s:test", "1.0.0") + assert result is stub._results[("alice", "s:test:1.0.0")] + + +def test_resolve_schema_returns_none_when_missing(): + stub = _StubStore() + assert resolve_schema(stub, "alice", "s:nope", "1.0.0") is None +``` + +Note: the underlying `find_schema` already handles `_system` fallback at the SQL level, so `resolve_schema` delegates fully. No branching in Python. + +- [ ] **Step 2: Verify failure** + +Run: `pytest tests/test_validation.py -v -k resolve_schema` +Expected: FAIL — missing import. + +- [ ] **Step 3: Implement** + +Append to `src/mcp_awareness/validation.py`: + +```python +from typing import Protocol + + +class _SchemaFinder(Protocol): + """Minimal protocol for resolve_schema's store dependency.""" + def find_schema(self, owner_id: str, logical_key: str): + ... + + +def resolve_schema(store: _SchemaFinder, owner_id: str, family: str, version: str): + """Resolve a schema by family + version, preferring caller-owned. + + Delegates to Store.find_schema (which handles the _system fallback at + the SQL level). Returns the schema Entry or None. + """ + return store.find_schema(owner_id, compose_schema_logical_key(family, version)) +``` + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_validation.py -v` +Expected: all pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/validation.py tests/test_validation.py +git commit -m "feat: add validation.resolve_schema delegating to Store.find_schema" +``` + +--- + +## Task 9: `validation.assert_schema_deletable` + +**Files:** +- Modify: `src/mcp_awareness/validation.py` +- Modify: `tests/test_validation.py` + +- [ ] **Step 1: Write failing tests** + +Append to `tests/test_validation.py`: + +```python +from mcp_awareness.validation import SchemaInUseError, assert_schema_deletable + + +class _CounterStore: + """Stub exposing count_records_referencing.""" + + def __init__(self, count: int, ids: list[str]): + self._count = count + self._ids = ids + + def count_records_referencing(self, owner_id, schema_logical_key): + return (self._count, self._ids) + + +def test_assert_schema_deletable_passes_with_zero_refs(): + assert_schema_deletable(_CounterStore(0, []), "alice", "s:test:1.0.0") + + +def test_assert_schema_deletable_raises_with_refs(): + with pytest.raises(SchemaInUseError) as excinfo: + assert_schema_deletable(_CounterStore(3, ["id1", "id2", "id3"]), "alice", "s:test:1.0.0") + assert excinfo.value.total_count == 3 + assert excinfo.value.referencing_records == ["id1", "id2", "id3"] +``` + +- [ ] **Step 2: Verify failure** + +Run: `pytest tests/test_validation.py -v -k assert_schema_deletable` +Expected: FAIL — missing import. + +- [ ] **Step 3: Implement** + +Append to `src/mcp_awareness/validation.py`: + +```python +class SchemaInUseError(Exception): + """Raised when a schema cannot be deleted because records reference it. + + Callers at the MCP boundary translate this into a structured schema_in_use + error response with the referencing_records list and total_count. 
+ """ + + def __init__(self, total_count: int, referencing_records: list[str]): + self.total_count = total_count + self.referencing_records = referencing_records + super().__init__( + f"Cannot delete schema: {total_count} record(s) still reference it" + ) + + +class _RefCounter(Protocol): + def count_records_referencing(self, owner_id: str, schema_logical_key: str) -> tuple[int, list[str]]: + ... + + +def assert_schema_deletable( + store: _RefCounter, owner_id: str, schema_logical_key: str +) -> None: + """Raise SchemaInUseError if any non-deleted records reference this schema.""" + count, ids = store.count_records_referencing(owner_id, schema_logical_key) + if count > 0: + raise SchemaInUseError(total_count=count, referencing_records=ids) +``` + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_validation.py -v` +Expected: all pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/validation.py tests/test_validation.py +git commit -m "feat: add assert_schema_deletable and SchemaInUseError" +``` + +--- + +## Task 10: Alembic migration — seed `_system` user + +**Files:** +- Create: `alembic/versions/<revision_id>_add_system_user_for_schemas.py` + +- [ ] **Step 1: Determine next revision id** + +Run: `alembic current` (needs DB — or read head from `alembic/versions/` by the most recent `down_revision` chain). The latest is `l7g8h9i0j1k2_backfill_entry_language`. Pick the next id in the project's scheme — e.g., `m8h9i0j1k2l3`. 
+ +- [ ] **Step 2: Create the migration file** + +Create `alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py`: + +```python +# AGPL preamble — copy from alembic/versions/l7g8h9i0j1k2_backfill_entry_language.py + +"""add _system user for system-owned schemas + +Revision ID: m8h9i0j1k2l3 +Revises: l7g8h9i0j1k2 +Create Date: 2026-04-13 00:00:00.000000 + +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from alembic import op + +revision: str = "m8h9i0j1k2l3" +down_revision: str | Sequence[str] | None = "l7g8h9i0j1k2" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Seed the _system user for system-owned schema entries. + + Idempotent — ON CONFLICT DO NOTHING lets the migration run multiple + times safely (e.g., after a stamp-and-reapply). + """ + op.execute( + "INSERT INTO users (id, display_name) " + "VALUES ('_system', 'System-managed schemas') " + "ON CONFLICT (id) DO NOTHING" + ) + + +def downgrade() -> None: + """Remove the _system user. + + Will fail if any entries still reference owner_id='_system'. Operators + must soft-delete or re-home such entries before downgrade. + """ + op.execute("DELETE FROM users WHERE id = '_system'") +``` + +- [ ] **Step 3: Test the migration end-to-end** + +Run: `mcp-awareness-migrate` against a local Postgres (the testcontainers instance or a scratch DB). +Expected: exits 0 with "Migrations complete."; `SELECT id FROM users WHERE id='_system'` returns a row. + +Run: `mcp-awareness-migrate --downgrade l7g8h9i0j1k2` +Expected: exits 0; `_system` row removed. + +Run: `mcp-awareness-migrate` again (re-upgrade) to confirm re-applies cleanly. 
+ +- [ ] **Step 4: Add a quick idempotence test** + +Since Alembic testing is typically integration-level, add a smoke test to `tests/test_store.py`: + +```python +def test_system_user_exists_after_migration(store): + """The conftest fixture inserts _system — verifies the migration logic is ON CONFLICT safe.""" + # Fixture already inserted; insert again to prove ON CONFLICT DO NOTHING semantics + with store._conn_pool.connection() as conn, conn.cursor() as cur: + cur.execute( + "INSERT INTO users (id, display_name) VALUES ('_system', 'Re-insert') " + "ON CONFLICT (id) DO NOTHING" + ) + conn.commit() + cur.execute("SELECT COUNT(*) FROM users WHERE id = '_system'") + assert cur.fetchone()[0] == 1 +``` + +- [ ] **Step 5: Commit** + +```bash +git add alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py tests/test_store.py +git commit -m "feat: add migration seeding _system user for shared schemas" +``` + +--- + +## Task 11: MCP tool — `register_schema` + +**Files:** +- Modify: `src/mcp_awareness/tools.py` +- Create: `tests/test_tools_schema_record.py` + +- [ ] **Step 1: Write failing integration tests** + +Create `tests/test_tools_schema_record.py`: + +```python +# AGPL preamble — copy from tests/test_store.py + +"""Integration tests for schema/record MCP tool handlers. + +Uses testcontainers Postgres + direct tool-function calls via the server's +contextvar-based owner resolution. 
+""" + +from __future__ import annotations + +import json + +import pytest + +from mcp_awareness.schema import EntryType + + +TEST_OWNER = "test-owner" + + +@pytest.fixture +def configured_server(store, monkeypatch): + """Wire the FastMCP server to the testcontainers store.""" + import mcp_awareness.server as srv + monkeypatch.setattr(srv, "store", store) + # Set owner contextvar for all subsequent tool calls + from mcp_awareness.server import current_owner # or wherever the contextvar lives + token = current_owner.set(TEST_OWNER) + yield srv + current_owner.reset(token) + + +@pytest.mark.asyncio +async def test_register_schema_happy_path(configured_server): + from mcp_awareness.tools import register_schema + + response = await register_schema( + source="test", + tags=["schema"], + description="test schema", + family="schema:test-thing", + version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + body = json.loads(response) + assert body["status"] == "ok" + assert body["logical_key"] == "schema:test-thing:1.0.0" + assert "id" in body + + +@pytest.mark.asyncio +async def test_register_schema_rejects_invalid_schema(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import register_schema + + with pytest.raises(ToolError) as excinfo: + await register_schema( + source="test", + tags=["schema"], + description="bad schema", + family="schema:bad", + version="1.0.0", + schema={"type": "strng"}, # typo + ) + err = json.loads(excinfo.value.args[0])["error"] + assert err["code"] == "invalid_schema" + + +@pytest.mark.asyncio +async def test_register_schema_rejects_duplicate_family_version(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import register_schema + + await register_schema( + source="test", tags=[], description="v1", + family="schema:dup", version="1.0.0", + schema={"type": "object"}, + ) + with 
pytest.raises(ToolError) as excinfo: + await register_schema( + source="test", tags=[], description="v1 again", + family="schema:dup", version="1.0.0", + schema={"type": "object"}, + ) + err = json.loads(excinfo.value.args[0])["error"] + assert err["code"] == "schema_already_exists" +``` + +- [ ] **Step 2: Verify failure** + +Run: `pytest tests/test_tools_schema_record.py -v -k register_schema` +Expected: FAIL — `register_schema` does not exist in `tools.py`. + +- [ ] **Step 3: Implement the tool** + +Add to `src/mcp_awareness/tools.py` (follow the exact pattern of `remember` for decorator order, docstring shape, and use of `_srv.mcp.tool()`, `_timed`, embedding submission, etc.): + +```python +@_srv.mcp.tool() +@_timed +async def register_schema( + source: str, + tags: list[str], + description: str, + family: str, + version: str, + schema: dict[str, Any], + learned_from: str = "conversation", + language: str | None = None, +) -> str: + """Register a new JSON Schema entry for later use by records. + + Validates the schema body against JSON Schema Draft 2020-12 meta-schema + on write. Family + version are combined into the entry's logical_key + (schema:family:version); each version is a separate entry. Schemas are + absolutely immutable once registered — to change one, register a new + version and (if no records reference the old one) delete it. 
+ + Returns: + JSON: {"status": "ok", "id": "<entry-id>", "logical_key": "<family>:<version>"} + + If you receive an unstructured error, the failure is in the transport + or platform layer, not in awareness.""" + from jsonschema import exceptions as jse + from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body + + # Validate family / version + if not family or ":" in family.split(":", 1)[0]: + # Explicit invalid_parameter pattern + _error_response( + "invalid_parameter", + "family must be a non-empty string", + retryable=False, param="family", value=family, + ) + if not version: + _error_response( + "invalid_parameter", "version must be a non-empty string", + retryable=False, param="version", value=version, + ) + + # Validate the schema body + try: + validate_schema_body(schema) + except jse.SchemaError as e: + _error_response( + "invalid_schema", + f"Schema does not conform to JSON Schema Draft 2020-12: {e.message}", + retryable=False, + schema_error_path="/" + "/".join(str(p) for p in e.absolute_path), + detail=str(e.message), + ) + except jse.JsonSchemaException as e: + _error_response( + "validation_error", f"Unexpected schema validation error: {e}", + retryable=False, + ) + + logical_key = compose_schema_logical_key(family, version) + now = now_utc() + data: dict[str, Any] = { + "family": family, + "version": version, + "schema": schema, + "description": description, + "learned_from": learned_from, + } + text_for_detect = compose_detection_text("schema", data) + resolved_lang = resolve_language(explicit=language, text_for_detection=text_for_detect) + _check_unsupported_language(text_for_detect, resolved_lang) + + entry = Entry( + id=make_id(), + type=EntryType.SCHEMA, + source=source, + tags=tags, + created=now, + updated=None, + expires=None, + data=data, + logical_key=logical_key, + owner_id=_srv._current_owner(), # or existing helper + language=resolved_lang, + ) + try: + _srv.store.save_entry(entry) + except _UniqueViolation as e: # existing pattern for 
23505 translation + _error_response( + "schema_already_exists", + f"Schema {logical_key} already exists in source {source}", + retryable=False, logical_key=logical_key, existing_id=e.existing_id, + ) + + _srv._generate_embedding(entry) + return json.dumps({"status": "ok", "id": entry.id, "logical_key": logical_key}) +``` + +Match the existing unique-constraint translation pattern (check `remember` for how logical_key collisions are surfaced — it uses upsert semantics, but for schemas we want *rejection* not upsert). + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_tools_schema_record.py -v -k register_schema` +Expected: all pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/tools.py tests/test_tools_schema_record.py +git commit -m "feat: add register_schema MCP tool" +``` + +--- + +## Task 12: MCP tool — `create_record` + +**Files:** +- Modify: `src/mcp_awareness/tools.py` +- Modify: `tests/test_tools_schema_record.py` + +- [ ] **Step 1: Write failing tests** + +Append to `tests/test_tools_schema_record.py`: + +```python +@pytest.mark.asyncio +async def test_create_record_happy_path(configured_server): + from mcp_awareness.tools import create_record, register_schema + + await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + response = await create_record( + source="test", tags=[], description="a thing", + logical_key="thing-one", + schema_ref="schema:thing", schema_version="1.0.0", + content={"name": "widget"}, + ) + body = json.loads(response) + assert body["status"] == "ok" + assert body["action"] == "created" + assert "id" in body + + +@pytest.mark.asyncio +async def test_create_record_rejects_unknown_schema(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import create_record + + with pytest.raises(ToolError) as excinfo: + await 
create_record( + source="test", tags=[], description="orphan", + logical_key="thing-one", + schema_ref="schema:does-not-exist", schema_version="1.0.0", + content={"name": "widget"}, + ) + err = json.loads(excinfo.value.args[0])["error"] + assert err["code"] == "schema_not_found" + assert err["searched_owners"] == [TEST_OWNER, "_system"] + + +@pytest.mark.asyncio +async def test_create_record_surfaces_validation_errors(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import create_record, register_schema + + await register_schema( + source="test", tags=[], description="s", + family="schema:person", version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, "required": ["name"]}, + ) + with pytest.raises(ToolError) as excinfo: + await create_record( + source="test", tags=[], description="bad person", + logical_key="p1", + schema_ref="schema:person", schema_version="1.0.0", + content={"age": "thirty"}, # missing name; wrong age type + ) + err = json.loads(excinfo.value.args[0])["error"] + assert err["code"] == "validation_failed" + validators = {ve["validator"] for ve in err["validation_errors"]} + assert "required" in validators + assert "type" in validators + + +@pytest.mark.asyncio +async def test_create_record_upsert_on_same_logical_key(configured_server): + from mcp_awareness.tools import create_record, register_schema + + await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object"}, + ) + r1 = json.loads(await create_record( + source="test", tags=[], description="v1", + logical_key="thing-one", + schema_ref="schema:thing", schema_version="1.0.0", + content={"v": 1}, + )) + assert r1["action"] == "created" + r2 = json.loads(await create_record( + source="test", tags=[], description="v2", + logical_key="thing-one", + schema_ref="schema:thing", schema_version="1.0.0", + content={"v": 
2}, + )) + assert r2["action"] == "updated" + assert r2["id"] == r1["id"] + + +@pytest.mark.asyncio +async def test_create_record_uses_system_schema_fallback(configured_server): + """A record can reference a schema owned by _system, not the caller.""" + from mcp_awareness.tools import create_record + + # Seed _system schema directly via store (not via tool, since tool always writes to caller owner) + from mcp_awareness.schema import Entry, make_id, now_utc + _srv = configured_server + _srv.store.save_entry(Entry( + id=make_id(), type=EntryType.SCHEMA, source="system", + tags=["system"], created=now_utc(), updated=None, expires=None, + data={ + "family": "schema:system-thing", "version": "1.0.0", + "schema": {"type": "object"}, + "description": "system-seeded", "learned_from": "cli-bootstrap", + }, + logical_key="schema:system-thing:1.0.0", owner_id="_system", + )) + response = await create_record( + source="test", tags=[], description="mine", + logical_key="mine-1", + schema_ref="schema:system-thing", schema_version="1.0.0", + content={"any": "thing"}, + ) + body = json.loads(response) + assert body["status"] == "ok" +``` + +- [ ] **Step 2: Verify failure** + +Run: `pytest tests/test_tools_schema_record.py -v -k create_record` +Expected: FAIL — `create_record` not defined. + +- [ ] **Step 3: Implement** + +Add to `src/mcp_awareness/tools.py` following the existing `remember` pattern (especially for logical_key upsert behavior): + +```python +@_srv.mcp.tool() +@_timed +async def create_record( + source: str, + tags: list[str], + description: str, + logical_key: str, + schema_ref: str, + schema_version: str, + content: Any, + learned_from: str = "conversation", + language: str | None = None, +) -> str: + """Create or upsert a record validated against a registered schema. + + Resolves the target schema by schema_ref + schema_version (prefers + caller-owned, falls back to _system). 
Validates content against the + schema on write; rejects with a structured validation_failed error + listing every validation error. Upserts on matching (source, logical_key) + — same logical_key means update in place with changelog. + + Returns: + JSON: {"status": "ok", "id": "", "action": "created" | "updated"}""" + from jsonschema import exceptions as jse + from mcp_awareness.validation import resolve_schema, validate_record_content + + resolved = resolve_schema(_srv.store, _srv._current_owner(), schema_ref, schema_version) + if resolved is None: + _error_response( + "schema_not_found", + f"No schema {schema_ref}:{schema_version} in your namespace or _system", + retryable=False, + schema_ref=schema_ref, schema_version=schema_version, + searched_owners=[_srv._current_owner(), "_system"], + ) + + schema_body = resolved.data["schema"] + try: + errors = validate_record_content(schema_body, content) + except jse.JsonSchemaException as e: + _error_response( + "validation_error", f"Unexpected content validation error: {e}", + retryable=False, + ) + if errors: + n = errors[-1].get("total_errors") if errors[-1].get("truncated") else len(errors) + extras: dict[str, Any] = { + "schema_ref": schema_ref, + "schema_version": schema_version, + "validation_errors": errors, + } + if errors[-1].get("truncated"): + extras["truncated"] = True + extras["total_errors"] = errors[-1]["total_errors"] + _error_response( + "validation_failed", + f"Record content does not conform to schema {schema_ref}:{schema_version} ({n} errors)", + retryable=False, **extras, + ) + + # Existing logical_key upsert path (mirror `remember`'s approach) + now = now_utc() + data: dict[str, Any] = { + "schema_ref": schema_ref, + "schema_version": schema_version, + "content": content, + "description": description, + "learned_from": learned_from, + } + text_for_detect = compose_detection_text("record", data) + resolved_lang = resolve_language(explicit=language, text_for_detection=text_for_detect) + 
_check_unsupported_language(text_for_detect, resolved_lang) + + entry = Entry( + id=make_id(), + type=EntryType.RECORD, + source=source, + tags=tags, + created=now, + updated=None, + expires=None, + data=data, + logical_key=logical_key, + owner_id=_srv._current_owner(), + language=resolved_lang, + ) + # Upsert via existing store method that returns (entry, action) — mirror remember + saved, action = _srv.store.upsert_by_logical_key(entry) + _srv._generate_embedding(saved) + return json.dumps({"status": "ok", "id": saved.id, "action": action}) +``` + +The exact shape of `upsert_by_logical_key` is whatever `remember` calls today — copy that. + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_tools_schema_record.py -v -k create_record` +Expected: all pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/tools.py tests/test_tools_schema_record.py +git commit -m "feat: add create_record MCP tool with schema validation and _system fallback" +``` + +--- + +## Task 13: Update `update_entry` handler for schema/record branching + +**Files:** +- Modify: `src/mcp_awareness/tools.py` (function `update_entry`, around line 533) +- Modify: `tests/test_tools_schema_record.py` + +- [ ] **Step 1: Write failing tests** + +Append to `tests/test_tools_schema_record.py`: + +```python +@pytest.mark.asyncio +async def test_update_entry_rejects_schema_update(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import register_schema, update_entry + + resp = json.loads(await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object"}, + )) + with pytest.raises(ToolError) as excinfo: + await update_entry(entry_id=resp["id"], description="new desc") + err = json.loads(excinfo.value.args[0])["error"] + assert err["code"] == "schema_immutable" + + +@pytest.mark.asyncio +async def test_update_entry_record_content_revalidates(configured_server): + from 
mcp_awareness.tools import create_record, register_schema, update_entry + + await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + r = json.loads(await create_record( + source="test", tags=[], description="r", + logical_key="r1", + schema_ref="schema:thing", schema_version="1.0.0", + content={"name": "good"}, + )) + # Valid update — passes re-validation + await update_entry(entry_id=r["id"], content={"name": "still-good"}) + + +@pytest.mark.asyncio +async def test_update_entry_record_content_rejects_invalid(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import create_record, register_schema, update_entry + + await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + r = json.loads(await create_record( + source="test", tags=[], description="r", + logical_key="r1", + schema_ref="schema:thing", schema_version="1.0.0", + content={"name": "good"}, + )) + with pytest.raises(ToolError) as excinfo: + await update_entry(entry_id=r["id"], content={"name": 123}) # wrong type + err = json.loads(excinfo.value.args[0])["error"] + assert err["code"] == "validation_failed" + + +@pytest.mark.asyncio +async def test_update_entry_record_non_content_skips_revalidation(configured_server): + from mcp_awareness.tools import create_record, register_schema, update_entry + + await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + r = json.loads(await create_record( + source="test", tags=[], description="orig", + logical_key="r1", + schema_ref="schema:thing", schema_version="1.0.0", + 
content={"name": "good"}, + )) + # Description-only change — no re-validation, even though pre-existing content would still pass + await update_entry(entry_id=r["id"], description="updated desc") + # No exception raised + + +@pytest.mark.asyncio +async def test_update_entry_record_pin_immutable(configured_server): + # This test only applies if update_entry exposes schema_ref/schema_version params; + # if it doesn't, the pin is already immutable by default. See Step 3 for the + # decision — we're NOT adding schema_ref/schema_version to update_entry's + # public surface, so this test is omitted. + pass +``` + +- [ ] **Step 2: Verify failure** + +Run: `pytest tests/test_tools_schema_record.py -v -k update_entry` +Expected: the `schema_immutable` and `validation_failed` tests fail (current update_entry accepts any entry without branching). + +- [ ] **Step 3: Implement branching** + +Edit `src/mcp_awareness/tools.py` inside the `update_entry` handler, after the entry is loaded by ID and before it's written back: + +```python +# --- New: type-specific branching --- +from mcp_awareness.validation import resolve_schema, validate_record_content + +if entry.type == EntryType.SCHEMA: + _error_response( + "schema_immutable", + "Schemas cannot be updated. 
Register a new version instead.", + retryable=False, + ) + +if entry.type == EntryType.RECORD and content is not None: + # content is being updated — re-resolve pinned schema and re-validate + schema_ref = entry.data["schema_ref"] + schema_version = entry.data["schema_version"] + resolved = resolve_schema(_srv.store, entry.owner_id, schema_ref, schema_version) + if resolved is None: + # The schema the record pins to has been soft-deleted — unusual, but possible + _error_response( + "schema_not_found", + f"Cannot re-validate: schema {schema_ref}:{schema_version} not found", + retryable=False, + schema_ref=schema_ref, schema_version=schema_version, + searched_owners=[entry.owner_id, "_system"], + ) + errors = validate_record_content(resolved.data["schema"], content) + if errors: + n = errors[-1].get("total_errors") if errors[-1].get("truncated") else len(errors) + extras = { + "schema_ref": schema_ref, "schema_version": schema_version, + "validation_errors": errors, + } + if errors[-1].get("truncated"): + extras["truncated"] = True + extras["total_errors"] = errors[-1]["total_errors"] + _error_response( + "validation_failed", + f"Record content does not conform to schema {schema_ref}:{schema_version} ({n} errors)", + retryable=False, **extras, + ) +# --- end branching --- +``` + +Note: `update_entry` should NOT accept `schema_ref`/`schema_version`/`family`/`version` params — those are out of scope for the update API. If any such params exist in the current signature, leave them out of the new tools' invocation paths. The test `test_update_entry_record_pin_immutable` is kept only as a no-op placeholder (its body is `pass`, so it passes trivially rather than being skipped) because the pin fields aren't exposed. + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_tools_schema_record.py -v -k update_entry` +Expected: all pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/tools.py tests/test_tools_schema_record.py +git commit -m "feat: update_entry enforces schema immutability and record re-validation" +``` + +--- + +## Task 14: Update `delete_entry` for schema deletion protection + +**Files:** +- Modify: `src/mcp_awareness/tools.py` (function `delete_entry`) +- Modify: `tests/test_tools_schema_record.py` + +- [ ] **Step 1: Write failing tests** + +Append to `tests/test_tools_schema_record.py`: + +```python +@pytest.mark.asyncio +async def test_delete_entry_schema_with_no_records_succeeds(configured_server): + from mcp_awareness.tools import delete_entry, register_schema + + resp = json.loads(await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object"}, + )) + await delete_entry(entry_id=resp["id"]) # no records; succeeds + # Verify soft-deleted + assert configured_server.store.find_schema(TEST_OWNER, "schema:thing:1.0.0") is None + + +@pytest.mark.asyncio +async def test_delete_entry_schema_with_records_rejected(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import create_record, delete_entry, register_schema + + resp = json.loads(await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object"}, + )) + await create_record( + source="test", tags=[], description="r", + logical_key="r1", + schema_ref="schema:thing", schema_version="1.0.0", + content={}, + ) + with pytest.raises(ToolError) as excinfo: + await delete_entry(entry_id=resp["id"]) + err = json.loads(excinfo.value.args[0])["error"] + assert err["code"] == "schema_in_use" + assert len(err["referencing_records"]) == 1 + + +@pytest.mark.asyncio +async def test_delete_entry_schema_allowed_after_records_deleted(configured_server): + from mcp_awareness.tools import create_record, delete_entry, register_schema + + 
schema_resp = json.loads(await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object"}, + )) + record_resp = json.loads(await create_record( + source="test", tags=[], description="r", + logical_key="r1", + schema_ref="schema:thing", schema_version="1.0.0", + content={}, + )) + await delete_entry(entry_id=record_resp["id"]) + await delete_entry(entry_id=schema_resp["id"]) # no live refs; succeeds +``` + +- [ ] **Step 2: Verify failure** + +Run: `pytest tests/test_tools_schema_record.py -v -k delete_entry` +Expected: `schema_with_records_rejected` fails (no protection yet). + +- [ ] **Step 3: Implement branching** + +Edit `src/mcp_awareness/tools.py` inside `delete_entry`, after the entry is loaded: + +```python +from mcp_awareness.validation import SchemaInUseError, assert_schema_deletable + +if entry.type == EntryType.SCHEMA: + try: + assert_schema_deletable(_srv.store, entry.owner_id, entry.logical_key) + except SchemaInUseError as e: + _error_response( + "schema_in_use", + f"Cannot delete schema {entry.logical_key}: {e.total_count} record(s) reference it", + retryable=False, + referencing_records=e.referencing_records, + total_count=e.total_count, + ) +# Existing soft-delete path follows +``` + +- [ ] **Step 4: Run tests** + +Run: `pytest tests/test_tools_schema_record.py -v` +Expected: all pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/mcp_awareness/tools.py tests/test_tools_schema_record.py +git commit -m "feat: delete_entry protects schemas referenced by live records" +``` + +--- + +## Task 15: CLI tool — `mcp-awareness-register-schema` + +**Files:** +- Create: `src/mcp_awareness/cli_register_schema.py` +- Create: `tests/test_cli_register_schema.py` +- Modify: `pyproject.toml` + +- [ ] **Step 1: Write failing test** + +Create `tests/test_cli_register_schema.py`: + +```python +# AGPL preamble + +"""Tests for mcp-awareness-register-schema CLI.""" + +from __future__ import annotations + +import json +import subprocess +import sys +import tempfile + +import pytest + + +@pytest.fixture +def system_schema_file(): + with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f: + json.dump({"type": "object", "properties": {"name": {"type": "string"}}}, f) + path = f.name + yield path + + +def test_cli_register_schema_happy_path(pg_dsn, system_schema_file, monkeypatch, capsys): + """End-to-end: CLI writes a _system schema via direct store access.""" + from mcp_awareness.cli_register_schema import main + + monkeypatch.setenv("AWARENESS_DATABASE_URL", pg_dsn) + sys_argv = [ + "mcp-awareness-register-schema", + "--system", + "--family", "schema:cli-test", + "--version", "1.0.0", + "--schema-file", system_schema_file, + "--source", "awareness-built-in", + "--tags", "cli,test", + "--description", "CLI-registered test schema", + ] + monkeypatch.setattr("sys.argv", sys_argv) + + main() + captured = capsys.readouterr() + body = json.loads(captured.out.strip()) + assert body["status"] == "ok" + assert body["logical_key"] == "schema:cli-test:1.0.0" + + # Verify entry exists in DB under _system owner + from mcp_awareness.postgres_store import PostgresStore + store = PostgresStore(pg_dsn) + entry = store.find_schema("any-caller", "schema:cli-test:1.0.0") + assert entry is not None + assert entry.owner_id == "_system" + assert 
entry.data["learned_from"] == "cli-bootstrap" + + +def test_cli_register_schema_rejects_invalid_schema_file(pg_dsn, monkeypatch, capsys): + from mcp_awareness.cli_register_schema import main + + with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f: + json.dump({"type": "strng"}, f) # invalid + path = f.name + + monkeypatch.setenv("AWARENESS_DATABASE_URL", pg_dsn) + monkeypatch.setattr("sys.argv", [ + "mcp-awareness-register-schema", "--system", + "--family", "schema:bad", "--version", "1.0.0", + "--schema-file", path, "--source", "test", "--tags", "", "--description", "bad", + ]) + with pytest.raises(SystemExit) as excinfo: + main() + assert excinfo.value.code == 1 + captured = capsys.readouterr() + assert "invalid_schema" in captured.err +``` + +- [ ] **Step 2: Verify failure** + +Run: `pytest tests/test_cli_register_schema.py -v` +Expected: FAIL — module not found. + +- [ ] **Step 3: Implement** + +Create `src/mcp_awareness/cli_register_schema.py`: + +```python +# AGPL preamble + +"""CLI for registering _system-owned schema entries. + +Bypasses MCP entirely — operator tool, run once per built-in schema at +deploy/bootstrap time. No MCP auth, no middleware, direct PostgresStore +access. +""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +from pathlib import Path + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Register a _system-owned schema entry (operator bootstrap only).", + ) + parser.add_argument("--system", action="store_true", required=True, + help="Required. 
Confirms the caller intends to write to the _system owner.") + parser.add_argument("--family", required=True, help="Schema family (e.g., schema:edge-manifest)") + parser.add_argument("--version", required=True, help="Schema version (e.g., 1.0.0)") + parser.add_argument("--schema-file", required=True, type=Path, + help="Path to JSON file containing the Draft 2020-12 schema body") + parser.add_argument("--source", required=True, help="Source field for the entry") + parser.add_argument("--tags", default="", + help="Comma-separated tags (empty string for none)") + parser.add_argument("--description", required=True, help="Entry description") + args = parser.parse_args() + + # Read + parse schema file + if not args.schema_file.exists(): + print(json.dumps({"error": {"code": "file_not_found", "message": str(args.schema_file)}}), + file=sys.stderr) + sys.exit(1) + try: + schema_body = json.loads(args.schema_file.read_text()) + except json.JSONDecodeError as e: + print(json.dumps({"error": {"code": "invalid_json", "message": str(e)}}), file=sys.stderr) + sys.exit(1) + + # Meta-schema validation + from jsonschema import exceptions as jse + from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body + try: + validate_schema_body(schema_body) + except jse.SchemaError as e: + print(json.dumps({"error": { + "code": "invalid_schema", "message": str(e.message), + "schema_error_path": "/" + "/".join(str(p) for p in e.absolute_path), + }}), file=sys.stderr) + sys.exit(1) + + # DB connection + database_url = os.environ.get("AWARENESS_DATABASE_URL", "") + if not database_url: + print(json.dumps({"error": {"code": "missing_env", "message": "AWARENESS_DATABASE_URL required"}}), + file=sys.stderr) + sys.exit(1) + + from mcp_awareness.postgres_store import PostgresStore + from mcp_awareness.schema import Entry, EntryType, make_id, now_utc + + store = PostgresStore(database_url) + logical_key = compose_schema_logical_key(args.family, args.version) + tags = 
[t.strip() for t in args.tags.split(",") if t.strip()] + + entry = Entry( + id=make_id(), + type=EntryType.SCHEMA, + source=args.source, + tags=tags, + created=now_utc(), + updated=None, + expires=None, + data={ + "family": args.family, + "version": args.version, + "schema": schema_body, + "description": args.description, + "learned_from": "cli-bootstrap", + }, + logical_key=logical_key, + owner_id="_system", + language="english", + ) + + try: + store.save_entry(entry) + except Exception as e: + print(json.dumps({"error": {"code": "store_error", "message": str(e)}}), file=sys.stderr) + sys.exit(1) + + print(json.dumps({"status": "ok", "id": entry.id, "logical_key": logical_key})) + sys.exit(0) + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 4: Register console script** + +Edit `pyproject.toml`: + +```toml +[project.scripts] +# ... existing scripts ... +mcp-awareness-register-schema = "mcp_awareness.cli_register_schema:main" +``` + +- [ ] **Step 5: Reinstall and test** + +Run: `pip install -e ".[dev]"` +Run: `pytest tests/test_cli_register_schema.py -v` +Expected: all pass. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/mcp_awareness/cli_register_schema.py tests/test_cli_register_schema.py pyproject.toml +git commit -m "feat: add mcp-awareness-register-schema CLI for _system schemas" +``` + +--- + +## Task 16: Cross-owner isolation tests + +**Files:** +- Modify: `tests/test_tools_schema_record.py` + +- [ ] **Step 1: Add isolation tests** + +Append to `tests/test_tools_schema_record.py`: + +```python +@pytest.mark.asyncio +async def test_cross_owner_schema_invisible(configured_server, store): + """Owner A registers a schema; Owner B cannot resolve it.""" + from mcp_awareness.server import current_owner + from mcp_awareness.tools import create_record, register_schema + from mcp.server.fastmcp.exceptions import ToolError + + # Owner A (default TEST_OWNER) registers + await register_schema( + source="test", tags=[], description="A's schema", + family="schema:mine", version="1.0.0", + schema={"type": "object"}, + ) + + # Switch to Owner B + token = current_owner.set("other-owner") + try: + with pytest.raises(ToolError) as excinfo: + await create_record( + source="test", tags=[], description="B's attempt", + logical_key="r-b", schema_ref="schema:mine", schema_version="1.0.0", + content={}, + ) + err = json.loads(excinfo.value.args[0])["error"] + assert err["code"] == "schema_not_found" + finally: + current_owner.reset(token) + + +@pytest.mark.asyncio +async def test_both_owners_see_system_schema(configured_server, store): + """Both A and B can use a _system schema; their records don't cross.""" + from mcp_awareness.schema import Entry, make_id, now_utc + from mcp_awareness.server import current_owner + from mcp_awareness.tools import create_record + + # Seed _system schema directly + store.save_entry(Entry( + id=make_id(), type=EntryType.SCHEMA, source="system", + tags=["system"], created=now_utc(), updated=None, expires=None, + data={ + "family": "schema:shared", "version": "1.0.0", + "schema": {"type": "object"}, + "description": 
"shared", "learned_from": "cli-bootstrap", + }, + logical_key="schema:shared:1.0.0", owner_id="_system", + )) + + # A writes a record + a_resp = json.loads(await create_record( + source="test", tags=[], description="A's record", + logical_key="rec-a", schema_ref="schema:shared", schema_version="1.0.0", + content={"who": "alice"}, + )) + + # Switch to B + token = current_owner.set("bob") + try: + b_resp = json.loads(await create_record( + source="test", tags=[], description="B's record", + logical_key="rec-b", schema_ref="schema:shared", schema_version="1.0.0", + content={"who": "bob"}, + )) + assert b_resp["status"] == "ok" + finally: + current_owner.reset(token) + + # A's record invisible to B — verified via the owner_id on each entry + # (the records are already isolated by owner_id on create) + a_entry = store.get_entry(TEST_OWNER, a_resp["id"]) # exists + assert a_entry is not None + # Call with bob's owner — returns None because RLS/owner filter excludes + # (if get_entry takes owner_id as arg, this is clean; otherwise use find) +``` + +- [ ] **Step 2: Run tests** + +Run: `pytest tests/test_tools_schema_record.py -v` +Expected: all pass. + +- [ ] **Step 3: Commit** + +```bash +git add tests/test_tools_schema_record.py +git commit -m "test: cross-owner isolation for schema/record tools" +``` + +--- + +## Task 17: Update CHANGELOG, README, data-dictionary, server instructions + +**Files:** +- Modify: `CHANGELOG.md` +- Modify: `README.md` +- Modify: `docs/data-dictionary.md` +- Modify: `src/mcp_awareness/instructions.md` + +- [ ] **Step 1: CHANGELOG entry** + +Add under `[Unreleased]`: + +```markdown +### Added +- Two new entry types: `schema` (JSON Schema Draft 2020-12 definition) and `record` (validated payload conforming to a schema). Tools: `register_schema`, `create_record`. Schemas are absolutely immutable after registration; records re-validate on content update. Schema deletion is blocked while live records reference a version. 
Per-owner storage with a shared `_system` fallback namespace for built-in schemas. +- New CLI: `mcp-awareness-register-schema` for operators to seed `_system`-owned schemas at deploy time. +- New migration: `_system` user seed (idempotent). + +### Dependencies +- Added `jsonschema>=4.26.0` as a runtime dependency. +``` + +- [ ] **Step 2: README updates** + +- Bump tool count in the "Implemented" section (search for "tools" to find it). +- Add a bullet to the tool list describing `register_schema` / `create_record`. +- Bump test count after the test-count check in Task 18 (Step 5). + +Exact text for the new tool bullet (match the style of neighbors): + +```markdown +- **`register_schema` / `create_record`** — define typed data contracts via JSON Schema Draft 2020-12; validate payloads server-side on write with structured error envelopes listing every validation failure. +``` + +- [ ] **Step 3: Data dictionary** + +Add entries to `docs/data-dictionary.md` for both types. Match existing entry format: + +```markdown +### `schema` +JSON Schema Draft 2020-12 definition. Schema body lives in `data.schema`; family + version in `data.family` + `data.version`; `logical_key` derived as `{family}:{version}`. Immutable after registration. + +**`data` fields:** +- `family` (string, required) — schema family identifier (e.g., `schema:edge-manifest`) +- `version` (string, required) — schema version (user-chosen semantic or sequential) +- `schema` (object, required) — JSON Schema Draft 2020-12 body +- `description` (string) — human-readable description +- `learned_from` (string) — platform that registered the schema + +### `record` +Validated data entry conforming to a referenced schema. Content in `data.content`; pinned schema in `data.schema_ref` + `data.schema_version`. Re-validated on content update. 
+ +**`data` fields:** +- `schema_ref` (string, required) — target schema family (e.g., `schema:edge-manifest`) +- `schema_version` (string, required) — target schema version (exact pin, no "latest") +- `content` (any JSON value, required) — validated payload +- `description` (string) — human-readable description +- `learned_from` (string) — platform that created the record +``` + +- [ ] **Step 4: Server instructions** + +Append to `src/mcp_awareness/instructions.md` (or wherever server-level guidance lives): + +```markdown +When you need typed data contracts for edge providers, tag taxonomies, or any +shape that should be validated on write: register a schema via `register_schema` +(family + version + JSON Schema body), then write records via `create_record` +referencing `schema_ref` + `schema_version`. Schemas are immutable — bump the +version to evolve. Built-in shared schemas live in the `_system` namespace +seeded by the operator. +``` + +- [ ] **Step 5: Commit** + +```bash +git add CHANGELOG.md README.md docs/data-dictionary.md src/mcp_awareness/instructions.md +git commit -m "docs: document schema/record entry types, new tools, and CLI" +``` + +--- + +## Task 18: Pre-push verification (ruff, mypy, full test suite, coverage, test count) + +**Files:** none — pure verification. + +- [ ] **Step 1: Format** + +Run: `ruff format src/ tests/` +Expected: no changes or minor formatting only. + +- [ ] **Step 2: Lint** + +Run: `ruff check src/ tests/` +Expected: 0 errors. + +- [ ] **Step 3: Type check** + +Run: `mypy src/mcp_awareness/` +Expected: 0 errors in strict mode. 
+ +- [ ] **Step 4: Full test suite with coverage** + +Run: `pytest --cov=src/mcp_awareness --cov-report=term-missing` +Expected: all tests pass; verify coverage on new modules: + +- `src/mcp_awareness/validation.py` — 100% (pure functions, all paths tested) +- `src/mcp_awareness/cli_register_schema.py` — cover happy path, invalid schema, missing env, bad JSON +- New branches in `tools.py` — cover all new error codes (`schema_immutable`, `validation_failed`, `schema_not_found`, `schema_in_use`, `record_schema_pin_immutable`, `invalid_schema`, `schema_already_exists`) + +If any line is uncovered, add a test case; never use `pragma: no cover`. + +- [ ] **Step 5: Update test count in README** + +Run: `pytest --collect-only -q | tail -3` to get exact count, then update the number in `README.md`. + +- [ ] **Step 6: Commit docs fix-up if test count changed** + +```bash +git add README.md +git commit -m "docs: update test count after schema/record tests" +``` + +- [ ] **Step 7: Push branch** + +```bash +git push -u origin feat/schema-record-entry-types +``` + +--- + +## Task 19: Open PR with QA section + +**Files:** PR body only. + +- [ ] **Step 1: Author PR body** + +Title: `feat: add schema and record entry types with JSON Schema validation` + +Body: + +```markdown +## Summary + +- Adds two new `EntryType` values (`schema`, `record`) with JSON Schema Draft 2020-12 validation on write. +- Per-owner storage with `_system` fallback for shared built-in schemas. +- Schemas are absolutely immutable after registration; records re-validate on content update. +- Schema deletion blocked while live records reference a version. +- New CLI tool `mcp-awareness-register-schema` for operator bootstrap of `_system`-owned schemas. +- Adds `jsonschema>=4.26.0` dependency. + +Closes #208. Spec: `docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md`. Plan: `docs/superpowers/plans/2026-04-13-schema-record-entry-types-plan.md`. 
+ +## QA + +### Prerequisites + +- `pip install -e ".[dev]"` +- Deploy to QA test instance on alternate port (`AWARENESS_PORT=8421`) via `docker-compose.qa.yaml`. +- Run `mcp-awareness-migrate` against the QA DB to apply the `_system` user seed. + +### Manual tests (via MCP tools) + +1. - [ ] **Register a schema** + ``` + register_schema(source="qa-test", tags=["qa"], description="qa test schema", + family="schema:qa-thing", version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}) + ``` + Expected: `{"status":"ok","id":"","logical_key":"schema:qa-thing:1.0.0"}` + +2. - [ ] **Reject invalid schema (meta-schema check)** + ``` + register_schema(source="qa-test", tags=[], description="bad", + family="schema:bad", version="1.0.0", + schema={"type": "strng"}) + ``` + Expected: structured error with `code: "invalid_schema"`, `schema_error_path`, `detail`. + +3. - [ ] **Reject duplicate family+version** + Re-run step 1 exactly. Expected: `code: "schema_already_exists"`, `logical_key`, `existing_id`. + +4. - [ ] **Create a valid record** + ``` + create_record(source="qa-test", tags=[], description="a qa thing", + logical_key="qa-rec-1", schema_ref="schema:qa-thing", schema_version="1.0.0", + content={"name": "widget"}) + ``` + Expected: `{"status":"ok","id":"","action":"created"}` + +5. - [ ] **Reject record with invalid content (shows all errors)** + ``` + create_record(source="qa-test", tags=[], description="bad record", + logical_key="qa-rec-bad", schema_ref="schema:qa-thing", schema_version="1.0.0", + content={"unexpected": 42}) # missing required 'name' + ``` + Expected: `code: "validation_failed"`, `validation_errors` list with `path`, `validator`, `schema_path`. + +6. - [ ] **Upsert record via same logical_key** + Re-run step 4 with different content. Expected: `action: "updated"`, same `id` as step 4. + +7. 
- [ ] **Re-validation on record update (valid)** + ``` + update_entry(entry_id=, content={"name": "still-valid"}) + ``` + Expected: `{"status":"ok"}` (or existing update_entry response shape). + +8. - [ ] **Re-validation on record update (invalid → rejected)** + ``` + update_entry(entry_id=, content={"name": 123}) + ``` + Expected: `code: "validation_failed"`; record content unchanged (verify via `get_knowledge`). + +9. - [ ] **Schema immutability** + ``` + update_entry(entry_id=, description="new desc") + ``` + Expected: `code: "schema_immutable"`; schema unchanged. + +10. - [ ] **Schema deletion blocked by live records** + ``` + delete_entry(entry_id=) + ``` + Expected: `code: "schema_in_use"`, `referencing_records: [...]`, `total_count`. + +11. - [ ] **Schema deletion allowed after records deleted** + Delete the record from step 4 via `delete_entry(entry_id=)`, then retry step 10. + Expected: schema soft-deletes successfully. + +12. - [ ] **`_system` fallback works** + Via QA shell: `mcp-awareness-register-schema --system --family schema:qa-system --version 1.0.0 --schema-file /tmp/qa-system-schema.json --source qa-built-in --tags qa --description "qa system schema"`. + Then via MCP: + ``` + create_record(source="qa-test", tags=[], description="uses system schema", + logical_key="qa-sys-rec", schema_ref="schema:qa-system", schema_version="1.0.0", + content={"any": "thing"}) + ``` + Expected: record created successfully. + +13. - [ ] **Cross-owner isolation** + As a second authenticated user, attempt to resolve the step-1 schema. Expected: `code: "schema_not_found"`. 
+EOF +``` + +- [ ] **Step 2: Create the PR** + +```bash +source ~/github.com/cmeans/claude-dev/github-app/activate.sh && \ + gh pr create \ + --title "feat: add schema and record entry types with JSON Schema validation" \ + --body-file <(cat <<'EOF' + +EOF +) \ + --label "enhancement" \ + --label "Dev Active" +``` + +(Exact label discipline per `feedback_label_discipline.md` — set `Dev Active` on push, let automation transition to `Awaiting CI` → `Ready for QA`.) + +- [ ] **Step 3: Poll CI and transition labels per project workflow** + +Per `feedback_poll_ci_after_push.md` — after push, run `gh pr checks ` immediately. On green, apply `Ready for QA`. Per `feedback_codecov_comment.md` — read the Codecov bot comment, fix any missing lines before marking Ready for QA. + +--- + +## Self-Review + +**Spec coverage check:** + +Walking the design doc section by section: + +- D1 (type-specific tools) → Tasks 11, 12 ✓ +- D2 (per-owner + `_system` fallback) → Task 6 (SQL-level) + Task 10 (seed) ✓ +- D3 (CLI-only `_system` writes) → Task 15 ✓ +- D4 (absolute schema immutability) → Task 13 (schema branch) ✓ +- D5 (record mutability with re-validation) → Task 13 (record branch) ✓ +- D6 (all errors via `iter_errors()`) → Task 5 ✓ +- D7 (server-derived `logical_key`) → Task 3 + used in Tasks 11/12 ✓ +- D8 (any JSON value for `content`) → Task 5 tests include primitive + array schemas ✓ + +**Architecture:** `validation.py` covered Tasks 3–5, 8, 9 ✓; Store changes covered Tasks 6, 7 ✓; Tool changes covered Tasks 11–14 ✓; CLI covered Task 15 ✓; Migration covered Task 10 ✓. + +**Error codes:** every code in the spec's error table is exercised by at least one test: `invalid_schema` (Task 11), `schema_already_exists` (Task 11), `schema_not_found` (Task 12), `validation_failed` (Tasks 12, 13), `schema_immutable` (Task 13), `schema_in_use` (Task 14). `invalid_parameter` inherited from existing helper. 
`record_schema_pin_immutable` is NOT tested — because `update_entry` doesn't expose `schema_ref`/`schema_version` params. Either keep it as a code reserved for a future API change, or drop the code from the spec. **Decision: keep as reserved; no test needed for a code that can't be triggered given the current API.** + +**Deployment:** Operator deploy sequence from the spec mapped to Task 10 (migration) + Task 15 (CLI) + PR-body QA steps. Compose files untouched; called out explicitly. + +**Testing:** Unit (Tasks 3–5, 8, 9) + integration (Tasks 6, 7, 11–14, 16) + CLI (Task 15) + coverage gate (Task 18). Cross-owner isolation explicit in Task 16. + +**Placeholder scan:** No "TBD" / "TODO" in task bodies. Each code step shows actual code. Each run step shows exact command + expected outcome. The one placeholder concession is migration revision id (`m8h9i0j1k2l3`) which depends on head-at-implementation-time — Task 10 Step 1 instructs how to pick it. + +**Type consistency:** Function names consistent throughout: `compose_schema_logical_key`, `validate_schema_body`, `validate_record_content`, `resolve_schema`, `assert_schema_deletable`, `SchemaInUseError`. Store methods: `find_schema`, `count_records_referencing`. Tool names: `register_schema`, `create_record`. Error codes match spec table exactly. + +--- + +## Execution Handoff + +Plan complete and saved to `docs/superpowers/plans/2026-04-13-schema-record-entry-types-plan.md`. Two execution options: + +1. **Subagent-Driven (recommended)** — I dispatch a fresh subagent per task, review between tasks, fast iteration. +2. **Inline Execution** — I execute tasks in this session using `executing-plans`, batch execution with checkpoints. + +Which approach do you want? 
From a59c76d7acf16371b9f493f5828d6cd21da499cb Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:36:26 -0500 Subject: [PATCH 03/28] feat: add SCHEMA and RECORD to EntryType enum Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/schema.py | 2 ++ tests/test_schema.py | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/mcp_awareness/schema.py b/src/mcp_awareness/schema.py index 192d35a..d6e7973 100644 --- a/src/mcp_awareness/schema.py +++ b/src/mcp_awareness/schema.py @@ -36,6 +36,8 @@ class EntryType(str, Enum): PREFERENCE = "preference" NOTE = "note" INTENTION = "intention" + SCHEMA = "schema" + RECORD = "record" # Valid states for the INTENTION lifecycle diff --git a/tests/test_schema.py b/tests/test_schema.py index b631ce1..821a349 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -247,3 +247,13 @@ def test_to_list_dict_intention_includes_goal_state(): assert d["goal"] == "Pick up milk" assert d["state"] == "pending" assert "data" not in d + + +def test_entry_type_schema_value(): + assert EntryType.SCHEMA.value == "schema" + assert EntryType("schema") is EntryType.SCHEMA + + +def test_entry_type_record_value(): + assert EntryType.RECORD.value == "record" + assert EntryType("record") is EntryType.RECORD From 20ae90cba1371a77745326700b4427c224f8f4f3 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:38:56 -0500 Subject: [PATCH 04/28] chore: add jsonschema>=4.26.0 dependency Co-Authored-By: Claude Opus 4.6 (1M context) --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 8a8a96e..56ebe47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "phonenumbers>=8.13,<10.0", "zxcvbn>=4.5.0,<5.0", "lingua-language-detector>=2.1.1,<3.0", + "jsonschema>=4.26.0,<5", ] 
[project.scripts] From 1790f74d768f57ccf9169bdd693ae9aaf9126569 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:41:40 -0500 Subject: [PATCH 05/28] feat: add validation module with compose_schema_logical_key Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/validation.py | 33 +++++++++++++++++++++++++++++++++ tests/test_validation.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 src/mcp_awareness/validation.py create mode 100644 tests/test_validation.py diff --git a/src/mcp_awareness/validation.py b/src/mcp_awareness/validation.py new file mode 100644 index 0000000..0093f32 --- /dev/null +++ b/src/mcp_awareness/validation.py @@ -0,0 +1,33 @@ +# mcp-awareness — ambient system awareness for AI agents +# Copyright (C) 2026 Chris Means +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Validation helpers for Schema and Record entry types. + +Pure functions wrapping jsonschema Draft 2020-12 validation and schema +lookup with _system fallback. Kept out of the store layer so the Store +protocol stays swappable (no jsonschema import in store.py). +""" + +from __future__ import annotations + + +def compose_schema_logical_key(family: str, version: str) -> str: + """Derive the canonical logical_key for a schema entry. 
+ + Single source of truth for the family+version → logical_key format. + Used by register_schema on write and by resolve_schema on lookup. + """ + return f"{family}:{version}" diff --git a/tests/test_validation.py b/tests/test_validation.py new file mode 100644 index 0000000..64c5cd2 --- /dev/null +++ b/tests/test_validation.py @@ -0,0 +1,31 @@ +# mcp-awareness — ambient system awareness for AI agents +# Copyright (C) 2026 Chris Means +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +"""Tests for src/mcp_awareness/validation.py.""" + +from __future__ import annotations + +import pytest + +from mcp_awareness.validation import compose_schema_logical_key + + +def test_compose_schema_logical_key_basic(): + assert compose_schema_logical_key("schema:edge-manifest", "1.0.0") == "schema:edge-manifest:1.0.0" + + +def test_compose_schema_logical_key_no_prefix(): + assert compose_schema_logical_key("tag-taxonomy", "0.1.0") == "tag-taxonomy:0.1.0" From 8d7f3a67675c1d78ce16956418d680e748cb147f Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:42:45 -0500 Subject: [PATCH 06/28] feat: add validate_schema_body for Draft 2020-12 meta-schema check Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/validation.py | 14 ++++++++++++++ tests/test_validation.py | 29 ++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/mcp_awareness/validation.py b/src/mcp_awareness/validation.py index 0093f32..28d544b 100644 --- a/src/mcp_awareness/validation.py +++ b/src/mcp_awareness/validation.py @@ -23,6 +23,10 @@ from __future__ import annotations +from typing import Any + +from jsonschema import Draft202012Validator + def compose_schema_logical_key(family: str, version: str) -> str: """Derive the canonical logical_key for a schema entry. @@ -31,3 +35,13 @@ def compose_schema_logical_key(family: str, version: str) -> str: Used by register_schema on write and by resolve_schema on lookup. """ return f"{family}:{version}" + + +def validate_schema_body(schema: Any) -> None: + """Validate a schema body against the JSON Schema Draft 2020-12 meta-schema. + + Raises jsonschema.exceptions.SchemaError on invalid schema. Callers at + the MCP boundary translate this into a structured 'invalid_schema' error + response; direct callers (CLI) format to stderr. 
+ """ + Draft202012Validator.check_schema(schema) diff --git a/tests/test_validation.py b/tests/test_validation.py index 64c5cd2..909c6ee 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -18,9 +18,10 @@ from __future__ import annotations +import jsonschema import pytest -from mcp_awareness.validation import compose_schema_logical_key +from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body def test_compose_schema_logical_key_basic(): @@ -29,3 +30,29 @@ def test_compose_schema_logical_key_basic(): def test_compose_schema_logical_key_no_prefix(): assert compose_schema_logical_key("tag-taxonomy", "0.1.0") == "tag-taxonomy:0.1.0" + + +def test_validate_schema_body_accepts_valid_object_schema(): + schema = { + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + } + validate_schema_body(schema) # must not raise + + +def test_validate_schema_body_rejects_bad_type(): + schema = {"type": "strng"} # typo: 'strng' is not a valid JSON Schema type + with pytest.raises(jsonschema.exceptions.SchemaError): + validate_schema_body(schema) + + +def test_validate_schema_body_accepts_empty_object(): + # Empty schema matches anything — valid per spec + validate_schema_body({}) + + +def test_validate_schema_body_rejects_non_dict(): + # Schemas must be objects; bare arrays fail meta-schema + with pytest.raises(jsonschema.exceptions.SchemaError): + validate_schema_body([{"type": "string"}]) # type: ignore[arg-type] From e37b52345847a0c81a278e0243048c734d2519c8 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:43:56 -0500 Subject: [PATCH 07/28] feat: add validate_record_content with iter_errors and truncation Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/validation.py | 30 ++++++++++++- tests/test_validation.py | 77 ++++++++++++++++++++++++++++++++- 2 files changed, 105 insertions(+), 2 
deletions(-) diff --git a/src/mcp_awareness/validation.py b/src/mcp_awareness/validation.py index 28d544b..1aca246 100644 --- a/src/mcp_awareness/validation.py +++ b/src/mcp_awareness/validation.py @@ -25,7 +25,7 @@ from typing import Any -from jsonschema import Draft202012Validator +from jsonschema import Draft202012Validator, ValidationError def compose_schema_logical_key(family: str, version: str) -> str: @@ -45,3 +45,31 @@ def validate_schema_body(schema: Any) -> None: response; direct callers (CLI) format to stderr. """ Draft202012Validator.check_schema(schema) + + +_MAX_VALIDATION_ERRORS = 50 + + +def _flatten_error(err: ValidationError) -> dict[str, Any]: + """Flatten a jsonschema ValidationError to a structured dict for the error envelope.""" + return { + "path": err.json_path, + "message": err.message, + "validator": err.validator, + "schema_path": "/" + "/".join(str(p) for p in err.schema_path), + } + + +def validate_record_content(schema_body: dict[str, Any], content: Any) -> list[dict[str, Any]]: + """Validate content against a schema body. Returns list of structured errors. + + Empty list means valid. List truncated at _MAX_VALIDATION_ERRORS; when + truncated, final entry is {'truncated': True, 'total_errors': }. 
+ """ + validator = Draft202012Validator(schema_body) + all_errors = sorted(validator.iter_errors(content), key=lambda e: e.path) + if len(all_errors) <= _MAX_VALIDATION_ERRORS: + return [_flatten_error(e) for e in all_errors] + kept = [_flatten_error(e) for e in all_errors[:_MAX_VALIDATION_ERRORS]] + kept.append({"truncated": True, "total_errors": len(all_errors)}) + return kept diff --git a/tests/test_validation.py b/tests/test_validation.py index 909c6ee..8bc54b6 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -21,7 +21,17 @@ import jsonschema import pytest -from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body +from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body, validate_record_content + + +_PERSON_SCHEMA = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0}, + }, + "required": ["name"], +} def test_compose_schema_logical_key_basic(): @@ -56,3 +66,68 @@ def test_validate_schema_body_rejects_non_dict(): # Schemas must be objects; bare arrays fail meta-schema with pytest.raises(jsonschema.exceptions.SchemaError): validate_schema_body([{"type": "string"}]) # type: ignore[arg-type] + + +def test_validate_record_content_valid_returns_empty_list(): + assert validate_record_content(_PERSON_SCHEMA, {"name": "Alice", "age": 30}) == [] + + +def test_validate_record_content_surfaces_missing_required(): + errors = validate_record_content(_PERSON_SCHEMA, {"age": 30}) + assert len(errors) == 1 + assert errors[0]["validator"] == "required" + assert "name" in errors[0]["message"] + + +def test_validate_record_content_surfaces_all_errors(): + # Missing 'name' AND age is wrong type + errors = validate_record_content(_PERSON_SCHEMA, {"age": "thirty"}) + assert len(errors) == 2 + validators = {e["validator"] for e in errors} + assert validators == {"required", "type"} + + +def test_validate_record_content_is_sorted_by_path(): + 
schema = { + "type": "object", + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"}, + "c": {"type": "integer"}, + }, + } + errors = validate_record_content(schema, {"a": "x", "b": "y", "c": "z"}) + paths = [e["path"] for e in errors] + assert paths == sorted(paths) + + +def test_validate_record_content_accepts_primitive_schema(): + schema = {"type": "integer"} + assert validate_record_content(schema, 42) == [] + errors = validate_record_content(schema, "abc") + assert len(errors) == 1 + assert errors[0]["validator"] == "type" + + +def test_validate_record_content_array_schema_with_index_paths(): + schema = {"type": "array", "items": {"type": "integer"}} + errors = validate_record_content(schema, [1, "two", 3, "four"]) + assert len(errors) == 2 + # Array indices should appear in paths + paths = [e["path"] for e in errors] + assert any("1" in p for p in paths) + assert any("3" in p for p in paths) + + +def test_validate_record_content_truncates_at_50(): + schema = { + "type": "array", + "items": {"type": "integer"}, + } + # 60 wrong-type items — all fail + result = validate_record_content(schema, ["x"] * 60) + assert isinstance(result, list) + # Truncation is carried via a special sentinel entry at the end + assert len(result) == 51 # 50 errors + 1 truncation marker + assert result[-1]["truncated"] is True + assert result[-1]["total_errors"] == 60 From 51d5a9765ba88c68fc888b2506e8f5838177cc03 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:47:40 -0500 Subject: [PATCH 08/28] feat: add Store.find_schema with _system fallback Adds find_schema(owner_id, logical_key) to the Store protocol and PostgresStore. A single SQL query with CASE-based ORDER BY returns the caller's own schema when present, falling back to the _system-owned version. Soft-deleted entries are excluded. Seeds the _system user in the test fixture and adds 5 tests covering all lookup scenarios. 
Co-Authored-By: Claude Sonnet 4.6 --- src/mcp_awareness/postgres_store.py | 16 +++++++ src/mcp_awareness/sql/find_schema.sql | 15 ++++++ src/mcp_awareness/store.py | 8 ++++ tests/conftest.py | 9 ++++ tests/test_store.py | 68 +++++++++++++++++++++++++++ 5 files changed, 116 insertions(+) create mode 100644 src/mcp_awareness/sql/find_schema.sql diff --git a/src/mcp_awareness/postgres_store.py b/src/mcp_awareness/postgres_store.py index 76f1739..b64e32a 100644 --- a/src/mcp_awareness/postgres_store.py +++ b/src/mcp_awareness/postgres_store.py @@ -1375,6 +1375,22 @@ def get_referencing_entries(self, owner_id: str, entry_id: str) -> list[Entry]: (json.dumps([entry_id]),), ) + def find_schema(self, owner_id: str, logical_key: str) -> Entry | None: + """Look up a schema, preferring caller-owned over _system-owned. + + Single query with CASE-based ORDER BY for predictable override + semantics: caller's own version wins, _system is fallback. + Soft-deleted entries are excluded. + """ + with self._pool.connection() as conn, conn.transaction(), conn.cursor() as cur: + self._set_rls_context(cur, owner_id) + cur.execute( + _load_sql("find_schema"), + (logical_key, owner_id, owner_id), + ) + row = cur.fetchone() + return self._row_to_entry(row) if row else None + # ------------------------------------------------------------------ # User operations (for OAuth auto-provisioning) # ------------------------------------------------------------------ diff --git a/src/mcp_awareness/sql/find_schema.sql b/src/mcp_awareness/sql/find_schema.sql new file mode 100644 index 0000000..45aee91 --- /dev/null +++ b/src/mcp_awareness/sql/find_schema.sql @@ -0,0 +1,15 @@ +/* name: find_schema */ +/* mode: literal */ +/* Look up a schema entry by logical_key, preferring caller-owned over _system. + Returns the caller's own version if present, otherwise the _system version. + Soft-deleted entries are excluded. 
+ Params: logical_key, caller (owner_id), caller (owner_id again for ORDER BY) +*/ +SELECT id, type, source, tags, created, updated, expires, data, logical_key, owner_id, language, deleted +FROM entries +WHERE type = 'schema' + AND logical_key = %s + AND owner_id IN (%s, '_system') + AND deleted IS NULL +ORDER BY CASE WHEN owner_id = %s THEN 0 ELSE 1 END +LIMIT 1 diff --git a/src/mcp_awareness/store.py b/src/mcp_awareness/store.py index d625b2f..66c4552 100644 --- a/src/mcp_awareness/store.py +++ b/src/mcp_awareness/store.py @@ -341,6 +341,14 @@ def get_referencing_entries(self, owner_id: str, entry_id: str) -> list[Entry]: """Find entries whose data.related_ids contains the given entry_id.""" ... + def find_schema(self, owner_id: str, logical_key: str) -> Entry | None: + """Look up a schema entry by logical_key, preferring caller-owned over _system. + + Returns the caller's own schema if present; falls back to the _system-owned + version if one exists. Returns None if not found or soft-deleted. + """ + ... + def clear(self, owner_id: str) -> None: """Delete all entries, reads, actions, and embeddings for an owner.""" ... diff --git a/tests/conftest.py b/tests/conftest.py index 4ca29ea..655f464 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,6 +26,7 @@ from mcp_awareness.postgres_store import PostgresStore TEST_OWNER = "test-owner" +SYSTEM_OWNER = "_system" # Set default owner for all tests before any module imports read it. os.environ["AWARENESS_DEFAULT_OWNER"] = TEST_OWNER @@ -61,5 +62,13 @@ def pg_dsn(pg_container): def store(pg_dsn): """Fresh PostgresStore for each test — tables created, then cleared after.""" s = PostgresStore(pg_dsn) + # Ensure _system user exists for cross-owner schema tests. 
+ with s._pool.connection() as conn, conn.cursor() as cur: + cur.execute( + "INSERT INTO users (id, display_name) VALUES ('_system', 'System-managed schemas') " + "ON CONFLICT (id) DO NOTHING" + ) + conn.commit() yield s s.clear(TEST_OWNER) + s.clear(SYSTEM_OWNER) diff --git a/tests/test_store.py b/tests/test_store.py index 892f18a..9a5e7d1 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -3260,3 +3260,71 @@ def test_get_all_patterns(store): result = store.get_all_patterns(TEST_OWNER) assert "nas" in result assert "" in result + + +# ------------------------------------------------------------------ +# find_schema tests +# ------------------------------------------------------------------ + +SYSTEM_OWNER = "_system" + + +def _make_schema_entry(logical_key: str, schema_body: dict) -> Entry: + return Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="test", + tags=[], + created=now_utc(), + data={ + "family": logical_key.rsplit(":", 1)[0] if ":" in logical_key else logical_key, + "version": logical_key.rsplit(":", 1)[1] if ":" in logical_key else "1.0.0", + "schema": schema_body, + "description": "test schema", + "learned_from": "test", + }, + logical_key=logical_key, + ) + + +def test_find_schema_returns_caller_owned(store): + """find_schema returns an entry when caller owns it.""" + entry = _make_schema_entry("s:test:1.0.0", {"type": "object"}) + store.add(TEST_OWNER, entry) + found = store.find_schema(TEST_OWNER, "s:test:1.0.0") + assert found is not None + assert found.data["family"] == "s:test" + assert found.data["schema"] == {"type": "object"} + + +def test_find_schema_system_fallback(store): + """find_schema falls back to _system-owned schema when caller has none.""" + entry = _make_schema_entry("s:test:1.0.0", {"type": "object"}) + store.add(SYSTEM_OWNER, entry) + found = store.find_schema(TEST_OWNER, "s:test:1.0.0") + assert found is not None + assert found.data["schema"] == {"type": "object"} + + +def 
test_find_schema_caller_wins_over_system(store): + """find_schema prefers caller's schema over _system's when both exist.""" + system_entry = _make_schema_entry("s:test:1.0.0", {"type": "object"}) + caller_entry = _make_schema_entry("s:test:1.0.0", {"type": "string"}) + store.add(SYSTEM_OWNER, system_entry) + store.add(TEST_OWNER, caller_entry) + found = store.find_schema(TEST_OWNER, "s:test:1.0.0") + assert found is not None + assert found.data["schema"] == {"type": "string"} + + +def test_find_schema_returns_none_when_missing(store): + """find_schema returns None when no matching schema exists for caller or _system.""" + assert store.find_schema(TEST_OWNER, "s:nonexistent:1.0.0") is None + + +def test_find_schema_excludes_soft_deleted(store): + """find_schema does not return soft-deleted entries.""" + entry = _make_schema_entry("s:test:1.0.0", {"type": "object"}) + stored = store.add(TEST_OWNER, entry) + store.soft_delete_by_id(TEST_OWNER, stored.id) + assert store.find_schema(TEST_OWNER, "s:test:1.0.0") is None From e2dc055d353e92130732b58fd9fcc279fc15d68f Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:51:43 -0500 Subject: [PATCH 09/28] feat: add Store.count_records_referencing for schema deletion protection Adds count_records_referencing to the Store protocol and PostgresStore. Returns (total_count, first_10_ids) of non-deleted records that reference a given schema logical key (decomposed via rpartition on the last ':'). Backed by two SQL files following the one-operation-per-file convention. Five tests cover zero, match, soft-delete exclusion, version isolation, and the 10-id cap with 15 records. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/postgres_store.py | 21 +++++++ .../sql/count_records_referencing.sql | 14 +++++ .../sql/list_records_referencing_ids.sql | 14 +++++ src/mcp_awareness/store.py | 10 +++ tests/test_store.py | 61 +++++++++++++++++++ 5 files changed, 120 insertions(+) create mode 100644 src/mcp_awareness/sql/count_records_referencing.sql create mode 100644 src/mcp_awareness/sql/list_records_referencing_ids.sql diff --git a/src/mcp_awareness/postgres_store.py b/src/mcp_awareness/postgres_store.py index b64e32a..30e8ca8 100644 --- a/src/mcp_awareness/postgres_store.py +++ b/src/mcp_awareness/postgres_store.py @@ -1391,6 +1391,27 @@ def find_schema(self, owner_id: str, logical_key: str) -> Entry | None: row = cur.fetchone() return self._row_to_entry(row) if row else None + def count_records_referencing( + self, owner_id: str, schema_logical_key: str + ) -> tuple[int, list[str]]: + """Count and sample-id records referencing a schema version. + + Splits schema_logical_key on the last ':' to obtain schema_ref and version. + schema_ref may itself contain ':' (e.g. "schema:edge-manifest:1.0.0"). + Matches data.schema_ref and data.schema_version in the record entries' JSONB. 
+ """ + ref, _, version = schema_logical_key.rpartition(":") + with self._pool.connection() as conn, conn.transaction(), conn.cursor() as cur: + self._set_rls_context(cur, owner_id) + cur.execute(_load_sql("count_records_referencing"), (owner_id, ref, version)) + count_row = cur.fetchone() + count = int(count_row["cnt"]) if count_row else 0 + if count == 0: + return (0, []) + cur.execute(_load_sql("list_records_referencing_ids"), (owner_id, ref, version)) + ids = [str(r["id"]) for r in cur.fetchall()] + return (count, ids) + # ------------------------------------------------------------------ # User operations (for OAuth auto-provisioning) # ------------------------------------------------------------------ diff --git a/src/mcp_awareness/sql/count_records_referencing.sql b/src/mcp_awareness/sql/count_records_referencing.sql new file mode 100644 index 0000000..6ce8151 --- /dev/null +++ b/src/mcp_awareness/sql/count_records_referencing.sql @@ -0,0 +1,14 @@ +/* name: count_records_referencing */ +/* mode: literal */ +/* Count records referencing a schema version (for deletion-protection checks). + schema_logical_key is decomposed at the Python layer into (schema_ref, schema_version) + via rpartition(":") — schema_ref may itself contain ':' (e.g. "schema:edge-manifest"). + Params: owner_id, schema_ref, schema_version +*/ +SELECT COUNT(*) AS cnt +FROM entries +WHERE type = 'record' + AND owner_id = %s + AND data->>'schema_ref' = %s + AND data->>'schema_version' = %s + AND deleted IS NULL diff --git a/src/mcp_awareness/sql/list_records_referencing_ids.sql b/src/mcp_awareness/sql/list_records_referencing_ids.sql new file mode 100644 index 0000000..a2f335d --- /dev/null +++ b/src/mcp_awareness/sql/list_records_referencing_ids.sql @@ -0,0 +1,14 @@ +/* name: list_records_referencing_ids */ +/* mode: literal */ +/* Returns up to 10 record ids referencing a schema version, for deletion-blocker detail. 
+ Params: owner_id, schema_ref, schema_version +*/ +SELECT id +FROM entries +WHERE type = 'record' + AND owner_id = %s + AND data->>'schema_ref' = %s + AND data->>'schema_version' = %s + AND deleted IS NULL +ORDER BY created +LIMIT 10 diff --git a/src/mcp_awareness/store.py b/src/mcp_awareness/store.py index 66c4552..3137f40 100644 --- a/src/mcp_awareness/store.py +++ b/src/mcp_awareness/store.py @@ -349,6 +349,16 @@ def find_schema(self, owner_id: str, logical_key: str) -> Entry | None: """ ... + def count_records_referencing( + self, owner_id: str, schema_logical_key: str + ) -> tuple[int, list[str]]: + """Return (total_count, first_N_ids) of non-deleted records referencing a schema. + + The schema_logical_key is composed as f"{schema_ref}:{schema_version}". + Caller uses total_count for the error payload and ids for the blocker list. + """ + ... + def clear(self, owner_id: str) -> None: """Delete all entries, reads, actions, and embeddings for an owner.""" ... diff --git a/tests/test_store.py b/tests/test_store.py index 9a5e7d1..fbcedf0 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -3328,3 +3328,64 @@ def test_find_schema_excludes_soft_deleted(store): stored = store.add(TEST_OWNER, entry) store.soft_delete_by_id(TEST_OWNER, stored.id) assert store.find_schema(TEST_OWNER, "s:test:1.0.0") is None + + +# ------------------------------------------------------------------ +# count_records_referencing tests +# ------------------------------------------------------------------ + + +def _make_record_entry(logical_key: str, schema_ref: str, schema_version: str, content) -> Entry: + return Entry( + id=make_id(), + type=EntryType.RECORD, + source="test", + tags=[], + created=now_utc(), + data={ + "schema_ref": schema_ref, + "schema_version": schema_version, + "content": content, + "description": "test record", + "learned_from": "test", + }, + logical_key=logical_key, + ) + + +def test_count_records_referencing_returns_zero_when_none(store): + count, ids = 
store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 0 + assert ids == [] + + +def test_count_records_referencing_counts_matching_records(store): + for i in range(3): + store.add(TEST_OWNER, _make_record_entry(f"rec-{i}", "s:test", "1.0.0", {"i": i})) + count, ids = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 3 + assert len(ids) == 3 + + +def test_count_records_referencing_excludes_soft_deleted(store): + entry = _make_record_entry("rec-1", "s:test", "1.0.0", {}) + store.add(TEST_OWNER, entry) + store.soft_delete_by_id(TEST_OWNER, entry.id) + count, ids = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 0 + assert ids == [] + + +def test_count_records_referencing_ignores_other_versions(store): + store.add(TEST_OWNER, _make_record_entry("rec-1", "s:test", "1.0.0", {})) + store.add(TEST_OWNER, _make_record_entry("rec-2", "s:test", "2.0.0", {})) + count, _ = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 1 + + +def test_count_records_referencing_caps_id_list_at_ten(store): + for i in range(15): + store.add(TEST_OWNER, _make_record_entry(f"rec-{i}", "s:test", "1.0.0", {"i": i})) + count, ids = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") + assert count == 15 + assert len(ids) == 10 From 1b7bc0674af2f493bf2ceaf3e951c3cee730e1e3 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:53:03 -0500 Subject: [PATCH 10/28] feat: add validation.resolve_schema delegating to Store.find_schema Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/validation.py | 23 ++++++++++++++- tests/test_validation.py | 52 +++++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/src/mcp_awareness/validation.py b/src/mcp_awareness/validation.py index 1aca246..5e137ae 100644 --- a/src/mcp_awareness/validation.py +++ 
b/src/mcp_awareness/validation.py @@ -23,7 +23,10 @@ from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING, Any, Protocol + +if TYPE_CHECKING: + from mcp_awareness.schema import Entry from jsonschema import Draft202012Validator, ValidationError @@ -73,3 +76,21 @@ def validate_record_content(schema_body: dict[str, Any], content: Any) -> list[d kept = [_flatten_error(e) for e in all_errors[:_MAX_VALIDATION_ERRORS]] kept.append({"truncated": True, "total_errors": len(all_errors)}) return kept + + +class _SchemaFinder(Protocol): + """Minimal protocol for resolve_schema's store dependency.""" + + def find_schema(self, owner_id: str, logical_key: str) -> Entry | None: + ... + + +def resolve_schema( + store: _SchemaFinder, owner_id: str, family: str, version: str +) -> Entry | None: + """Resolve a schema by family + version, preferring caller-owned. + + Delegates to Store.find_schema (which handles the _system fallback at + the SQL level). Returns the schema Entry or None. 
+ """ + return store.find_schema(owner_id, compose_schema_logical_key(family, version)) diff --git a/tests/test_validation.py b/tests/test_validation.py index 8bc54b6..932a7bd 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -21,8 +21,12 @@ import jsonschema import pytest -from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body, validate_record_content - +from mcp_awareness.validation import ( + compose_schema_logical_key, + resolve_schema, + validate_record_content, + validate_schema_body, +) _PERSON_SCHEMA = { "type": "object", @@ -35,7 +39,9 @@ def test_compose_schema_logical_key_basic(): - assert compose_schema_logical_key("schema:edge-manifest", "1.0.0") == "schema:edge-manifest:1.0.0" + assert ( + compose_schema_logical_key("schema:edge-manifest", "1.0.0") == "schema:edge-manifest:1.0.0" + ) def test_compose_schema_logical_key_no_prefix(): @@ -131,3 +137,43 @@ def test_validate_record_content_truncates_at_50(): assert len(result) == 51 # 50 errors + 1 truncation marker assert result[-1]["truncated"] is True assert result[-1]["total_errors"] == 60 + + +class _StubStore: + """Minimal Store-like stub for validation unit tests. + + Records calls to find_schema and returns pre-configured results keyed by + (owner_id, logical_key). Only needs to implement find_schema; other Store + methods are never called by resolve_schema. 
+ """ + + def __init__(self): + self._results: dict[tuple[str, str], object] = {} + self.calls: list[tuple[str, str]] = [] + + def set(self, owner_id: str, logical_key: str, result): + self._results[(owner_id, logical_key)] = result + + def find_schema(self, owner_id, logical_key): + self.calls.append((owner_id, logical_key)) + return self._results.get((owner_id, logical_key)) + + +def test_resolve_schema_delegates_to_find_schema(): + stub = _StubStore() + sentinel = object() + stub.set("alice", "s:test:1.0.0", sentinel) + result = resolve_schema(stub, "alice", "s:test", "1.0.0") + assert result is sentinel + + +def test_resolve_schema_returns_none_when_missing(): + stub = _StubStore() + assert resolve_schema(stub, "alice", "s:nope", "1.0.0") is None + + +def test_resolve_schema_composes_logical_key_correctly(): + """Confirms family+version are composed via compose_schema_logical_key.""" + stub = _StubStore() + resolve_schema(stub, "alice", "schema:edge-manifest", "2.3.4") + assert stub.calls == [("alice", "schema:edge-manifest:2.3.4")] From 524cfe3561022fd676ae19019058ad2dcb543f7a Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:54:30 -0500 Subject: [PATCH 11/28] feat: add assert_schema_deletable and SchemaInUseError Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/validation.py | 32 ++++++++++++++++++++++++++++++++ tests/test_validation.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/src/mcp_awareness/validation.py b/src/mcp_awareness/validation.py index 5e137ae..9bb5d70 100644 --- a/src/mcp_awareness/validation.py +++ b/src/mcp_awareness/validation.py @@ -94,3 +94,35 @@ def resolve_schema( the SQL level). Returns the schema Entry or None. 
""" return store.find_schema(owner_id, compose_schema_logical_key(family, version)) + + +class SchemaInUseError(Exception): + """Raised when a schema cannot be deleted because records reference it. + + Callers at the MCP boundary translate this into a structured schema_in_use + error response with the referencing_records list and total_count. + """ + + def __init__(self, total_count: int, referencing_records: list[str]): + self.total_count = total_count + self.referencing_records = referencing_records + super().__init__( + f"Cannot delete schema: {total_count} record(s) still reference it" + ) + + +class _RefCounter(Protocol): + """Minimal protocol for assert_schema_deletable's store dependency.""" + + def count_records_referencing( + self, owner_id: str, schema_logical_key: str + ) -> tuple[int, list[str]]: ... + + +def assert_schema_deletable( + store: _RefCounter, owner_id: str, schema_logical_key: str +) -> None: + """Raise SchemaInUseError if any non-deleted records reference this schema.""" + count, ids = store.count_records_referencing(owner_id, schema_logical_key) + if count > 0: + raise SchemaInUseError(total_count=count, referencing_records=ids) diff --git a/tests/test_validation.py b/tests/test_validation.py index 932a7bd..4d7fb37 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -22,6 +22,8 @@ import pytest from mcp_awareness.validation import ( + SchemaInUseError, + assert_schema_deletable, compose_schema_logical_key, resolve_schema, validate_record_content, @@ -177,3 +179,31 @@ def test_resolve_schema_composes_logical_key_correctly(): stub = _StubStore() resolve_schema(stub, "alice", "schema:edge-manifest", "2.3.4") assert stub.calls == [("alice", "schema:edge-manifest:2.3.4")] + + +class _CounterStore: + """Stub exposing count_records_referencing for assert_schema_deletable tests.""" + + def __init__(self, count: int, ids: list[str]): + self._count = count + self._ids = ids + + def count_records_referencing(self, owner_id, 
schema_logical_key): + return (self._count, self._ids) + + +def test_assert_schema_deletable_passes_with_zero_refs(): + # Must not raise + assert_schema_deletable(_CounterStore(0, []), "alice", "s:test:1.0.0") + + +def test_assert_schema_deletable_raises_with_refs(): + with pytest.raises(SchemaInUseError) as excinfo: + assert_schema_deletable(_CounterStore(3, ["id1", "id2", "id3"]), "alice", "s:test:1.0.0") + assert excinfo.value.total_count == 3 + assert excinfo.value.referencing_records == ["id1", "id2", "id3"] + + +def test_schema_in_use_error_has_readable_message(): + err = SchemaInUseError(total_count=5, referencing_records=["a", "b"]) + assert "5" in str(err) From ab50543df5165ca7bfb4f30853e9716917245fa8 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:56:07 -0500 Subject: [PATCH 12/28] feat: add migration seeding _system user for shared schemas Idempotent INSERT ON CONFLICT DO NOTHING seeds the _system user row so entries with owner_id='_system' have a valid owner. Includes idempotence test verifying the ON CONFLICT path does not create duplicates. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- ...8h9i0j1k2l3_add_system_user_for_schemas.py | 56 +++++++++++++++++++ tests/test_store.py | 15 +++++ 2 files changed, 71 insertions(+) create mode 100644 alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py diff --git a/alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py b/alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py new file mode 100644 index 0000000..ed49c5c --- /dev/null +++ b/alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py @@ -0,0 +1,56 @@ +# mcp-awareness — ambient system awareness for AI agents +# Copyright (C) 2026 Chris Means +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""add _system user for system-owned schemas + +Revision ID: m8h9i0j1k2l3 +Revises: l7g8h9i0j1k2 +Create Date: 2026-04-13 00:00:00.000000 + +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from alembic import op + +revision: str = "m8h9i0j1k2l3" +down_revision: str | Sequence[str] | None = "l7g8h9i0j1k2" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Seed the _system user for system-owned schema entries. + + Idempotent — ON CONFLICT DO NOTHING lets the migration run multiple + times safely (e.g., after a stamp-and-reapply). 
+ """ + op.execute( + "INSERT INTO users (id, display_name) " + "VALUES ('_system', 'System-managed schemas') " + "ON CONFLICT (id) DO NOTHING" + ) + + +def downgrade() -> None: + """Remove the _system user. + + Will fail if any entries still reference owner_id='_system'. Operators + must soft-delete or re-home such entries before downgrade. + """ + op.execute("DELETE FROM users WHERE id = '_system'") diff --git a/tests/test_store.py b/tests/test_store.py index fbcedf0..7de182c 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -3389,3 +3389,18 @@ def test_count_records_referencing_caps_id_list_at_ten(store): count, ids = store.count_records_referencing(TEST_OWNER, "s:test:1.0.0") assert count == 15 assert len(ids) == 10 + + +def test_system_user_exists_after_migration_idempotent(store): + """The conftest fixture inserts _system — verifies ON CONFLICT DO NOTHING semantics.""" + with store._pool.connection() as conn, conn.cursor() as cur: + cur.execute( + "INSERT INTO users (id, display_name) VALUES ('_system', 'Re-insert') " + "ON CONFLICT (id) DO NOTHING" + ) + conn.commit() + cur.execute("SELECT COUNT(*) FROM users WHERE id = '_system'") + row = cur.fetchone() + # Cursor may be dict_row — handle both styles + count = row["count"] if isinstance(row, dict) else row[0] + assert count == 1 From 5f93485c7f1de811f2dc128ff780b8a24a97cb21 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:02:46 -0500 Subject: [PATCH 13/28] feat: add register_schema MCP tool Implements the register_schema tool with JSON Schema Draft 2020-12 validation, duplicate detection via psycopg UniqueViolation, and integration tests. Also registers the tool in server.py re-exports and updates TestWriteResponseShapes to cover the new tool. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/server.py | 1 + src/mcp_awareness/tools.py | 105 +++++++++++++++++++++ tests/test_server.py | 10 ++ tests/test_tools_schema_record.py | 148 ++++++++++++++++++++++++++++++ 4 files changed, 264 insertions(+) create mode 100644 tests/test_tools_schema_record.py diff --git a/src/mcp_awareness/server.py b/src/mcp_awareness/server.py index 4a71e42..47c9f94 100644 --- a/src/mcp_awareness/server.py +++ b/src/mcp_awareness/server.py @@ -696,6 +696,7 @@ def _run() -> None: get_tags, get_unread, learn_pattern, + register_schema, remember, remind, report_alert, diff --git a/src/mcp_awareness/tools.py b/src/mcp_awareness/tools.py index ef181f7..530349d 100644 --- a/src/mcp_awareness/tools.py +++ b/src/mcp_awareness/tools.py @@ -528,6 +528,111 @@ async def remember( return json.dumps({"status": "ok", "id": entry.id}) +@_srv.mcp.tool() +@_timed +async def register_schema( + source: str, + tags: list[str], + description: str, + family: str, + version: str, + schema: dict[str, Any], + learned_from: str = "conversation", + language: str | None = None, +) -> str: + """Register a new JSON Schema entry for later use by records. + + Validates the schema body against JSON Schema Draft 2020-12 meta-schema + on write. Family + version are combined into the entry's logical_key + (family:version); each version is a separate entry. Schemas are + absolutely immutable once registered — to change one, register a new + version and (if no records reference the old one) delete it. 
+ + Returns: + JSON: {"status": "ok", "id": "", "logical_key": ""} + + If you receive an unstructured error, the failure is in the transport + or platform layer, not in awareness.""" + import psycopg.errors + from jsonschema import exceptions as jse # type: ignore[import-untyped] + + from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body + + # Validate inputs + if not family: + _error_response( + "invalid_parameter", + "family must be a non-empty string", + retryable=False, + param="family", + ) + if not version: + _error_response( + "invalid_parameter", + "version must be a non-empty string", + retryable=False, + param="version", + ) + + # Meta-schema validation + try: + validate_schema_body(schema) + except jse.SchemaError as e: + _error_response( + "invalid_schema", + f"Schema does not conform to JSON Schema Draft 2020-12: {e.message}", + retryable=False, + ) + + logical_key = compose_schema_logical_key(family, version) + + now = now_utc() + data: dict[str, Any] = { + "family": family, + "version": version, + "schema": schema, + "description": description, + "learned_from": learned_from, + } + text_for_detect = compose_detection_text("schema", data) + resolved_lang = resolve_language(explicit=language, text_for_detection=text_for_detect) + _check_unsupported_language(text_for_detect, resolved_lang) + + entry = Entry( + id=make_id(), + type=EntryType.SCHEMA, + source=source, + tags=tags, + created=now, + expires=None, + data=data, + logical_key=logical_key, + language=resolved_lang, + ) + + try: + _srv.store.add(_srv._owner_id(), entry) + except psycopg.errors.UniqueViolation: + _error_response( + "schema_already_exists", + f"Schema {logical_key} already exists in source {source!r}", + retryable=False, + ) + except Exception as e: + # Fallback: detect unique constraint via message for non-psycopg wrappers + msg = str(e).lower() + if "unique" in msg or "duplicate" in msg or "23505" in msg: + _error_response( + 
"schema_already_exists", + f"Schema {logical_key} already exists in source {source!r}", + retryable=False, + ) + raise + + _srv._generate_embedding(entry) + return json.dumps({"status": "ok", "id": entry.id, "logical_key": logical_key}) + + @_srv.mcp.tool() @_timed async def update_entry( diff --git a/tests/test_server.py b/tests/test_server.py index 8264c7d..65d9def 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -4000,6 +4000,7 @@ class TestWriteResponseShapes: # "id" here means the caller-supplied entry_id (lookup target), # NOT a server-generated entry id like other tools' responses "update_intention": {"id"}, + "register_schema": set(), # response is server-derived id + logical_key } # Tools registered on _srv.mcp that are NOT write tools — explicitly @@ -4169,6 +4170,15 @@ async def _invoke_with_sentinels(self, tool_name: str, sentinels: set[str]) -> s state="fired", reason=s(sentinels, "reason"), ) + if tool_name == "register_schema": + return await server_mod.register_schema( + source=s(sentinels, "src"), + tags=[s(sentinels, "tag")], + description=s(sentinels, "desc"), + family="schema:sentinel-test", + version="1.0.0", + schema={"type": "object"}, + ) raise ValueError(f"Unknown tool in registry: {tool_name}") @pytest.mark.anyio diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py new file mode 100644 index 0000000..1ec166b --- /dev/null +++ b/tests/test_tools_schema_record.py @@ -0,0 +1,148 @@ +# mcp-awareness — ambient system awareness for AI agents +# Copyright (C) 2026 Chris Means +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""Integration tests for schema/record MCP tool handlers. + +Uses testcontainers Postgres + direct tool-function calls via the server's +_owner_id / store accessors (both monkeypatched for tests). +""" + +from __future__ import annotations + +import json + +import pytest + +from mcp_awareness.schema import EntryType # noqa: F401 + +TEST_OWNER = "test-owner" + + +@pytest.fixture +def configured_server(store, monkeypatch): + """Wire the FastMCP server-module helpers to the testcontainers store and owner.""" + import mcp_awareness.server as srv + + monkeypatch.setattr(srv, "store", store) + monkeypatch.setattr(srv, "_owner_id", lambda: TEST_OWNER) + yield srv + + +def _parse_tool_error(excinfo): + """Parse the structured JSON envelope from a ToolError.""" + return json.loads(str(excinfo.value)) + + +@pytest.mark.asyncio +async def test_register_schema_happy_path(configured_server): + from mcp_awareness.tools import register_schema + + response = await register_schema( + source="test", + tags=["schema"], + description="test schema", + family="schema:test-thing", + version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + body = json.loads(response) + assert body["status"] == "ok" + assert body["logical_key"] == "schema:test-thing:1.0.0" + assert "id" in body + + +@pytest.mark.asyncio +async def test_register_schema_rejects_invalid_schema(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import register_schema + + with pytest.raises(ToolError) as excinfo: + await register_schema( 
source="test", + tags=[], + description="bad schema", + family="schema:bad", + version="1.0.0", + schema={"type": "strng"}, # typo — not a valid JSON Schema type + ) + err = _parse_tool_error(excinfo)["error"] + assert err["code"] == "invalid_schema" + + +@pytest.mark.asyncio +async def test_register_schema_rejects_duplicate_family_version(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import register_schema + + await register_schema( + source="test", + tags=[], + description="v1", + family="schema:dup", + version="1.0.0", + schema={"type": "object"}, + ) + with pytest.raises(ToolError) as excinfo: + await register_schema( + source="test", + tags=[], + description="v1 again", + family="schema:dup", + version="1.0.0", + schema={"type": "object"}, + ) + err = _parse_tool_error(excinfo)["error"] + assert err["code"] == "schema_already_exists" + + +@pytest.mark.asyncio +async def test_register_schema_rejects_empty_family(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import register_schema + + with pytest.raises(ToolError) as excinfo: + await register_schema( + source="test", + tags=[], + description="bad", + family="", + version="1.0.0", + schema={"type": "object"}, + ) + err = _parse_tool_error(excinfo)["error"] + assert err["code"] == "invalid_parameter" + + +@pytest.mark.asyncio +async def test_register_schema_rejects_empty_version(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import register_schema + + with pytest.raises(ToolError) as excinfo: + await register_schema( + source="test", + tags=[], + description="bad", + family="schema:test", + version="", + schema={"type": "object"}, + ) + err = _parse_tool_error(excinfo)["error"] + assert err["code"] == "invalid_parameter" From d4effbf425f8497c083b9ad25543bc9ad71e96cf Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" 
<3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:07:59 -0500 Subject: [PATCH 14/28] feat: add create_record MCP tool with schema validation and _system fallback Resolves schema by ref+version (caller-owned first, _system fallback), validates content via validate_record_content, and upserts on logical_key. Raises structured ToolError with validation_errors list on schema mismatch. Truncation sentinel from validate_record_content is promoted to top-level envelope fields. TestWriteResponseShapes updated for create_record. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/server.py | 1 + src/mcp_awareness/tools.py | 105 +++++++++++++++++++++++++++ tests/test_server.py | 17 +++++ tests/test_tools_schema_record.py | 117 ++++++++++++++++++++++++++++++ 4 files changed, 240 insertions(+) diff --git a/src/mcp_awareness/server.py b/src/mcp_awareness/server.py index 47c9f94..be402e6 100644 --- a/src/mcp_awareness/server.py +++ b/src/mcp_awareness/server.py @@ -680,6 +680,7 @@ def _run() -> None: acted_on, add_context, backfill_embeddings, + create_record, delete_entry, get_actions, get_activity, diff --git a/src/mcp_awareness/tools.py b/src/mcp_awareness/tools.py index 530349d..96d9111 100644 --- a/src/mcp_awareness/tools.py +++ b/src/mcp_awareness/tools.py @@ -633,6 +633,111 @@ async def register_schema( return json.dumps({"status": "ok", "id": entry.id, "logical_key": logical_key}) +@_srv.mcp.tool() +@_timed +async def create_record( + source: str, + tags: list[str], + description: str, + logical_key: str, + schema_ref: str, + schema_version: str, + content: Any, + learned_from: str = "conversation", + language: str | None = None, +) -> str: + """Create or upsert a record validated against a registered schema. + + Resolves the target schema by schema_ref + schema_version (prefers + caller-owned, falls back to _system). 
Validates content against the + schema on write; rejects with a structured validation_failed error + listing every validation error. Upserts on matching (source, logical_key) + — same logical_key means update in place with changelog. + + Returns: + JSON: {"status": "ok", "id": "", "action": "created" | "updated"} + + If you receive an unstructured error, the failure is in the transport + or platform layer, not in awareness.""" + from mcp.server.fastmcp.exceptions import ToolError + from jsonschema import exceptions as jse # type: ignore[import-untyped] + + from mcp_awareness.validation import resolve_schema, validate_record_content + + resolved = resolve_schema(_srv.store, _srv._owner_id(), schema_ref, schema_version) + if resolved is None: + raise ToolError(json.dumps({ + "status": "error", + "error": { + "code": "schema_not_found", + "message": f"No schema {schema_ref}:{schema_version} in your namespace or _system", + "retryable": False, + "schema_ref": schema_ref, + "schema_version": schema_version, + "searched_owners": [_srv._owner_id(), "_system"], + }, + })) + + schema_body = resolved.data["schema"] # type: ignore[union-attr] + try: + errors = validate_record_content(schema_body, content) + except jse.JsonSchemaException as e: + _error_response( + "validation_error", + f"Unexpected content validation error: {e}", + retryable=False, + ) + + if errors: + # Detect truncation sentinel (always last item when present) + truncated = errors[-1].get("truncated") is True + total_errors = errors[-1]["total_errors"] if truncated else len(errors) + validation_errors = errors[:-1] if truncated else errors + err_body: dict[str, Any] = { + "code": "validation_failed", + "message": f"Record content does not conform to schema {schema_ref}:{schema_version} ({total_errors} errors)", + "retryable": False, + "schema_ref": schema_ref, + "schema_version": schema_version, + "validation_errors": validation_errors, + } + if truncated: + err_body["truncated"] = True + 
err_body["total_errors"] = total_errors + raise ToolError(json.dumps({"status": "error", "error": err_body})) + + now = now_utc() + data: dict[str, Any] = { + "schema_ref": schema_ref, + "schema_version": schema_version, + "content": content, + "description": description, + "learned_from": learned_from, + } + text_for_detect = compose_detection_text("record", data) + resolved_lang = resolve_language(explicit=language, text_for_detection=text_for_detect) + _check_unsupported_language(text_for_detect, resolved_lang) + + entry = Entry( + id=make_id(), + type=EntryType.RECORD, + source=source, + tags=tags, + created=now, + expires=None, + data=data, + logical_key=logical_key, + language=resolved_lang, + ) + + saved, created = _srv.store.upsert_by_logical_key( + _srv._owner_id(), source, logical_key, entry + ) + _srv._generate_embedding(saved) + action = "created" if created else "updated" + return json.dumps({"status": "ok", "id": saved.id, "action": action}) + + @_srv.mcp.tool() @_timed async def update_entry( diff --git a/tests/test_server.py b/tests/test_server.py index 65d9def..c313862 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -4001,6 +4001,7 @@ class TestWriteResponseShapes: # NOT a server-generated entry id like other tools' responses "update_intention": {"id"}, "register_schema": set(), # response is server-derived id + logical_key + "create_record": set(), # response contains only server-derived fields } # Tools registered on _srv.mcp that are NOT write tools — explicitly @@ -4179,6 +4180,22 @@ async def _invoke_with_sentinels(self, tool_name: str, sentinels: set[str]) -> s version="1.0.0", schema={"type": "object"}, ) + if tool_name == "create_record": + # Register a schema first (not sentinel-wrapped — it's a prerequisite) + await server_mod.register_schema( + source="setup", tags=[], description="setup", + family="schema:sentinel-record-test", version="1.0.0", + schema={"type": "object"}, + ) + return await server_mod.create_record( + 
source=s(sentinels, "src"), + tags=[s(sentinels, "tag")], + description=s(sentinels, "desc"), + logical_key="sentinel-record-key", + schema_ref="schema:sentinel-record-test", + schema_version="1.0.0", + content={"key": "value"}, + ) raise ValueError(f"Unknown tool in registry: {tool_name}") @pytest.mark.anyio diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py index 1ec166b..d8870c3 100644 --- a/tests/test_tools_schema_record.py +++ b/tests/test_tools_schema_record.py @@ -146,3 +146,120 @@ async def test_register_schema_rejects_empty_version(configured_server): ) err = _parse_tool_error(excinfo)["error"] assert err["code"] == "invalid_parameter" + + +@pytest.mark.asyncio +async def test_create_record_happy_path(configured_server): + from mcp_awareness.tools import create_record, register_schema + + await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + response = await create_record( + source="test", tags=[], description="a thing", + logical_key="thing-one", + schema_ref="schema:thing", schema_version="1.0.0", + content={"name": "widget"}, + ) + body = json.loads(response) + assert body["status"] == "ok" + assert body["action"] == "created" + assert "id" in body + + +@pytest.mark.asyncio +async def test_create_record_rejects_unknown_schema(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import create_record + + with pytest.raises(ToolError) as excinfo: + await create_record( + source="test", tags=[], description="orphan", + logical_key="thing-one", + schema_ref="schema:does-not-exist", schema_version="1.0.0", + content={"name": "widget"}, + ) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "schema_not_found" + assert err["searched_owners"] == [TEST_OWNER, "_system"] + + +@pytest.mark.asyncio +async def 
test_create_record_surfaces_validation_errors(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import create_record, register_schema + + await register_schema( + source="test", tags=[], description="s", + family="schema:person", version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, "required": ["name"]}, + ) + with pytest.raises(ToolError) as excinfo: + await create_record( + source="test", tags=[], description="bad person", + logical_key="p1", + schema_ref="schema:person", schema_version="1.0.0", + content={"age": "thirty"}, + ) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "validation_failed" + validators = {ve["validator"] for ve in err["validation_errors"]} + assert "required" in validators + assert "type" in validators + + +@pytest.mark.asyncio +async def test_create_record_upsert_on_same_logical_key(configured_server): + from mcp_awareness.tools import create_record, register_schema + + await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object"}, + ) + r1 = json.loads(await create_record( + source="test", tags=[], description="v1", + logical_key="thing-one", + schema_ref="schema:thing", schema_version="1.0.0", + content={"v": 1}, + )) + assert r1["action"] == "created" + r2 = json.loads(await create_record( + source="test", tags=[], description="v2", + logical_key="thing-one", + schema_ref="schema:thing", schema_version="1.0.0", + content={"v": 2}, + )) + assert r2["action"] == "updated" + assert r2["id"] == r1["id"] + + +@pytest.mark.asyncio +async def test_create_record_uses_system_schema_fallback(configured_server, store): + """A record can reference a schema owned by _system, not the caller.""" + from mcp_awareness.schema import Entry, make_id, now_utc + from mcp_awareness.tools import create_record + + # Seed _system schema directly via 
store (not via tool — tool writes caller's owner) + store.add("_system", Entry( + id=make_id(), type=EntryType.SCHEMA, source="system", + tags=["system"], created=now_utc(), expires=None, + data={ + "family": "schema:system-thing", "version": "1.0.0", + "schema": {"type": "object"}, + "description": "system-seeded", "learned_from": "cli-bootstrap", + }, + logical_key="schema:system-thing:1.0.0", + )) + + response = await create_record( + source="test", tags=[], description="mine", + logical_key="mine-1", + schema_ref="schema:system-thing", schema_version="1.0.0", + content={"any": "thing"}, + ) + body = json.loads(response) + assert body["status"] == "ok" + assert body["action"] == "created" From 65ee9b847c31c9de8362fd7a5b5cf621b88259d5 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:13:09 -0500 Subject: [PATCH 15/28] refactor: extend _error_response to accept **extras Adds **extras: Any kwargs to _error_response so structured fields beyond the fixed set (schema_ref, schema_version, searched_owners, validation_errors, etc.) can flow through the error envelope uniformly without raw ToolError construction at call sites. Adds TestErrorResponseExtras unit tests verifying extras appear in the raised ToolError JSON payload and do not clobber the fixed fields. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/helpers.py | 7 +++++ tests/test_helpers.py | 51 ++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/mcp_awareness/helpers.py b/src/mcp_awareness/helpers.py index ce742f3..73799a1 100644 --- a/src/mcp_awareness/helpers.py +++ b/src/mcp_awareness/helpers.py @@ -221,6 +221,7 @@ def _error_response( valid: list[str] | None = None, suggestion: str | None = None, help_url: str | None = None, + **extras: Any, ) -> NoReturn: """Build a structured error envelope and raise ToolError. 
@@ -228,6 +229,10 @@ def _error_response( so clients get proper error signaling. The JSON envelope provides structured fields for smart clients alongside a human-readable message. + Extra keyword arguments (``**extras``) are merged into the error dict + after the fixed fields, allowing structured context such as + ``schema_ref``, ``validation_errors``, ``searched_owners``, etc. + Raises: ToolError: always — this function never returns. """ @@ -248,6 +253,8 @@ def _error_response( error["suggestion"] = suggestion if help_url is not None: error["help_url"] = help_url + for k, v in extras.items(): + error[k] = v raise ToolError(json.dumps({"status": "error", "error": error})) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index ce9ff4c..6eef5db 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -153,6 +153,57 @@ def test_multiple_extra_params(self): assert "sslmode=verify-full" in result assert "connect_timeout=10" in result + +class TestErrorResponseExtras: + """Test that _error_response merges **extras into the error envelope.""" + + def test_extras_appear_in_payload(self): + """Extra keyword arguments must be present in the raised ToolError JSON.""" + from mcp.server.fastmcp.exceptions import ToolError + + with pytest.raises(ToolError) as excinfo: + _error_response( + "schema_not_found", + "No matching schema", + retryable=False, + schema_ref="schema:thing", + schema_version="1.0.0", + searched_owners=["alice", "_system"], + ) + payload = json.loads(str(excinfo.value)) + err = payload["error"] + assert err["code"] == "schema_not_found" + assert err["schema_ref"] == "schema:thing" + assert err["schema_version"] == "1.0.0" + assert err["searched_owners"] == ["alice", "_system"] + + def test_extras_do_not_override_fixed_fields(self): + """Extras cannot clobber the mandatory fixed fields.""" + from mcp.server.fastmcp.exceptions import ToolError + + with pytest.raises(ToolError) as excinfo: + _error_response( + "some_error", + "Some message", 
+ retryable=True, + extra_field="extra_value", + ) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "some_error" + assert err["message"] == "Some message" + assert err["retryable"] is True + assert err["extra_field"] == "extra_value" + + def test_no_extras_still_works(self): + """Calling without extras should behave as before.""" + from mcp.server.fastmcp.exceptions import ToolError + + with pytest.raises(ToolError) as excinfo: + _error_response("plain_error", "Plain message", retryable=False) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "plain_error" + assert "schema_ref" not in err + def test_unix_socket_host(self): """Unix socket path goes in query string, not netloc.""" dsn = "host=/var/run/postgresql dbname=db user=u" From 61197a4c4384c6ffb0e6c4f0a8bfc1805c4c11f5 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:13:19 -0500 Subject: [PATCH 16/28] feat: update_entry enforces schema immutability + record re-validation Adds type-specific branching in update_entry after the updates dict is built but before the store write: - SCHEMA entries: always rejected with schema_immutable error - RECORD entries + content kwarg: re-validates content against the registered schema (resolver uses _system fallback); rejects with validation_failed including structured per-error list - RECORD entries + no content kwarg: passes through unchanged (description-only updates skip re-validation) Also refactors create_record to use _error_response with **extras instead of raw ToolError(json.dumps(...)) construction, removing the last two raw-ToolError sites from the tools module. Adds 4 integration tests covering all branches. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/tools.py | 96 ++++++++++--- tests/test_tools_schema_record.py | 229 +++++++++++++++++++++++++----- 2 files changed, 265 insertions(+), 60 deletions(-) diff --git a/src/mcp_awareness/tools.py b/src/mcp_awareness/tools.py index 96d9111..3789ccc 100644 --- a/src/mcp_awareness/tools.py +++ b/src/mcp_awareness/tools.py @@ -659,24 +659,20 @@ async def create_record( If you receive an unstructured error, the failure is in the transport or platform layer, not in awareness.""" - from mcp.server.fastmcp.exceptions import ToolError from jsonschema import exceptions as jse # type: ignore[import-untyped] from mcp_awareness.validation import resolve_schema, validate_record_content resolved = resolve_schema(_srv.store, _srv._owner_id(), schema_ref, schema_version) if resolved is None: - raise ToolError(json.dumps({ - "status": "error", - "error": { - "code": "schema_not_found", - "message": f"No schema {schema_ref}:{schema_version} in your namespace or _system", - "retryable": False, - "schema_ref": schema_ref, - "schema_version": schema_version, - "searched_owners": [_srv._owner_id(), "_system"], - }, - })) + _error_response( + "schema_not_found", + f"No schema {schema_ref}:{schema_version} in your namespace or _system", + retryable=False, + schema_ref=schema_ref, + schema_version=schema_version, + searched_owners=[_srv._owner_id(), "_system"], + ) schema_body = resolved.data["schema"] # type: ignore[union-attr] try: @@ -693,18 +689,23 @@ async def create_record( truncated = errors[-1].get("truncated") is True total_errors = errors[-1]["total_errors"] if truncated else len(errors) validation_errors = errors[:-1] if truncated else errors - err_body: dict[str, Any] = { - "code": "validation_failed", - "message": f"Record content does not conform to schema {schema_ref}:{schema_version} ({total_errors} errors)", - "retryable": False, + vf_extras: dict[str, Any] = { "schema_ref": schema_ref, "schema_version": 
schema_version, "validation_errors": validation_errors, } if truncated: - err_body["truncated"] = True - err_body["total_errors"] = total_errors - raise ToolError(json.dumps({"status": "error", "error": err_body})) + vf_extras["truncated"] = True + vf_extras["total_errors"] = total_errors + _error_response( + "validation_failed", + ( + f"Record content does not conform to schema" + f" {schema_ref}:{schema_version} ({total_errors} errors)" + ), + retryable=False, + **vf_extras, + ) now = now_utc() data: dict[str, Any] = { @@ -730,9 +731,7 @@ async def create_record( language=resolved_lang, ) - saved, created = _srv.store.upsert_by_logical_key( - _srv._owner_id(), source, logical_key, entry - ) + saved, created = _srv.store.upsert_by_logical_key(_srv._owner_id(), source, logical_key, entry) _srv._generate_embedding(saved) action = "created" if created else "updated" return json.dumps({"status": "ok", "id": saved.id, "action": action}) @@ -764,6 +763,8 @@ async def update_entry( updates["tags"] = tags if source is not None: updates["source"] = source + # Preserve the raw content value for re-validation before stringifying it. + _raw_content: Any = content if content is not None: if not isinstance(content, str): content = json.dumps(content) @@ -782,6 +783,57 @@ async def update_entry( retryable=False, param="content", ) + # --- New: type-specific branching for schema and record entries --- + from mcp_awareness.schema import EntryType as _EntryType + from mcp_awareness.validation import resolve_schema, validate_record_content + + _existing = _srv.store.get_entry_by_id(_srv._owner_id(), entry_id) + if _existing is not None: + if _existing.type == _EntryType.SCHEMA: + _error_response( + "schema_immutable", + "Schemas cannot be updated. 
Register a new version instead.", + retryable=False, + ) + if _existing.type == _EntryType.RECORD and _raw_content is not None: + _schema_ref = _existing.data["schema_ref"] + _schema_version = _existing.data["schema_version"] + _resolved = resolve_schema(_srv.store, _srv._owner_id(), _schema_ref, _schema_version) + if _resolved is None: + _error_response( + "schema_not_found", + f"Cannot re-validate: schema {_schema_ref}:{_schema_version} not found", + retryable=False, + schema_ref=_schema_ref, + schema_version=_schema_version, + searched_owners=[_srv._owner_id(), "_system"], + ) + _content_to_validate = ( + json.loads(_raw_content) if isinstance(_raw_content, str) else _raw_content + ) + _errors = validate_record_content(_resolved.data["schema"], _content_to_validate) + if _errors: + _truncated = _errors[-1].get("truncated") is True + _total_errors = _errors[-1]["total_errors"] if _truncated else len(_errors) + _validation_errors = _errors[:-1] if _truncated else _errors + _vf_extras: dict[str, Any] = { + "schema_ref": _schema_ref, + "schema_version": _schema_version, + "validation_errors": _validation_errors, + } + if _truncated: + _vf_extras["truncated"] = True + _vf_extras["total_errors"] = _total_errors + _error_response( + "validation_failed", + ( + f"Record content does not conform to schema" + f" {_schema_ref}:{_schema_version} ({_total_errors} errors)" + ), + retryable=False, + **_vf_extras, + ) + # --- end branching --- result = _srv.store.update_entry(_srv._owner_id(), entry_id, updates) if result is None: _error_response( diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py index d8870c3..d7e785f 100644 --- a/tests/test_tools_schema_record.py +++ b/tests/test_tools_schema_record.py @@ -26,7 +26,7 @@ import pytest -from mcp_awareness.schema import EntryType # noqa: F401 +from mcp_awareness.schema import EntryType TEST_OWNER = "test-owner" @@ -153,14 +153,20 @@ async def test_create_record_happy_path(configured_server): from 
mcp_awareness.tools import create_record, register_schema await register_schema( - source="test", tags=[], description="s", - family="schema:thing", version="1.0.0", + source="test", + tags=[], + description="s", + family="schema:thing", + version="1.0.0", schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, ) response = await create_record( - source="test", tags=[], description="a thing", + source="test", + tags=[], + description="a thing", logical_key="thing-one", - schema_ref="schema:thing", schema_version="1.0.0", + schema_ref="schema:thing", + schema_version="1.0.0", content={"name": "widget"}, ) body = json.loads(response) @@ -172,13 +178,17 @@ async def test_create_record_happy_path(configured_server): @pytest.mark.asyncio async def test_create_record_rejects_unknown_schema(configured_server): from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import create_record with pytest.raises(ToolError) as excinfo: await create_record( - source="test", tags=[], description="orphan", + source="test", + tags=[], + description="orphan", logical_key="thing-one", - schema_ref="schema:does-not-exist", schema_version="1.0.0", + schema_ref="schema:does-not-exist", + schema_version="1.0.0", content={"name": "widget"}, ) err = json.loads(str(excinfo.value))["error"] @@ -189,18 +199,29 @@ async def test_create_record_rejects_unknown_schema(configured_server): @pytest.mark.asyncio async def test_create_record_surfaces_validation_errors(configured_server): from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import create_record, register_schema await register_schema( - source="test", tags=[], description="s", - family="schema:person", version="1.0.0", - schema={"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, "required": ["name"]}, + source="test", + tags=[], + description="s", + family="schema:person", + version="1.0.0", + schema={ + "type": 
"object", + "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, + "required": ["name"], + }, ) with pytest.raises(ToolError) as excinfo: await create_record( - source="test", tags=[], description="bad person", + source="test", + tags=[], + description="bad person", logical_key="p1", - schema_ref="schema:person", schema_version="1.0.0", + schema_ref="schema:person", + schema_version="1.0.0", content={"age": "thirty"}, ) err = json.loads(str(excinfo.value))["error"] @@ -215,23 +236,36 @@ async def test_create_record_upsert_on_same_logical_key(configured_server): from mcp_awareness.tools import create_record, register_schema await register_schema( - source="test", tags=[], description="s", - family="schema:thing", version="1.0.0", + source="test", + tags=[], + description="s", + family="schema:thing", + version="1.0.0", schema={"type": "object"}, ) - r1 = json.loads(await create_record( - source="test", tags=[], description="v1", - logical_key="thing-one", - schema_ref="schema:thing", schema_version="1.0.0", - content={"v": 1}, - )) + r1 = json.loads( + await create_record( + source="test", + tags=[], + description="v1", + logical_key="thing-one", + schema_ref="schema:thing", + schema_version="1.0.0", + content={"v": 1}, + ) + ) assert r1["action"] == "created" - r2 = json.loads(await create_record( - source="test", tags=[], description="v2", - logical_key="thing-one", - schema_ref="schema:thing", schema_version="1.0.0", - content={"v": 2}, - )) + r2 = json.loads( + await create_record( + source="test", + tags=[], + description="v2", + logical_key="thing-one", + schema_ref="schema:thing", + schema_version="1.0.0", + content={"v": 2}, + ) + ) assert r2["action"] == "updated" assert r2["id"] == r1["id"] @@ -243,23 +277,142 @@ async def test_create_record_uses_system_schema_fallback(configured_server, stor from mcp_awareness.tools import create_record # Seed _system schema directly via store (not via tool — tool writes caller's owner) - 
store.add("_system", Entry( - id=make_id(), type=EntryType.SCHEMA, source="system", - tags=["system"], created=now_utc(), expires=None, - data={ - "family": "schema:system-thing", "version": "1.0.0", - "schema": {"type": "object"}, - "description": "system-seeded", "learned_from": "cli-bootstrap", - }, - logical_key="schema:system-thing:1.0.0", - )) + store.add( + "_system", + Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="system", + tags=["system"], + created=now_utc(), + expires=None, + data={ + "family": "schema:system-thing", + "version": "1.0.0", + "schema": {"type": "object"}, + "description": "system-seeded", + "learned_from": "cli-bootstrap", + }, + logical_key="schema:system-thing:1.0.0", + ), + ) response = await create_record( - source="test", tags=[], description="mine", + source="test", + tags=[], + description="mine", logical_key="mine-1", - schema_ref="schema:system-thing", schema_version="1.0.0", + schema_ref="schema:system-thing", + schema_version="1.0.0", content={"any": "thing"}, ) body = json.loads(response) assert body["status"] == "ok" assert body["action"] == "created" + + +@pytest.mark.asyncio +async def test_update_entry_rejects_schema_update(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import register_schema, update_entry + + resp = json.loads( + await register_schema( + source="test", + tags=[], + description="s", + family="schema:thing", + version="1.0.0", + schema={"type": "object"}, + ) + ) + with pytest.raises(ToolError) as excinfo: + await update_entry(entry_id=resp["id"], description="new desc") + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "schema_immutable" + + +@pytest.mark.asyncio +async def test_update_entry_record_content_revalidates_valid(configured_server): + from mcp_awareness.tools import create_record, register_schema, update_entry + + await register_schema( + source="test", + tags=[], + description="s", + family="schema:thing", 
+ version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + r = json.loads( + await create_record( + source="test", + tags=[], + description="r", + logical_key="r1", + schema_ref="schema:thing", + schema_version="1.0.0", + content={"name": "good"}, + ) + ) + # Valid content update — passes re-validation + await update_entry(entry_id=r["id"], content={"name": "still-good"}) + + +@pytest.mark.asyncio +async def test_update_entry_record_content_rejects_invalid(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import create_record, register_schema, update_entry + + await register_schema( + source="test", + tags=[], + description="s", + family="schema:thing", + version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + r = json.loads( + await create_record( + source="test", + tags=[], + description="r", + logical_key="r1", + schema_ref="schema:thing", + schema_version="1.0.0", + content={"name": "good"}, + ) + ) + with pytest.raises(ToolError) as excinfo: + await update_entry(entry_id=r["id"], content={"name": 123}) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "validation_failed" + + +@pytest.mark.asyncio +async def test_update_entry_record_non_content_skips_revalidation(configured_server): + from mcp_awareness.tools import create_record, register_schema, update_entry + + await register_schema( + source="test", + tags=[], + description="s", + family="schema:thing", + version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}, + ) + r = json.loads( + await create_record( + source="test", + tags=[], + description="orig", + logical_key="r1", + schema_ref="schema:thing", + schema_version="1.0.0", + content={"name": "good"}, + ) + ) + # Description-only update skips re-validation + await update_entry(entry_id=r["id"], 
description="updated desc") From 9f8f4c5bd35209b5e03ab53a038dd89d88080d57 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:15:50 -0500 Subject: [PATCH 17/28] feat: delete_entry protects schemas referenced by live records Soft-deleting a schema entry is now blocked when live (non-deleted) records still reference it. The check runs in the by-id path only; bulk deletes by tags/source do not include schemas in scope. Adds three integration tests covering: no-records-succeeds, with-records-rejected, and allowed-after-records-deleted. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/tools.py | 16 +++++++++ tests/test_tools_schema_record.py | 60 +++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/src/mcp_awareness/tools.py b/src/mcp_awareness/tools.py index 3789ccc..63e0121 100644 --- a/src/mcp_awareness/tools.py +++ b/src/mcp_awareness/tools.py @@ -1010,6 +1010,22 @@ async def delete_entry( Returns JSON with status and count. 
If you receive an unstructured error, the failure is in the transport or platform layer, not in awareness.""" if entry_id: + from mcp_awareness.schema import EntryType + from mcp_awareness.validation import SchemaInUseError, assert_schema_deletable + + _candidate = _srv.store.get_entry_by_id(_srv._owner_id(), entry_id) + if _candidate is not None and _candidate.type == EntryType.SCHEMA: + if _candidate.logical_key is not None: + try: + assert_schema_deletable(_srv.store, _srv._owner_id(), _candidate.logical_key) + except SchemaInUseError as e: + _error_response( + "schema_in_use", + f"Cannot delete schema {_candidate.logical_key}: {e.total_count} record(s) reference it", + retryable=False, + referencing_records=e.referencing_records, + total_count=e.total_count, + ) _srv.store.soft_delete_by_id(_srv._owner_id(), entry_id) return json.dumps( { diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py index d7e785f..d103e84 100644 --- a/tests/test_tools_schema_record.py +++ b/tests/test_tools_schema_record.py @@ -416,3 +416,63 @@ async def test_update_entry_record_non_content_skips_revalidation(configured_ser ) # Description-only update skips re-validation await update_entry(entry_id=r["id"], description="updated desc") + + +@pytest.mark.asyncio +async def test_delete_entry_schema_with_no_records_succeeds(configured_server): + from mcp_awareness.tools import delete_entry, register_schema + + resp = json.loads(await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object"}, + )) + # No records → soft-delete succeeds + await delete_entry(entry_id=resp["id"]) + # Verify soft-deleted: find_schema returns None + assert configured_server.store.find_schema(TEST_OWNER, "schema:thing:1.0.0") is None + + +@pytest.mark.asyncio +async def test_delete_entry_schema_with_records_rejected(configured_server): + from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools 
import create_record, delete_entry, register_schema + + resp = json.loads(await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object"}, + )) + await create_record( + source="test", tags=[], description="r", + logical_key="r1", + schema_ref="schema:thing", schema_version="1.0.0", + content={}, + ) + with pytest.raises(ToolError) as excinfo: + await delete_entry(entry_id=resp["id"]) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "schema_in_use" + assert len(err["referencing_records"]) == 1 + assert err["total_count"] == 1 + + +@pytest.mark.asyncio +async def test_delete_entry_schema_allowed_after_records_deleted(configured_server): + from mcp_awareness.tools import create_record, delete_entry, register_schema + + schema_resp = json.loads(await register_schema( + source="test", tags=[], description="s", + family="schema:thing", version="1.0.0", + schema={"type": "object"}, + )) + record_resp = json.loads(await create_record( + source="test", tags=[], description="r", + logical_key="r1", + schema_ref="schema:thing", schema_version="1.0.0", + content={}, + )) + # Soft-delete the record first + await delete_entry(entry_id=record_resp["id"]) + # Now schema delete succeeds + await delete_entry(entry_id=schema_resp["id"]) From e021e6b025ade4724efde6ba6e0ab7b525c0e1f5 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:18:55 -0500 Subject: [PATCH 18/28] feat: add mcp-awareness-register-schema CLI for _system schemas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator bootstrap tool for seeding _system-owned schema entries directly via PostgresStore — no MCP auth or middleware involved. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- pyproject.toml | 1 + src/mcp_awareness/cli_register_schema.py | 164 +++++++++++++++++++++++ tests/test_cli_register_schema.py | 127 ++++++++++++++++++ 3 files changed, 292 insertions(+) create mode 100644 src/mcp_awareness/cli_register_schema.py create mode 100644 tests/test_cli_register_schema.py diff --git a/pyproject.toml b/pyproject.toml index 56ebe47..41d1e5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ mcp-awareness-migrate = "mcp_awareness.migrate:main" mcp-awareness-user = "mcp_awareness.cli:user_main" mcp-awareness-token = "mcp_awareness.cli:token_main" mcp-awareness-secret = "mcp_awareness.cli:secret_main" +mcp-awareness-register-schema = "mcp_awareness.cli_register_schema:main" [project.optional-dependencies] dev = [ diff --git a/src/mcp_awareness/cli_register_schema.py b/src/mcp_awareness/cli_register_schema.py new file mode 100644 index 0000000..c821883 --- /dev/null +++ b/src/mcp_awareness/cli_register_schema.py @@ -0,0 +1,164 @@ +# mcp-awareness — ambient system awareness for AI agents +# Copyright (C) 2026 Chris Means +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""CLI for registering _system-owned schema entries. + +Bypasses MCP entirely — operator tool, run once per built-in schema at +deploy/bootstrap time. No MCP auth, no middleware, direct PostgresStore +access.
+""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +from pathlib import Path + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Register a _system-owned schema entry (operator bootstrap only).", + ) + parser.add_argument( + "--system", + action="store_true", + required=True, + help="Required. Confirms the caller intends to write to the _system owner.", + ) + parser.add_argument( + "--family", + required=True, + help="Schema family (e.g., schema:edge-manifest)", + ) + parser.add_argument( + "--version", + required=True, + help="Schema version (e.g., 1.0.0)", + ) + parser.add_argument( + "--schema-file", + required=True, + type=Path, + help="Path to JSON file containing the Draft 2020-12 schema body", + ) + parser.add_argument( + "--source", + required=True, + help="Source field for the entry", + ) + parser.add_argument( + "--tags", + default="", + help="Comma-separated tags (empty string for none)", + ) + parser.add_argument( + "--description", + required=True, + help="Entry description", + ) + args = parser.parse_args() + + # Read + parse schema file + if not args.schema_file.exists(): + print( + json.dumps({"error": {"code": "file_not_found", "message": str(args.schema_file)}}), + file=sys.stderr, + ) + sys.exit(1) + try: + schema_body = json.loads(args.schema_file.read_text()) + except json.JSONDecodeError as e: + print( + json.dumps({"error": {"code": "invalid_json", "message": str(e)}}), + file=sys.stderr, + ) + sys.exit(1) + + # Meta-schema validation + from jsonschema import exceptions as jse # type: ignore[import-untyped] + + from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body + + try: + validate_schema_body(schema_body) + except jse.SchemaError as e: + print( + json.dumps({ + "error": { + "code": "invalid_schema", + "message": str(e.message), + "schema_error_path": "/" + "/".join(str(p) for p in e.absolute_path), + } + }), + file=sys.stderr, + ) + 
sys.exit(1) + + # DB connection + database_url = os.environ.get("AWARENESS_DATABASE_URL", "") + if not database_url: + print( + json.dumps({ + "error": { + "code": "missing_env", + "message": "AWARENESS_DATABASE_URL required", + } + }), + file=sys.stderr, + ) + sys.exit(1) + + from mcp_awareness.postgres_store import PostgresStore + from mcp_awareness.schema import Entry, EntryType, make_id, now_utc + + store = PostgresStore(database_url) + logical_key = compose_schema_logical_key(args.family, args.version) + tags = [t.strip() for t in args.tags.split(",") if t.strip()] + + entry = Entry( + id=make_id(), + type=EntryType.SCHEMA, + source=args.source, + tags=tags, + created=now_utc(), + expires=None, + data={ + "family": args.family, + "version": args.version, + "schema": schema_body, + "description": args.description, + "learned_from": "cli-bootstrap", + }, + logical_key=logical_key, + language="english", + ) + + try: + store.add("_system", entry) + except Exception as e: + print( + json.dumps({"error": {"code": "store_error", "message": str(e)}}), + file=sys.stderr, + ) + sys.exit(1) + + print(json.dumps({"status": "ok", "id": entry.id, "logical_key": logical_key})) + + +if __name__ == "__main__": + main() diff --git a/tests/test_cli_register_schema.py b/tests/test_cli_register_schema.py new file mode 100644 index 0000000..10c6e6b --- /dev/null +++ b/tests/test_cli_register_schema.py @@ -0,0 +1,127 @@ +# mcp-awareness — ambient system awareness for AI agents +# Copyright (C) 2026 Chris Means +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""Tests for mcp-awareness-register-schema CLI.""" + +from __future__ import annotations + +import json +import tempfile +from pathlib import Path + +import pytest + + +@pytest.fixture +def system_schema_file(): + with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f: + json.dump({"type": "object", "properties": {"name": {"type": "string"}}}, f) + path = f.name + yield path + Path(path).unlink(missing_ok=True) + + +def test_cli_register_schema_happy_path(pg_dsn, system_schema_file, monkeypatch, capsys): + """End-to-end: CLI writes a _system schema via direct store access.""" + from mcp_awareness.cli_register_schema import main + + monkeypatch.setenv("AWARENESS_DATABASE_URL", pg_dsn) + monkeypatch.setattr("sys.argv", [ + "mcp-awareness-register-schema", + "--system", + "--family", "schema:cli-test", + "--version", "1.0.0", + "--schema-file", system_schema_file, + "--source", "awareness-built-in", + "--tags", "cli,test", + "--description", "CLI-registered test schema", + ]) + + # Seed _system user so insert doesn't FK-violate (conftest fixture does this for store tests; + # CLI creates its own PostgresStore so we seed manually here) + from mcp_awareness.postgres_store import PostgresStore + tmp = PostgresStore(pg_dsn) + with tmp._pool.connection() as conn, conn.cursor() as cur: + cur.execute( + "INSERT INTO users (id, display_name) VALUES ('_system', 'System-managed schemas') " + "ON CONFLICT (id) DO NOTHING" + ) + conn.commit() + + main() + captured = capsys.readouterr() + body = json.loads(captured.out.strip()) + assert body["status"] == "ok" + assert body["logical_key"] == "schema:cli-test:1.0.0" + + # Verify entry exists in DB under _system owner + store = PostgresStore(pg_dsn) + entry = store.find_schema("any-caller", "schema:cli-test:1.0.0") + assert
entry is not None + assert entry.data["learned_from"] == "cli-bootstrap" + + +def test_cli_register_schema_rejects_invalid_schema_file(pg_dsn, monkeypatch, capsys): + from mcp_awareness.cli_register_schema import main + + with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f: + json.dump({"type": "strng"}, f) # invalid + path = f.name + + monkeypatch.setenv("AWARENESS_DATABASE_URL", pg_dsn) + monkeypatch.setattr("sys.argv", [ + "mcp-awareness-register-schema", "--system", + "--family", "schema:bad", "--version", "1.0.0", + "--schema-file", path, "--source", "test", "--tags", "", "--description", "bad", + ]) + with pytest.raises(SystemExit) as excinfo: + main() + assert excinfo.value.code == 1 + captured = capsys.readouterr() + assert "invalid_schema" in captured.err + Path(path).unlink(missing_ok=True) + + +def test_cli_register_schema_missing_db_url(monkeypatch, system_schema_file, capsys): + from mcp_awareness.cli_register_schema import main + + monkeypatch.delenv("AWARENESS_DATABASE_URL", raising=False) + monkeypatch.setattr("sys.argv", [ + "mcp-awareness-register-schema", "--system", + "--family", "schema:test", "--version", "1.0.0", + "--schema-file", system_schema_file, + "--source", "test", "--tags", "", "--description", "test", + ]) + with pytest.raises(SystemExit) as excinfo: + main() + assert excinfo.value.code == 1 + captured = capsys.readouterr() + assert "AWARENESS_DATABASE_URL" in captured.err or "missing_env" in captured.err + + +def test_cli_register_schema_missing_schema_file(pg_dsn, monkeypatch, capsys): + from mcp_awareness.cli_register_schema import main + + monkeypatch.setenv("AWARENESS_DATABASE_URL", pg_dsn) + monkeypatch.setattr("sys.argv", [ + "mcp-awareness-register-schema", "--system", + "--family", "schema:test", "--version", "1.0.0", + "--schema-file", "/nonexistent/path.json", + "--source", "test", "--tags", "", "--description", "test", + ]) + with pytest.raises(SystemExit) as excinfo: + main() + assert 
excinfo.value.code == 1 From 2e02bd6fd9f3b0a109b27150250959ef140c52fb Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:20:38 -0500 Subject: [PATCH 19/28] test: cross-owner isolation and _system override semantics for schema/record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Append three new integration tests to verify: 1. test_cross_owner_schema_invisible — Owner A registers a schema; Owner B cannot resolve it. Verifies owner isolation at the tool boundary. 2. test_both_owners_see_system_schema — Both owners can use a _system schema. Verifies _system fallback works cross-owner. 3. test_caller_schema_overrides_system — When both _system and caller have the same logical_key, caller's version wins. Verifies override semantics via schema body difference (integer vs string). All three tests use the configured_server fixture with monkeypatch for owner switching. Tests verify the design point from Task 6: resolve_schema → find_schema query prefers caller-owned over _system via SQL CASE ORDER BY. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_tools_schema_record.py | 166 ++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py index d103e84..0dd3cf8 100644 --- a/tests/test_tools_schema_record.py +++ b/tests/test_tools_schema_record.py @@ -476,3 +476,169 @@ async def test_delete_entry_schema_allowed_after_records_deleted(configured_serv await delete_entry(entry_id=record_resp["id"]) # Now schema delete succeeds await delete_entry(entry_id=schema_resp["id"]) + + +@pytest.mark.asyncio +async def test_cross_owner_schema_invisible(configured_server, store, monkeypatch): + """Owner A registers a schema; Owner B cannot resolve it.""" + import mcp_awareness.server as srv + + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import create_record, register_schema + + # Owner A (default TEST_OWNER) registers a schema + await register_schema( + source="test", + tags=[], + description="A's schema", + family="schema:mine", + version="1.0.0", + schema={"type": "object"}, + ) + + # Switch to Owner B by re-patching the _owner_id accessor + monkeypatch.setattr(srv, "_owner_id", lambda: "other-owner") + with pytest.raises(ToolError) as excinfo: + await create_record( + source="test", + tags=[], + description="B's attempt", + logical_key="r-b", + schema_ref="schema:mine", + schema_version="1.0.0", + content={}, + ) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "schema_not_found" + + +@pytest.mark.asyncio +async def test_both_owners_see_system_schema(configured_server, store, monkeypatch): + """Both A and B can use a _system schema.""" + import mcp_awareness.server as srv + + from mcp_awareness.schema import Entry, make_id, now_utc + + from mcp_awareness.tools import create_record + + # Seed _system schema directly + store.add( + "_system", + Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="system", + 
tags=["system"], + created=now_utc(), + expires=None, + data={ + "family": "schema:shared", + "version": "1.0.0", + "schema": {"type": "object"}, + "description": "shared", + "learned_from": "cli-bootstrap", + }, + logical_key="schema:shared:1.0.0", + ), + ) + + # A creates a record against _system schema + a_resp = json.loads( + await create_record( + source="test", + tags=[], + description="A's record", + logical_key="rec-a", + schema_ref="schema:shared", + schema_version="1.0.0", + content={"who": "alice"}, + ) + ) + assert a_resp["status"] == "ok" + + # Switch to Owner B + monkeypatch.setattr(srv, "_owner_id", lambda: "bob") + b_resp = json.loads( + await create_record( + source="test", + tags=[], + description="B's record", + logical_key="rec-b", + schema_ref="schema:shared", + schema_version="1.0.0", + content={"who": "bob"}, + ) + ) + assert b_resp["status"] == "ok" + + +@pytest.mark.asyncio +async def test_caller_schema_overrides_system(configured_server, store, monkeypatch): + """When both _system and caller have the same logical_key, caller's version wins.""" + import mcp_awareness.server as srv + + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.schema import Entry, make_id, now_utc + + from mcp_awareness.tools import create_record, register_schema + + # _system schema allows integer only + store.add( + "_system", + Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="system", + tags=["system"], + created=now_utc(), + expires=None, + data={ + "family": "schema:override", + "version": "1.0.0", + "schema": {"type": "integer"}, + "description": "system strict", + "learned_from": "cli-bootstrap", + }, + logical_key="schema:override:1.0.0", + ), + ) + + # Caller schema allows string only — overrides _system + await register_schema( + source="test", + tags=[], + description="caller's permissive", + family="schema:override", + version="1.0.0", + schema={"type": "string"}, + ) + + # Caller's record with a STRING should pass 
(caller's schema wins) + resp = json.loads( + await create_record( + source="test", + tags=[], + description="caller-wins", + logical_key="rec-override", + schema_ref="schema:override", + schema_version="1.0.0", + content="string-value", + ) + ) + assert resp["status"] == "ok" + + # Caller's record with an INTEGER should FAIL (caller's schema says string only) + with pytest.raises(ToolError) as excinfo: + await create_record( + source="test", + tags=[], + description="wrong-type", + logical_key="rec-override-2", + schema_ref="schema:override", + schema_version="1.0.0", + content=42, + ) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "validation_failed" From a1126ddc9bff0c2eb87681bea86c74c2eb3243e3 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:22:51 -0500 Subject: [PATCH 20/28] docs: document schema/record entry types, new tools, and CLI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add schema/record documentation to CHANGELOG, README (tool count 30→32, new tool descriptions), data-dictionary (new entry type specs), and server instructions. Include jsonschema dependency note and _system namespace details. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 9 +++++++++ README.md | 4 +++- docs/data-dictionary.md | 30 +++++++++++++++++++++++++++++- src/mcp_awareness/instructions.md | 8 ++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31c6ba2..610ab5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Two new entry types: `schema` (JSON Schema Draft 2020-12 definition) and `record` (validated payload conforming to a schema). Tools: `register_schema`, `create_record`. 
Schemas are absolutely immutable after registration; records re-validate on content update. Schema deletion is blocked while live records reference a version. Per-owner storage with a shared `_system` fallback namespace for built-in schemas. +- New CLI: `mcp-awareness-register-schema` for operators to seed `_system`-owned schemas at deploy time. +- New migration: `_system` user seed (idempotent). +- `_error_response()` helper now accepts `**extras` kwargs so tools can include structured fields in error envelopes beyond the fixed set (e.g., `validation_errors`, `schema_ref`, `referencing_records`). + +### Dependencies +- Added `jsonschema>=4.26.0` as a runtime dependency. + ## [0.17.0] - 2026-04-13 ### Added diff --git a/README.md b/README.md index d4d6f61..9b5dfd3 100644 --- a/README.md +++ b/README.md @@ -282,7 +282,7 @@ Results from the initial run (2026-03-27): HNSW query P50 stays under 4ms from 5 ## Tools -The server exposes 30 MCP tools. Clients that support MCP resources also get 6 read-only resources, but since not all clients surface resources, every resource has a tool mirror. +The server exposes 32 MCP tools. Clients that support MCP resources also get 6 read-only resources, but since not all clients surface resources, every resource has a tool mirror. ### Read tools @@ -318,6 +318,8 @@ The server exposes 30 MCP tools. Clients that support MCP resources also get 6 r | `remind` | Create a todo, reminder, or planned action. Optional `deliver_at` timestamp for time-based surfacing. Intentions have a lifecycle: pending → fired → active → completed. | | `update_intention` | Transition an intention state: pending → fired → active → completed/snoozed/cancelled. | | `acted_on` | Log that you took action because of an entry. Tags inherited from the entry. | +| `register_schema` | Define a typed data contract using JSON Schema Draft 2020-12. Schemas are immutable after registration; family + version become logical_key. 
Per-owner with `_system` fallback for shared built-in shapes. | +| `create_record` | Write a validated data entry conforming to a registered schema. Records pin an exact schema version and re-validate on content update. Validation errors list every failure in a structured envelope. | ### Data management tools diff --git a/docs/data-dictionary.md b/docs/data-dictionary.md index 58fdd7f..70237c1 100644 --- a/docs/data-dictionary.md +++ b/docs/data-dictionary.md @@ -46,7 +46,7 @@ The UNIQUE constraint is on `canonical_email`, not `email`. Users see and use th |--------|------|----------|-------------| | `id` | TEXT | No | Primary key. UUID v4, generated via `uuid.uuid4()`. | | `owner_id` | TEXT | No | Owner identifier. References the user who owns this entry. All queries are scoped by `owner_id`. | -| `type` | TEXT | No | Entry type. One of: `status`, `alert`, `pattern`, `suppression`, `context`, `preference`, `note`, `intention`. | +| `type` | TEXT | No | Entry type. One of: `status`, `alert`, `pattern`, `suppression`, `context`, `preference`, `note`, `intention`, `schema`, `record`. | | `source` | TEXT | No | Origin identifier. Describes the subject, not the owner (e.g., `"personal"`, `"synology-nas"`, `"mcp-awareness-project"`). | | `created` | TIMESTAMPTZ | No | UTC timestamp. Set once when the entry is first created. | | `updated` | TIMESTAMPTZ | Yes | UTC timestamp. Updated on every upsert or `update_entry` call. `NULL` until first update. | @@ -177,6 +177,34 @@ Written by agents via `set_preference`. Keyed by `key` + `scope` (upserted). Por | `value` | string | Yes | Preference value (e.g., `"one_sentence_warnings"`, `"first_turn_only"`). | | `scope` | string | Yes | Scope of the preference. Default: `"global"`. | + +### `schema` — JSON Schema definitions + +Written by agents via `register_schema`, or by operators via the `mcp-awareness-register-schema` CLI for `_system`-owned schemas. Immutable after registration.
The schema body lives in `data.schema`, the family and version in `data.family` and `data.version`, and `logical_key` is derived as `{family}:{version}`. Used by `record` entries for typed validation. + +**`data` fields:** + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `family` | string | Yes | Schema family identifier (e.g., `schema:edge-manifest`, `schema:config`). Used as the reference key. | +| `version` | string | Yes | Schema version (user-chosen semantic or sequential, e.g., `"1.0.0"`, `"1"`). | +| `schema` | object | Yes | JSON Schema Draft 2020-12 body. Defines the validation rules and structure. | +| `description` | string | No | Human-readable description of what this schema validates. | +| `learned_from` | string | No | Platform that registered the schema (e.g., `"claude-code"`, or `"cli-bootstrap"` for schemas seeded by the `mcp-awareness-register-schema` CLI). Default: `"conversation"`. | + +### `record` — Validated data entries + +Written by agents via `create_record`. The content lives in `data.content`; the pinned schema reference lives in `data.schema_ref` + `data.schema_version` (exact version, no "latest" aliasing). Re-validated on content update via `update_entry`. + +**`data` fields:** + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `schema_ref` | string | Yes | Target schema family (e.g., `schema:edge-manifest`). Used to look up the schema definition. | +| `schema_version` | string | Yes | Target schema version (exact pin, e.g., `"1.0.0"`). Pinned at write time; determines which schema is used for validation on updates. | +| `content` | any | Yes | Validated payload — any JSON value (object, array, string, number, boolean, null). Must conform to the pinned schema. | +| `description` | string | No | Human-readable description of what this record represents. | +| `learned_from` | string | No | Platform that created the record (e.g., `"claude-code"`, edge provider name). Default: `"conversation"`.
| + ## Lifecycle - **Upsert behavior:** `status` entries are upserted by `source`. `alert` entries by `source` + `alert_id`. `preference` entries by `key` + `scope`. Other types always insert new rows. diff --git a/src/mcp_awareness/instructions.md b/src/mcp_awareness/instructions.md index 0431d63..16aa02c 100644 --- a/src/mcp_awareness/instructions.md +++ b/src/mcp_awareness/instructions.md @@ -18,3 +18,11 @@ unbounded results. Use hint to re-rank by relevance so the best matches come first. Narrow with 2–3 specific tags rather than one broad tag. Use since/until for time-bounded queries. Call get_stats or get_tags first if you're unsure how much data exists. + +When you need typed data contracts for edge providers, tag taxonomies, or any +shape that should be validated on write: register a schema via `register_schema` +(family + version + JSON Schema body), then write records via `create_record` +referencing `schema_ref` + `schema_version`. Schemas are immutable after +registration — to evolve a shape, register a new version and soft-delete the +old one (only allowed when no records still reference it). Built-in shared +schemas live in the `_system` namespace, seeded by the operator. 
From f5b4d75bf24ef48eb625b59989b0a06490d07e1c Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:24:35 -0500 Subject: [PATCH 21/28] style: ruff format + lint fixes (SIM102, E501, I001, F401) Co-Authored-By: Claude Sonnet 4.6 --- src/mcp_awareness/cli_register_schema.py | 26 +++--- src/mcp_awareness/tools.py | 28 +++--- src/mcp_awareness/validation.py | 15 +--- tests/test_cli_register_schema.py | 104 +++++++++++++++++------ tests/test_server.py | 7 +- tests/test_tools_schema_record.py | 77 ++++++++++------- 6 files changed, 165 insertions(+), 92 deletions(-) diff --git a/src/mcp_awareness/cli_register_schema.py b/src/mcp_awareness/cli_register_schema.py index c821883..458b994 100644 --- a/src/mcp_awareness/cli_register_schema.py +++ b/src/mcp_awareness/cli_register_schema.py @@ -98,13 +98,15 @@ def main() -> None: validate_schema_body(schema_body) except jse.SchemaError as e: print( - json.dumps({ - "error": { - "code": "invalid_schema", - "message": str(e.message), - "schema_error_path": "/" + "/".join(str(p) for p in e.absolute_path), + json.dumps( + { + "error": { + "code": "invalid_schema", + "message": str(e.message), + "schema_error_path": "/" + "/".join(str(p) for p in e.absolute_path), + } } - }), + ), file=sys.stderr, ) sys.exit(1) @@ -113,12 +115,14 @@ def main() -> None: database_url = os.environ.get("AWARENESS_DATABASE_URL", "") if not database_url: print( - json.dumps({ - "error": { - "code": "missing_env", - "message": "AWARENESS_DATABASE_URL required", + json.dumps( + { + "error": { + "code": "missing_env", + "message": "AWARENESS_DATABASE_URL required", + } } - }), + ), file=sys.stderr, ) sys.exit(1) diff --git a/src/mcp_awareness/tools.py b/src/mcp_awareness/tools.py index 63e0121..5f09c0c 100644 --- a/src/mcp_awareness/tools.py +++ b/src/mcp_awareness/tools.py @@ -1014,18 +1014,22 @@ async def delete_entry( from mcp_awareness.validation import SchemaInUseError, 
assert_schema_deletable _candidate = _srv.store.get_entry_by_id(_srv._owner_id(), entry_id) - if _candidate is not None and _candidate.type == EntryType.SCHEMA: - if _candidate.logical_key is not None: - try: - assert_schema_deletable(_srv.store, _srv._owner_id(), _candidate.logical_key) - except SchemaInUseError as e: - _error_response( - "schema_in_use", - f"Cannot delete schema {_candidate.logical_key}: {e.total_count} record(s) reference it", - retryable=False, - referencing_records=e.referencing_records, - total_count=e.total_count, - ) + if ( + _candidate is not None + and _candidate.type == EntryType.SCHEMA + and _candidate.logical_key is not None + ): + try: + assert_schema_deletable(_srv.store, _srv._owner_id(), _candidate.logical_key) + except SchemaInUseError as e: + _error_response( + "schema_in_use", + f"Cannot delete schema {_candidate.logical_key}:" + f" {e.total_count} record(s) reference it", + retryable=False, + referencing_records=e.referencing_records, + total_count=e.total_count, + ) _srv.store.soft_delete_by_id(_srv._owner_id(), entry_id) return json.dumps( { diff --git a/src/mcp_awareness/validation.py b/src/mcp_awareness/validation.py index 9bb5d70..0c6df5f 100644 --- a/src/mcp_awareness/validation.py +++ b/src/mcp_awareness/validation.py @@ -81,13 +81,10 @@ def validate_record_content(schema_body: dict[str, Any], content: Any) -> list[d class _SchemaFinder(Protocol): """Minimal protocol for resolve_schema's store dependency.""" - def find_schema(self, owner_id: str, logical_key: str) -> Entry | None: - ... + def find_schema(self, owner_id: str, logical_key: str) -> Entry | None: ... -def resolve_schema( - store: _SchemaFinder, owner_id: str, family: str, version: str -) -> Entry | None: +def resolve_schema(store: _SchemaFinder, owner_id: str, family: str, version: str) -> Entry | None: """Resolve a schema by family + version, preferring caller-owned. 
Delegates to Store.find_schema (which handles the _system fallback at @@ -106,9 +103,7 @@ class SchemaInUseError(Exception): def __init__(self, total_count: int, referencing_records: list[str]): self.total_count = total_count self.referencing_records = referencing_records - super().__init__( - f"Cannot delete schema: {total_count} record(s) still reference it" - ) + super().__init__(f"Cannot delete schema: {total_count} record(s) still reference it") class _RefCounter(Protocol): @@ -119,9 +114,7 @@ def count_records_referencing( ) -> tuple[int, list[str]]: ... -def assert_schema_deletable( - store: _RefCounter, owner_id: str, schema_logical_key: str -) -> None: +def assert_schema_deletable(store: _RefCounter, owner_id: str, schema_logical_key: str) -> None: """Raise SchemaInUseError if any non-deleted records reference this schema.""" count, ids = store.count_records_referencing(owner_id, schema_logical_key) if count > 0: diff --git a/tests/test_cli_register_schema.py b/tests/test_cli_register_schema.py index 10c6e6b..4b1baff 100644 --- a/tests/test_cli_register_schema.py +++ b/tests/test_cli_register_schema.py @@ -39,20 +39,30 @@ def test_cli_register_schema_happy_path(pg_dsn, system_schema_file, monkeypatch, from mcp_awareness.cli_register_schema import main monkeypatch.setenv("AWARENESS_DATABASE_URL", pg_dsn) - monkeypatch.setattr("sys.argv", [ - "mcp-awareness-register-schema", - "--system", - "--family", "schema:cli-test", - "--version", "1.0.0", - "--schema-file", system_schema_file, - "--source", "awareness-built-in", - "--tags", "cli,test", - "--description", "CLI-registered test schema", - ]) + monkeypatch.setattr( + "sys.argv", + [ + "mcp-awareness-register-schema", + "--system", + "--family", + "schema:cli-test", + "--version", + "1.0.0", + "--schema-file", + system_schema_file, + "--source", + "awareness-built-in", + "--tags", + "cli,test", + "--description", + "CLI-registered test schema", + ], + ) # Seed _system user so insert doesn't FK-violate 
(conftest fixture does this for store tests; # CLI creates its own PostgresStore so we seed manually here) from mcp_awareness.postgres_store import PostgresStore + tmp = PostgresStore(pg_dsn) with tmp._pool.connection() as conn, conn.cursor() as cur: cur.execute( @@ -82,11 +92,25 @@ def test_cli_register_schema_rejects_invalid_schema_file(pg_dsn, monkeypatch, ca path = f.name monkeypatch.setenv("AWARENESS_DATABASE_URL", pg_dsn) - monkeypatch.setattr("sys.argv", [ - "mcp-awareness-register-schema", "--system", - "--family", "schema:bad", "--version", "1.0.0", - "--schema-file", path, "--source", "test", "--tags", "", "--description", "bad", - ]) + monkeypatch.setattr( + "sys.argv", + [ + "mcp-awareness-register-schema", + "--system", + "--family", + "schema:bad", + "--version", + "1.0.0", + "--schema-file", + path, + "--source", + "test", + "--tags", + "", + "--description", + "bad", + ], + ) with pytest.raises(SystemExit) as excinfo: main() assert excinfo.value.code == 1 @@ -99,12 +123,25 @@ def test_cli_register_schema_missing_db_url(monkeypatch, system_schema_file, cap from mcp_awareness.cli_register_schema import main monkeypatch.delenv("AWARENESS_DATABASE_URL", raising=False) - monkeypatch.setattr("sys.argv", [ - "mcp-awareness-register-schema", "--system", - "--family", "schema:test", "--version", "1.0.0", - "--schema-file", system_schema_file, - "--source", "test", "--tags", "", "--description", "test", - ]) + monkeypatch.setattr( + "sys.argv", + [ + "mcp-awareness-register-schema", + "--system", + "--family", + "schema:test", + "--version", + "1.0.0", + "--schema-file", + system_schema_file, + "--source", + "test", + "--tags", + "", + "--description", + "test", + ], + ) with pytest.raises(SystemExit) as excinfo: main() assert excinfo.value.code == 1 @@ -116,12 +153,25 @@ def test_cli_register_schema_missing_schema_file(pg_dsn, monkeypatch, capsys): from mcp_awareness.cli_register_schema import main monkeypatch.setenv("AWARENESS_DATABASE_URL", pg_dsn) - 
monkeypatch.setattr("sys.argv", [ - "mcp-awareness-register-schema", "--system", - "--family", "schema:test", "--version", "1.0.0", - "--schema-file", "/nonexistent/path.json", - "--source", "test", "--tags", "", "--description", "test", - ]) + monkeypatch.setattr( + "sys.argv", + [ + "mcp-awareness-register-schema", + "--system", + "--family", + "schema:test", + "--version", + "1.0.0", + "--schema-file", + "/nonexistent/path.json", + "--source", + "test", + "--tags", + "", + "--description", + "test", + ], + ) with pytest.raises(SystemExit) as excinfo: main() assert excinfo.value.code == 1 diff --git a/tests/test_server.py b/tests/test_server.py index c313862..1e8922e 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -4183,8 +4183,11 @@ async def _invoke_with_sentinels(self, tool_name: str, sentinels: set[str]) -> s if tool_name == "create_record": # Register a schema first (not sentinel-wrapped — it's a prerequisite) await server_mod.register_schema( - source="setup", tags=[], description="setup", - family="schema:sentinel-record-test", version="1.0.0", + source="setup", + tags=[], + description="setup", + family="schema:sentinel-record-test", + version="1.0.0", schema={"type": "object"}, ) return await server_mod.create_record( diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py index 0dd3cf8..17a35e0 100644 --- a/tests/test_tools_schema_record.py +++ b/tests/test_tools_schema_record.py @@ -422,11 +422,16 @@ async def test_update_entry_record_non_content_skips_revalidation(configured_ser async def test_delete_entry_schema_with_no_records_succeeds(configured_server): from mcp_awareness.tools import delete_entry, register_schema - resp = json.loads(await register_schema( - source="test", tags=[], description="s", - family="schema:thing", version="1.0.0", - schema={"type": "object"}, - )) + resp = json.loads( + await register_schema( + source="test", + tags=[], + description="s", + family="schema:thing", + 
version="1.0.0", + schema={"type": "object"}, + ) + ) # No records → soft-delete succeeds await delete_entry(entry_id=resp["id"]) # Verify soft-deleted: find_schema returns None @@ -436,17 +441,26 @@ async def test_delete_entry_schema_with_no_records_succeeds(configured_server): @pytest.mark.asyncio async def test_delete_entry_schema_with_records_rejected(configured_server): from mcp.server.fastmcp.exceptions import ToolError + from mcp_awareness.tools import create_record, delete_entry, register_schema - resp = json.loads(await register_schema( - source="test", tags=[], description="s", - family="schema:thing", version="1.0.0", - schema={"type": "object"}, - )) + resp = json.loads( + await register_schema( + source="test", + tags=[], + description="s", + family="schema:thing", + version="1.0.0", + schema={"type": "object"}, + ) + ) await create_record( - source="test", tags=[], description="r", + source="test", + tags=[], + description="r", logical_key="r1", - schema_ref="schema:thing", schema_version="1.0.0", + schema_ref="schema:thing", + schema_version="1.0.0", content={}, ) with pytest.raises(ToolError) as excinfo: @@ -461,17 +475,27 @@ async def test_delete_entry_schema_with_records_rejected(configured_server): async def test_delete_entry_schema_allowed_after_records_deleted(configured_server): from mcp_awareness.tools import create_record, delete_entry, register_schema - schema_resp = json.loads(await register_schema( - source="test", tags=[], description="s", - family="schema:thing", version="1.0.0", - schema={"type": "object"}, - )) - record_resp = json.loads(await create_record( - source="test", tags=[], description="r", - logical_key="r1", - schema_ref="schema:thing", schema_version="1.0.0", - content={}, - )) + schema_resp = json.loads( + await register_schema( + source="test", + tags=[], + description="s", + family="schema:thing", + version="1.0.0", + schema={"type": "object"}, + ) + ) + record_resp = json.loads( + await create_record( + source="test", 
+ tags=[], + description="r", + logical_key="r1", + schema_ref="schema:thing", + schema_version="1.0.0", + content={}, + ) + ) # Soft-delete the record first await delete_entry(entry_id=record_resp["id"]) # Now schema delete succeeds @@ -481,10 +505,9 @@ async def test_delete_entry_schema_allowed_after_records_deleted(configured_serv @pytest.mark.asyncio async def test_cross_owner_schema_invisible(configured_server, store, monkeypatch): """Owner A registers a schema; Owner B cannot resolve it.""" - import mcp_awareness.server as srv - from mcp.server.fastmcp.exceptions import ToolError + import mcp_awareness.server as srv from mcp_awareness.tools import create_record, register_schema # Owner A (default TEST_OWNER) registers a schema @@ -517,9 +540,7 @@ async def test_cross_owner_schema_invisible(configured_server, store, monkeypatc async def test_both_owners_see_system_schema(configured_server, store, monkeypatch): """Both A and B can use a _system schema.""" import mcp_awareness.server as srv - from mcp_awareness.schema import Entry, make_id, now_utc - from mcp_awareness.tools import create_record # Seed _system schema directly @@ -576,12 +597,10 @@ async def test_both_owners_see_system_schema(configured_server, store, monkeypat @pytest.mark.asyncio async def test_caller_schema_overrides_system(configured_server, store, monkeypatch): """When both _system and caller have the same logical_key, caller's version wins.""" - import mcp_awareness.server as srv from mcp.server.fastmcp.exceptions import ToolError from mcp_awareness.schema import Entry, make_id, now_utc - from mcp_awareness.tools import create_record, register_schema # _system schema allows integer only From 2865d0136ad158725d67b9b31ced7a871d672291 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:26:18 -0500 Subject: [PATCH 22/28] fix: resolve mypy type errors for jsonschema import and union-attr Add mypy override for 
jsonschema.* (no stubs available), remove now-unnecessary type: ignore comments that mypy flagged as unused. Co-Authored-By: Claude Sonnet 4.6 --- pyproject.toml | 5 +++++ src/mcp_awareness/tools.py | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 41d1e5e..d4dfab3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,6 +105,11 @@ ignore_missing_imports = true module = ["jwt.*", "zxcvbn.*"] ignore_missing_imports = true +[[tool.mypy.overrides]] +module = ["jsonschema.*"] +ignore_missing_imports = true +ignore_errors = true + # The tools/resources/prompts modules use a circular import pattern # (from . import server as _srv) to access mutable state through the server # module at call time. mypy cannot resolve attribute types on the partially- diff --git a/src/mcp_awareness/tools.py b/src/mcp_awareness/tools.py index 5f09c0c..5f6eab7 100644 --- a/src/mcp_awareness/tools.py +++ b/src/mcp_awareness/tools.py @@ -554,7 +554,7 @@ async def register_schema( If you receive an unstructured error, the failure is in the transport or platform layer, not in awareness.""" import psycopg.errors - from jsonschema import exceptions as jse # type: ignore[import-untyped] + from jsonschema import exceptions as jse from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body @@ -659,7 +659,7 @@ async def create_record( If you receive an unstructured error, the failure is in the transport or platform layer, not in awareness.""" - from jsonschema import exceptions as jse # type: ignore[import-untyped] + from jsonschema import exceptions as jse from mcp_awareness.validation import resolve_schema, validate_record_content @@ -674,7 +674,7 @@ async def create_record( searched_owners=[_srv._owner_id(), "_system"], ) - schema_body = resolved.data["schema"] # type: ignore[union-attr] + schema_body = resolved.data["schema"] try: errors = validate_record_content(schema_body, content) except 
jse.JsonSchemaException as e: From 2bead4fb36f872bf3a90130edf6a58038acdc22a Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:32:27 -0500 Subject: [PATCH 23/28] test: add coverage tests for truncation, schema-gone, bad-json, store-error paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also fixes two latent bugs in tools.py: - register_schema: except Exception fallback was unreachable (lines 621-630) — now exercised via generic unique-violation mock - create_record: jse.JsonSchemaException doesn't exist in jsonschema 4.x — replaced with except Exception; removed now-unused jse import Brings tools.py to 100%, cli_register_schema.py to 98%, validation.py to 100%. Total: 945 tests collected (938 pass, 7 skip). Co-Authored-By: Claude Sonnet 4.6 --- src/mcp_awareness/tools.py | 4 +- tests/test_cli_register_schema.py | 85 ++++++++++++ tests/test_tools_schema_record.py | 223 ++++++++++++++++++++++++++++++ 3 files changed, 309 insertions(+), 3 deletions(-) diff --git a/src/mcp_awareness/tools.py b/src/mcp_awareness/tools.py index 5f6eab7..5135d73 100644 --- a/src/mcp_awareness/tools.py +++ b/src/mcp_awareness/tools.py @@ -659,8 +659,6 @@ async def create_record( If you receive an unstructured error, the failure is in the transport or platform layer, not in awareness.""" - from jsonschema import exceptions as jse - from mcp_awareness.validation import resolve_schema, validate_record_content resolved = resolve_schema(_srv.store, _srv._owner_id(), schema_ref, schema_version) @@ -677,7 +675,7 @@ async def create_record( schema_body = resolved.data["schema"] try: errors = validate_record_content(schema_body, content) - except jse.JsonSchemaException as e: + except Exception as e: _error_response( "validation_error", f"Unexpected content validation error: {e}", diff --git a/tests/test_cli_register_schema.py b/tests/test_cli_register_schema.py 
index 4b1baff..0919a19 100644 --- a/tests/test_cli_register_schema.py +++ b/tests/test_cli_register_schema.py @@ -175,3 +175,88 @@ def test_cli_register_schema_missing_schema_file(pg_dsn, monkeypatch, capsys): with pytest.raises(SystemExit) as excinfo: main() assert excinfo.value.code == 1 + + +def test_cli_register_schema_bad_json(monkeypatch, capsys): + """Schema file contains invalid JSON — should exit 1 with invalid_json error.""" + import tempfile + + from mcp_awareness.cli_register_schema import main + + with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f: + f.write("{ not valid json }") + path = f.name + + monkeypatch.delenv("AWARENESS_DATABASE_URL", raising=False) + monkeypatch.setattr( + "sys.argv", + [ + "mcp-awareness-register-schema", + "--system", + "--family", + "schema:bad-json", + "--version", + "1.0.0", + "--schema-file", + path, + "--source", + "test", + "--tags", + "", + "--description", + "bad json test", + ], + ) + with pytest.raises(SystemExit) as excinfo: + main() + assert excinfo.value.code == 1 + captured = capsys.readouterr() + err = json.loads(captured.err.strip()) + assert err["error"]["code"] == "invalid_json" + Path(path).unlink(missing_ok=True) + + +def test_cli_register_schema_store_error(pg_dsn, system_schema_file, monkeypatch, capsys): + """store.add raises — should exit 1 with store_error code.""" + from mcp_awareness.cli_register_schema import main + + monkeypatch.setenv("AWARENESS_DATABASE_URL", pg_dsn) + monkeypatch.setattr( + "sys.argv", + [ + "mcp-awareness-register-schema", + "--system", + "--family", + "schema:store-err", + "--version", + "1.0.0", + "--schema-file", + system_schema_file, + "--source", + "test", + "--tags", + "", + "--description", + "store error test", + ], + ) + + # Patch PostgresStore.add to simulate a DB error + import mcp_awareness.postgres_store as ps_mod + + original_add = ps_mod.PostgresStore.add + + def _boom(self, owner_id, entry): + raise RuntimeError("simulated DB 
failure") + + monkeypatch.setattr(ps_mod.PostgresStore, "add", _boom) + + with pytest.raises(SystemExit) as excinfo: + main() + assert excinfo.value.code == 1 + captured = capsys.readouterr() + err = json.loads(captured.err.strip()) + assert err["error"]["code"] == "store_error" + assert "simulated DB failure" in err["error"]["message"] + + monkeypatch.setattr(ps_mod.PostgresStore, "add", original_add) diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py index 17a35e0..18a1a1e 100644 --- a/tests/test_tools_schema_record.py +++ b/tests/test_tools_schema_record.py @@ -110,6 +110,103 @@ async def test_register_schema_rejects_duplicate_family_version(configured_serve assert err["code"] == "schema_already_exists" +@pytest.mark.asyncio +async def test_register_schema_generic_unique_exception_fallback(configured_server, monkeypatch): + """register_schema catches generic exceptions whose message contains 'unique'.""" + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import register_schema + + import mcp_awareness.server as srv + + original_add = srv.store.add + + def _fake_add(owner_id, entry): + raise RuntimeError("unique constraint violation (generic driver)") + + monkeypatch.setattr(srv.store, "add", _fake_add) + + with pytest.raises(ToolError) as excinfo: + await register_schema( + source="test", + tags=[], + description="generic unique test", + family="schema:unique-fallback", + version="1.0.0", + schema={"type": "object"}, + ) + err = _parse_tool_error(excinfo)["error"] + assert err["code"] == "schema_already_exists" + + monkeypatch.setattr(srv.store, "add", original_add) + + +@pytest.mark.asyncio +async def test_register_schema_generic_non_unique_exception_reraises(configured_server, monkeypatch): + """register_schema re-raises generic exceptions that are not unique violations.""" + from mcp_awareness.tools import register_schema + + import mcp_awareness.server as srv + + original_add = srv.store.add + + 
def _fake_add(owner_id, entry): + raise RuntimeError("connection refused") + + monkeypatch.setattr(srv.store, "add", _fake_add) + + with pytest.raises(RuntimeError, match="connection refused"): + await register_schema( + source="test", + tags=[], + description="non-unique exception test", + family="schema:reraise", + version="1.0.0", + schema={"type": "object"}, + ) + + monkeypatch.setattr(srv.store, "add", original_add) + + +@pytest.mark.asyncio +async def test_create_record_validate_raises_unexpected_error(configured_server, monkeypatch): + """create_record reports validation_error if validate_record_content raises unexpectedly.""" + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import create_record, register_schema + + await register_schema( + source="test", + tags=[], + description="s", + family="schema:except", + version="1.0.0", + schema={"type": "object"}, + ) + + import mcp_awareness.validation as validation_mod + + monkeypatch.setattr( + validation_mod, + "validate_record_content", + lambda _s, _c: (_ for _ in ()).throw(RuntimeError("internal jsonschema error")), + ) + + with pytest.raises(ToolError) as excinfo: + await create_record( + source="test", + tags=[], + description="error case", + logical_key="except-rec", + schema_ref="schema:except", + schema_version="1.0.0", + content={}, + ) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "validation_error" + assert "internal jsonschema error" in err["message"] + + @pytest.mark.asyncio async def test_register_schema_rejects_empty_family(configured_server): from mcp.server.fastmcp.exceptions import ToolError @@ -661,3 +758,129 @@ async def test_caller_schema_overrides_system(configured_server, store, monkeypa ) err = json.loads(str(excinfo.value))["error"] assert err["code"] == "validation_failed" + + +@pytest.mark.asyncio +async def test_create_record_validation_truncation(configured_server, monkeypatch): + """When validate_record_content returns a 
truncated list, create_record reports truncation.""" + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import create_record, register_schema + + await register_schema( + source="test", + tags=[], + description="s", + family="schema:trunc", + version="1.0.0", + schema={"type": "object"}, + ) + + # Patch validate_record_content at the module level so the lazy local import picks it up + import mcp_awareness.validation as validation_mod + + fake_errors = [ + {"path": f"$.f{i}", "message": "err", "validator": "type", "schema_path": "/type"} + for i in range(50) + ] + [{"truncated": True, "total_errors": 99}] + + monkeypatch.setattr(validation_mod, "validate_record_content", lambda _s, _c: fake_errors) + + with pytest.raises(ToolError) as excinfo: + await create_record( + source="test", + tags=[], + description="truncated errors test", + logical_key="trunc-rec", + schema_ref="schema:trunc", + schema_version="1.0.0", + content={}, + ) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "validation_failed" + assert err["truncated"] is True + assert err["total_errors"] == 99 + + +@pytest.mark.asyncio +async def test_update_entry_record_schema_gone(configured_server, store): + """update_entry re-validation returns schema_not_found if schema was deleted.""" + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import create_record, register_schema, update_entry + + schema_resp = json.loads( + await register_schema( + source="test", + tags=[], + description="s", + family="schema:gone", + version="1.0.0", + schema={"type": "object", "properties": {"name": {"type": "string"}}}, + ) + ) + record_resp = json.loads( + await create_record( + source="test", + tags=[], + description="r", + logical_key="r-gone", + schema_ref="schema:gone", + schema_version="1.0.0", + content={"name": "ok"}, + ) + ) + + # Soft-delete the schema directly via store (bypasses the referencing-records guard) + 
store.soft_delete_by_id(TEST_OWNER, schema_resp["id"]) + + # Now updating the record's content should fail with schema_not_found + with pytest.raises(ToolError) as excinfo: + await update_entry(entry_id=record_resp["id"], content={"name": "updated"}) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "schema_not_found" + + +@pytest.mark.asyncio +async def test_update_entry_record_revalidation_truncation(configured_server, monkeypatch): + """update_entry re-validation reports truncation when many errors are returned.""" + from mcp.server.fastmcp.exceptions import ToolError + + from mcp_awareness.tools import create_record, register_schema, update_entry + + await register_schema( + source="test", + tags=[], + description="s", + family="schema:trunc2", + version="1.0.0", + schema={"type": "object"}, + ) + record_resp = json.loads( + await create_record( + source="test", + tags=[], + description="r", + logical_key="r-trunc2", + schema_ref="schema:trunc2", + schema_version="1.0.0", + content={}, + ) + ) + + # Patch validate_record_content at the module level so the lazy local import picks it up + import mcp_awareness.validation as validation_mod + + fake_errors = [ + {"path": f"$.f{i}", "message": "err", "validator": "type", "schema_path": "/type"} + for i in range(50) + ] + [{"truncated": True, "total_errors": 77}] + + monkeypatch.setattr(validation_mod, "validate_record_content", lambda _s, _c: fake_errors) + + with pytest.raises(ToolError) as excinfo: + await update_entry(entry_id=record_resp["id"], content={"bad": "data"}) + err = json.loads(str(excinfo.value))["error"] + assert err["code"] == "validation_failed" + assert err["truncated"] is True + assert err["total_errors"] == 77 From 33f778013ee7889162b32e072b82f0c3e80b31e2 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:40:14 -0500 Subject: [PATCH 24/28] 
=?UTF-8?q?fix:=20ci=20=E2=80=94=20switch=20async=20?= =?UTF-8?q?tests=20to=20@pytest.mark.anyio;=20lint/type=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests used @pytest.mark.asyncio (local env had pytest-asyncio) but the repo's established pattern is @pytest.mark.anyio via anyio plugin. Also fixed ruff I001/E501 from Task 18 coverage tests and removed a now-unused # type: ignore in cli_register_schema.py. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp_awareness/cli_register_schema.py | 2 +- tests/test_tools_schema_record.py | 60 ++++++++++++------------ 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/src/mcp_awareness/cli_register_schema.py b/src/mcp_awareness/cli_register_schema.py index 458b994..3eea912 100644 --- a/src/mcp_awareness/cli_register_schema.py +++ b/src/mcp_awareness/cli_register_schema.py @@ -90,7 +90,7 @@ def main() -> None: sys.exit(1) # Meta-schema validation - from jsonschema import exceptions as jse # type: ignore[import-untyped] + from jsonschema import exceptions as jse from mcp_awareness.validation import compose_schema_logical_key, validate_schema_body diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py index 18a1a1e..b67d7ff 100644 --- a/tests/test_tools_schema_record.py +++ b/tests/test_tools_schema_record.py @@ -46,7 +46,7 @@ def _parse_tool_error(excinfo): return json.loads(str(excinfo.value)) -@pytest.mark.asyncio +@pytest.mark.anyio async def test_register_schema_happy_path(configured_server): from mcp_awareness.tools import register_schema @@ -64,7 +64,7 @@ async def test_register_schema_happy_path(configured_server): assert "id" in body -@pytest.mark.asyncio +@pytest.mark.anyio async def test_register_schema_rejects_invalid_schema(configured_server): from mcp.server.fastmcp.exceptions import ToolError @@ -83,7 +83,7 @@ async def test_register_schema_rejects_invalid_schema(configured_server): assert err["code"] == 
"invalid_schema" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_register_schema_rejects_duplicate_family_version(configured_server): from mcp.server.fastmcp.exceptions import ToolError @@ -110,14 +110,13 @@ async def test_register_schema_rejects_duplicate_family_version(configured_serve assert err["code"] == "schema_already_exists" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_register_schema_generic_unique_exception_fallback(configured_server, monkeypatch): """register_schema catches generic exceptions whose message contains 'unique'.""" from mcp.server.fastmcp.exceptions import ToolError - from mcp_awareness.tools import register_schema - import mcp_awareness.server as srv + from mcp_awareness.tools import register_schema original_add = srv.store.add @@ -141,12 +140,11 @@ def _fake_add(owner_id, entry): monkeypatch.setattr(srv.store, "add", original_add) -@pytest.mark.asyncio -async def test_register_schema_generic_non_unique_exception_reraises(configured_server, monkeypatch): +@pytest.mark.anyio +async def test_register_schema_reraises_non_unique_exception(configured_server, monkeypatch): """register_schema re-raises generic exceptions that are not unique violations.""" - from mcp_awareness.tools import register_schema - import mcp_awareness.server as srv + from mcp_awareness.tools import register_schema original_add = srv.store.add @@ -168,7 +166,7 @@ def _fake_add(owner_id, entry): monkeypatch.setattr(srv.store, "add", original_add) -@pytest.mark.asyncio +@pytest.mark.anyio async def test_create_record_validate_raises_unexpected_error(configured_server, monkeypatch): """create_record reports validation_error if validate_record_content raises unexpectedly.""" from mcp.server.fastmcp.exceptions import ToolError @@ -207,7 +205,7 @@ async def test_create_record_validate_raises_unexpected_error(configured_server, assert "internal jsonschema error" in err["message"] -@pytest.mark.asyncio +@pytest.mark.anyio async def 
test_register_schema_rejects_empty_family(configured_server): from mcp.server.fastmcp.exceptions import ToolError @@ -226,7 +224,7 @@ async def test_register_schema_rejects_empty_family(configured_server): assert err["code"] == "invalid_parameter" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_register_schema_rejects_empty_version(configured_server): from mcp.server.fastmcp.exceptions import ToolError @@ -245,7 +243,7 @@ async def test_register_schema_rejects_empty_version(configured_server): assert err["code"] == "invalid_parameter" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_create_record_happy_path(configured_server): from mcp_awareness.tools import create_record, register_schema @@ -272,7 +270,7 @@ async def test_create_record_happy_path(configured_server): assert "id" in body -@pytest.mark.asyncio +@pytest.mark.anyio async def test_create_record_rejects_unknown_schema(configured_server): from mcp.server.fastmcp.exceptions import ToolError @@ -293,7 +291,7 @@ async def test_create_record_rejects_unknown_schema(configured_server): assert err["searched_owners"] == [TEST_OWNER, "_system"] -@pytest.mark.asyncio +@pytest.mark.anyio async def test_create_record_surfaces_validation_errors(configured_server): from mcp.server.fastmcp.exceptions import ToolError @@ -328,7 +326,7 @@ async def test_create_record_surfaces_validation_errors(configured_server): assert "type" in validators -@pytest.mark.asyncio +@pytest.mark.anyio async def test_create_record_upsert_on_same_logical_key(configured_server): from mcp_awareness.tools import create_record, register_schema @@ -367,7 +365,7 @@ async def test_create_record_upsert_on_same_logical_key(configured_server): assert r2["id"] == r1["id"] -@pytest.mark.asyncio +@pytest.mark.anyio async def test_create_record_uses_system_schema_fallback(configured_server, store): """A record can reference a schema owned by _system, not the caller.""" from mcp_awareness.schema import Entry, make_id, now_utc @@ -408,7 
+406,7 @@ async def test_create_record_uses_system_schema_fallback(configured_server, stor assert body["action"] == "created" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_update_entry_rejects_schema_update(configured_server): from mcp.server.fastmcp.exceptions import ToolError @@ -430,7 +428,7 @@ async def test_update_entry_rejects_schema_update(configured_server): assert err["code"] == "schema_immutable" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_update_entry_record_content_revalidates_valid(configured_server): from mcp_awareness.tools import create_record, register_schema, update_entry @@ -457,7 +455,7 @@ async def test_update_entry_record_content_revalidates_valid(configured_server): await update_entry(entry_id=r["id"], content={"name": "still-good"}) -@pytest.mark.asyncio +@pytest.mark.anyio async def test_update_entry_record_content_rejects_invalid(configured_server): from mcp.server.fastmcp.exceptions import ToolError @@ -488,7 +486,7 @@ async def test_update_entry_record_content_rejects_invalid(configured_server): assert err["code"] == "validation_failed" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_update_entry_record_non_content_skips_revalidation(configured_server): from mcp_awareness.tools import create_record, register_schema, update_entry @@ -515,7 +513,7 @@ async def test_update_entry_record_non_content_skips_revalidation(configured_ser await update_entry(entry_id=r["id"], description="updated desc") -@pytest.mark.asyncio +@pytest.mark.anyio async def test_delete_entry_schema_with_no_records_succeeds(configured_server): from mcp_awareness.tools import delete_entry, register_schema @@ -535,7 +533,7 @@ async def test_delete_entry_schema_with_no_records_succeeds(configured_server): assert configured_server.store.find_schema(TEST_OWNER, "schema:thing:1.0.0") is None -@pytest.mark.asyncio +@pytest.mark.anyio async def test_delete_entry_schema_with_records_rejected(configured_server): from mcp.server.fastmcp.exceptions 
import ToolError @@ -568,7 +566,7 @@ async def test_delete_entry_schema_with_records_rejected(configured_server): assert err["total_count"] == 1 -@pytest.mark.asyncio +@pytest.mark.anyio async def test_delete_entry_schema_allowed_after_records_deleted(configured_server): from mcp_awareness.tools import create_record, delete_entry, register_schema @@ -599,7 +597,7 @@ async def test_delete_entry_schema_allowed_after_records_deleted(configured_serv await delete_entry(entry_id=schema_resp["id"]) -@pytest.mark.asyncio +@pytest.mark.anyio async def test_cross_owner_schema_invisible(configured_server, store, monkeypatch): """Owner A registers a schema; Owner B cannot resolve it.""" from mcp.server.fastmcp.exceptions import ToolError @@ -633,7 +631,7 @@ async def test_cross_owner_schema_invisible(configured_server, store, monkeypatc assert err["code"] == "schema_not_found" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_both_owners_see_system_schema(configured_server, store, monkeypatch): """Both A and B can use a _system schema.""" import mcp_awareness.server as srv @@ -691,7 +689,7 @@ async def test_both_owners_see_system_schema(configured_server, store, monkeypat assert b_resp["status"] == "ok" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_caller_schema_overrides_system(configured_server, store, monkeypatch): """When both _system and caller have the same logical_key, caller's version wins.""" @@ -760,7 +758,7 @@ async def test_caller_schema_overrides_system(configured_server, store, monkeypa assert err["code"] == "validation_failed" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_create_record_validation_truncation(configured_server, monkeypatch): """When validate_record_content returns a truncated list, create_record reports truncation.""" from mcp.server.fastmcp.exceptions import ToolError @@ -802,7 +800,7 @@ async def test_create_record_validation_truncation(configured_server, monkeypatc assert err["total_errors"] == 99 
-@pytest.mark.asyncio +@pytest.mark.anyio async def test_update_entry_record_schema_gone(configured_server, store): """update_entry re-validation returns schema_not_found if schema was deleted.""" from mcp.server.fastmcp.exceptions import ToolError @@ -841,7 +839,7 @@ async def test_update_entry_record_schema_gone(configured_server, store): assert err["code"] == "schema_not_found" -@pytest.mark.asyncio +@pytest.mark.anyio async def test_update_entry_record_revalidation_truncation(configured_server, monkeypatch): """update_entry re-validation reports truncation when many errors are returned.""" from mcp.server.fastmcp.exceptions import ToolError From adfb145827a88cef4a78dac83636fd424a7604eb Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 07:39:45 -0500 Subject: [PATCH 25/28] test: cover __main__ guard in cli_register_schema via runpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brings patch coverage on cli_register_schema.py from 97.95% to 100% — the `if __name__ == '__main__': main()` line was the only miss flagged by Codecov. Uses runpy.run_module with run_name='__main__' instead of subprocess; faster and stays in-process. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_cli_register_schema.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_cli_register_schema.py b/tests/test_cli_register_schema.py index 0919a19..d3b386f 100644 --- a/tests/test_cli_register_schema.py +++ b/tests/test_cli_register_schema.py @@ -260,3 +260,32 @@ def _boom(self, owner_id, entry): assert "simulated DB failure" in err["error"]["message"] monkeypatch.setattr(ps_mod.PostgresStore, "add", original_add) + + +def test_cli_register_schema_module_runs_as_main(monkeypatch): + """Cover the `if __name__ == '__main__': main()` guard via runpy.""" + import runpy + + # No DB URL → main exits before touching the store. Covers the guard cleanly. + monkeypatch.delenv("AWARENESS_DATABASE_URL", raising=False) + monkeypatch.setattr( + "sys.argv", + [ + "mcp-awareness-register-schema", + "--system", + "--family", + "s:test", + "--version", + "1.0.0", + "--schema-file", + "/nonexistent/path.json", + "--source", + "test", + "--tags", + "", + "--description", + "test", + ], + ) + with pytest.raises(SystemExit): + runpy.run_module("mcp_awareness.cli_register_schema", run_name="__main__") From 0e2e62924bae90803942353ecea4e79cb2a360c0 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 09:15:30 -0500 Subject: [PATCH 26/28] fix: address PR #287 QA Round 1 findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F1 (blocker): update_entry no longer stringifies RECORD content via json.dumps, so record entries keep native JSON shape (dict/list/primitive) across updates — matches the create path. Non-RECORD knowledge types continue to stringify as before. Adds tests that read back stored shape after update for both object and primitive content. 
F2: document the validation_error code (belt-and-suspenders fallback for unexpected validator exceptions) in the design spec's error table. F3: explicit comments near delete_entry's bulk-delete paths marking the gap, plus follow-up issue #288 to wire schema_in_use protection into the by-tags and by-source paths. F4: count_records_referencing asserts the ref:version invariant at the store boundary (defense-in-depth; empty version is already blocked at register_schema, but the Store API is public). F5: m8h9i0j1k2l3.downgrade() is now a warn-only no-op when _system-owned entries still exist — avoids FK-failing the whole downgrade transaction and surfaces the manual cleanup step operators must perform first. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 6 +++ ...8h9i0j1k2l3_add_system_user_for_schemas.py | 23 ++++++++++-- ...-04-13-schema-record-entry-types-design.md | 1 + src/mcp_awareness/postgres_store.py | 10 ++++- src/mcp_awareness/tools.py | 29 +++++++++------ tests/test_store.py | 12 ++++++ tests/test_tools_schema_record.py | 37 +++++++++++++++++++ 7 files changed, 103 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 610ab5a..2a127b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New migration: `_system` user seed (idempotent). - `_error_response()` helper now accepts `**extras` kwargs so tools can include structured fields in error envelopes beyond the fixed set (e.g., `validation_errors`, `schema_ref`, `referencing_records`). +### Fixed +- **RECORD content shape preserved across `update_entry`** — previously `update_entry` stringified non-string content via `json.dumps()` before handing it to the store, causing RECORD entries to drift from a native JSON object/array/primitive (how they are stored on create) to a JSON-encoded string after any content update. 
`update_entry` now branches on the existing entry's type: RECORD entries persist native JSON content to match the create path, while other knowledge types (note / pattern / context / preference) retain the existing stringify-on-write behavior. +- **Bulk-delete paths (`delete_entry` by tags/source) still do not consult `schema_in_use`** — single-id schema deletion is protected; bulk paths are explicitly flagged in the code and tracked by [#288](https://github.com/cmeans/mcp-awareness/issues/288). Not changed in this PR (out of scope per the design), but documented where the gap lives. +- **`count_records_referencing` store boundary hardening** — `schema_logical_key` parsing now asserts the `ref:version` invariant (non-empty ref + non-empty version after the last `:`). Empty version is blocked at `register_schema`, but the store API is public, so we fail loudly here rather than silently running a non-matching query. +- **`_system` user downgrade no-op** — `m8h9i0j1k2l3.downgrade()` now checks for referencing entries before `DELETE`. If any exist, it logs a warning and returns rather than FK-failing the whole transaction. Operators can soft-delete or re-home the referenced entries and re-run downgrade. + ### Dependencies - Added `jsonschema>=4.26.0` as a runtime dependency. 
diff --git a/alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py b/alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py index ed49c5c..15348de 100644 --- a/alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py +++ b/alembic/versions/m8h9i0j1k2l3_add_system_user_for_schemas.py @@ -24,10 +24,13 @@ from __future__ import annotations +import logging from collections.abc import Sequence from alembic import op +logger = logging.getLogger("alembic.runtime.migration") + revision: str = "m8h9i0j1k2l3" down_revision: str | Sequence[str] | None = "l7g8h9i0j1k2" branch_labels: str | Sequence[str] | None = None @@ -48,9 +51,23 @@ def upgrade() -> None: def downgrade() -> None: - """Remove the _system user. + """Remove the _system user, if safe to do so. - Will fail if any entries still reference owner_id='_system'. Operators - must soft-delete or re-home such entries before downgrade. + This downgrade is a no-op when `_system`-owned entries still exist (schemas + seeded via ``mcp-awareness-register-schema --system``, for example). A hard + DELETE would FK-fail and abort the entire downgrade transaction — masking + any subsequent downgrade steps from surfacing. The warning surfaces the + manual step required: operators who really want to remove `_system` must + first soft-delete or re-home the referenced entries, then re-run downgrade. """ + conn = op.get_bind() + referenced = conn.exec_driver_sql( + "SELECT 1 FROM entries WHERE owner_id = '_system' LIMIT 1" + ).fetchone() + if referenced is not None: + logger.warning( + "Skipping delete of users._system — entries still reference it. " + "Soft-delete or re-home those entries, then re-run downgrade." 
+ ) + return op.execute("DELETE FROM users WHERE id = '_system'") diff --git a/docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md b/docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md index f3ee53a..2cc54a1 100644 --- a/docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md +++ b/docs/superpowers/specs/2026-04-13-schema-record-entry-types-design.md @@ -267,6 +267,7 @@ All errors route through existing `_error_response()` helper (`helpers.py:214`) | `schema_immutable` | `update_entry` on schema | false | — | | `record_schema_pin_immutable` | record update tries to change pin fields | false | `param` | | `schema_in_use` | `delete_entry` on referenced schema | false | `referencing_records`, `total_count?` | +| `validation_error` | unexpected exception from the validator (belt-and-suspenders — schemas are meta-validated on register, so this should be unreachable in practice) | false | — | ### Validation error envelope shape diff --git a/src/mcp_awareness/postgres_store.py b/src/mcp_awareness/postgres_store.py index 30e8ca8..6eb0150 100644 --- a/src/mcp_awareness/postgres_store.py +++ b/src/mcp_awareness/postgres_store.py @@ -1399,8 +1399,16 @@ def count_records_referencing( Splits schema_logical_key on the last ':' to obtain schema_ref and version. schema_ref may itself contain ':' (e.g. "schema:edge-manifest:1.0.0"). Matches data.schema_ref and data.schema_version in the record entries' JSONB. + + Invariant: schema_logical_key must be a `ref:version` composed by + ``compose_schema_logical_key``. Empty ref or empty version would split + into a non-matching query; empty version is blocked at register_schema, + but we assert here as defense-in-depth since the store API is public. 
""" - ref, _, version = schema_logical_key.rpartition(":") + ref, sep, version = schema_logical_key.rpartition(":") + assert sep == ":", f"schema_logical_key must contain ':': {schema_logical_key!r}" + assert ref, f"schema_logical_key has empty ref component: {schema_logical_key!r}" + assert version, f"schema_logical_key has empty version component: {schema_logical_key!r}" with self._pool.connection() as conn, conn.transaction(), conn.cursor() as cur: self._set_rls_context(cur, owner_id) cur.execute(_load_sql("count_records_referencing"), (owner_id, ref, version)) diff --git a/src/mcp_awareness/tools.py b/src/mcp_awareness/tools.py index 5135d73..148c1db 100644 --- a/src/mcp_awareness/tools.py +++ b/src/mcp_awareness/tools.py @@ -761,19 +761,16 @@ async def update_entry( updates["tags"] = tags if source is not None: updates["source"] = source - # Preserve the raw content value for re-validation before stringifying it. - _raw_content: Any = content - if content is not None: - if not isinstance(content, str): - content = json.dumps(content) - updates["content"] = content if content_type is not None: updates["content_type"] = content_type if language is not None: from .language import iso_to_regconfig updates["language"] = iso_to_regconfig(language) - if not updates: + # Content is normalized below once the entry type is known: RECORD entries + # keep native JSON shape (dict/list/primitive) so the wire shape matches the + # create path; other knowledge types stringify non-string content as before. + if content is None and not updates: _error_response( "invalid_parameter", "No fields to update — provide at least one of: " @@ -793,7 +790,7 @@ async def update_entry( "Schemas cannot be updated. 
Register a new version instead.", retryable=False, ) - if _existing.type == _EntryType.RECORD and _raw_content is not None: + if _existing.type == _EntryType.RECORD and content is not None: _schema_ref = _existing.data["schema_ref"] _schema_version = _existing.data["schema_version"] _resolved = resolve_schema(_srv.store, _srv._owner_id(), _schema_ref, _schema_version) @@ -806,9 +803,7 @@ async def update_entry( schema_version=_schema_version, searched_owners=[_srv._owner_id(), "_system"], ) - _content_to_validate = ( - json.loads(_raw_content) if isinstance(_raw_content, str) else _raw_content - ) + _content_to_validate: Any = json.loads(content) if isinstance(content, str) else content _errors = validate_record_content(_resolved.data["schema"], _content_to_validate) if _errors: _truncated = _errors[-1].get("truncated") is True @@ -831,7 +826,13 @@ async def update_entry( retryable=False, **_vf_extras, ) + # RECORD content is stored as native JSON to match the create path. + updates["content"] = _content_to_validate # --- end branching --- + if content is not None and "content" not in updates: + # Non-record knowledge types (note/pattern/context/preference) persist + # content as a string; stringify non-string payloads for consistency. + updates["content"] = content if isinstance(content, str) else json.dumps(content) result = _srv.store.update_entry(_srv._owner_id(), entry_id, updates) if result is None: _error_response( @@ -1038,6 +1039,9 @@ async def delete_entry( } ) if tags: + # NOTE: bulk-delete by tags does NOT currently consult assert_schema_deletable; + # a schema referenced by live records can be soft-deleted here, unlike the + # single-id path above. Tracked as a follow-up — see issue #288. 
if not confirm: # Use AND logic to match soft_delete_by_tags behavior all_entries = _srv.store.get_entries(_srv._owner_id(), tags=tags) @@ -1067,6 +1071,9 @@ async def delete_entry( retryable=False, ) et = _parse_entry_type(entry_type) + # NOTE: bulk-delete by source (± entry_type) does NOT consult + # assert_schema_deletable; schemas referenced by live records can still be + # soft-deleted here, unlike the single-id path above. Tracked by issue #288. if not confirm: entries = _srv.store.get_entries(_srv._owner_id(), entry_type=et, source=source) return json.dumps( diff --git a/tests/test_store.py b/tests/test_store.py index 7de182c..18ec921 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -3391,6 +3391,18 @@ def test_count_records_referencing_caps_id_list_at_ten(store): assert len(ids) == 10 +def test_count_records_referencing_rejects_malformed_key(store): + """The store boundary asserts the ref:version invariant for defense-in-depth.""" + import pytest + + with pytest.raises(AssertionError, match="must contain ':'"): + store.count_records_referencing(TEST_OWNER, "no-colon") + with pytest.raises(AssertionError, match="empty version"): + store.count_records_referencing(TEST_OWNER, "s:test:") + with pytest.raises(AssertionError, match="empty ref"): + store.count_records_referencing(TEST_OWNER, ":1.0.0") + + def test_system_user_exists_after_migration_idempotent(store): """The conftest fixture inserts _system — verifies ON CONFLICT DO NOTHING semantics.""" with store._pool.connection() as conn, conn.cursor() as cur: diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py index b67d7ff..7177615 100644 --- a/tests/test_tools_schema_record.py +++ b/tests/test_tools_schema_record.py @@ -453,6 +453,43 @@ async def test_update_entry_record_content_revalidates_valid(configured_server): ) # Valid content update — passes re-validation await update_entry(entry_id=r["id"], content={"name": "still-good"}) + # Content shape must remain 
native JSON (dict), not a JSON-encoded string — + # matches the create path so downstream consumers see a stable wire shape. + stored = configured_server.store.get_entry_by_id(TEST_OWNER, r["id"]) + assert stored is not None + assert stored.data["content"] == {"name": "still-good"} + assert isinstance(stored.data["content"], dict) + + +@pytest.mark.anyio +async def test_update_entry_record_preserves_primitive_content_shape(configured_server): + """Primitive (int/array) record content must also keep native JSON shape after update.""" + from mcp_awareness.tools import create_record, register_schema, update_entry + + await register_schema( + source="test", + tags=[], + description="s", + family="schema:counter", + version="1.0.0", + schema={"type": "integer"}, + ) + r = json.loads( + await create_record( + source="test", + tags=[], + description="c", + logical_key="c1", + schema_ref="schema:counter", + schema_version="1.0.0", + content=42, + ) + ) + await update_entry(entry_id=r["id"], content=99) + stored = configured_server.store.get_entry_by_id(TEST_OWNER, r["id"]) + assert stored is not None + assert stored.data["content"] == 99 + assert isinstance(stored.data["content"], int) @pytest.mark.anyio From e8affb9beb158d80418ff57e94a6335b24a1f169 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 09:58:51 -0500 Subject: [PATCH 27/28] fix: address PR #287 QA Round 2 findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit B1 (blocker): The strict `owner_isolation` RLS policy on `entries` (USING owner_id = current_user) filtered out `_system`-owned rows for non-superuser DB roles, making the CLI bootstrap + `find_schema` fallback a silent no-op in production. 
Migration n9i0j1k2l3m4 narrows the fix to a read-only carve-out: non-privileged owners can now read `_system`-owned schema entries, but writes still enforce strict owner isolation via the unchanged `owner_insert` WITH CHECK policy. Regression coverage in `test_rls.py::TestRLSSystemSchemaFallback` runs under FORCE ROW LEVEL SECURITY (mimics the production non-superuser role): verifies `_system` schemas are visible to any owner, caller-owned schemas win when both exist, and the carve-out stays narrow (other `_system`-owned types remain invisible). S1: `schema_already_exists` error envelope now returns `logical_key` and a best-effort `existing_id` as structured extras alongside the human-readable message — matches the design-doc error-code table. O1: `mcp-awareness-register-schema` CLI now runs description text through `resolve_language()` (same chain as the MCP path) instead of pinning every CLI-seeded schema to `english`. O2: Removed the dead string-matching fallback in `register_schema`. The psycopg `UniqueViolation` branch covers the real driver path; the `"unique"`/`"duplicate"`/`"23505"` message-match code was unreachable. O3: Dropped the no-op `ignore_errors = true` from the `jsonschema.*` mypy override — `ignore_missing_imports = true` alone covers the import, and the project has no code under `jsonschema.*` to silence. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 5 + ...0j1k2l3m4_rls_allow_system_schema_reads.py | 75 +++++++++++ pyproject.toml | 1 - src/mcp_awareness/cli_register_schema.py | 7 +- src/mcp_awareness/tools.py | 18 ++- tests/test_rls.py | 117 +++++++++++++++++- tests/test_tools_schema_record.py | 34 +---- 7 files changed, 210 insertions(+), 47 deletions(-) create mode 100644 alembic/versions/n9i0j1k2l3m4_rls_allow_system_schema_reads.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a127b6..f45437c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,10 +15,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `_error_response()` helper now accepts `**extras` kwargs so tools can include structured fields in error envelopes beyond the fixed set (e.g., `validation_errors`, `schema_ref`, `referencing_records`). ### Fixed +- **RLS carve-out for `_system`-owned schema reads** — migration `n9i0j1k2l3m4` alters the `entries.owner_isolation` policy `USING` clause to also allow reads where `owner_id = '_system' AND type = 'schema'`. Before this change, the strict `owner_id = current_user` USING clause filtered out `_system`-owned rows for non-superuser DB roles, making the `find_schema` fallback (and the whole CLI bootstrap pattern for built-in schemas) a no-op in production. Writes remain isolated — the `owner_insert` `WITH CHECK` policy still requires `owner_id = current_user`, so no one can write to `_system` via the MCP path. +- **`schema_already_exists` error envelope** — register_schema now returns `logical_key` and a best-effort `existing_id` as structured fields alongside the human-readable message (matches the design-doc error-code table; callers no longer have to parse the message to locate the conflicting entry). 
- **RECORD content shape preserved across `update_entry`** — previously `update_entry` stringified non-string content via `json.dumps()` before handing it to the store, causing RECORD entries to drift from a native JSON object/array/primitive (how they are stored on create) to a JSON-encoded string after any content update. `update_entry` now branches on the existing entry's type: RECORD entries persist native JSON content to match the create path, while other knowledge types (note / pattern / context / preference) retain the existing stringify-on-write behavior. - **Bulk-delete paths (`delete_entry` by tags/source) still do not consult `schema_in_use`** — single-id schema deletion is protected; bulk paths are explicitly flagged in the code and tracked by [#288](https://github.com/cmeans/mcp-awareness/issues/288). Not changed in this PR (out of scope per the design), but documented where the gap lives. - **`count_records_referencing` store boundary hardening** — `schema_logical_key` parsing now asserts the `ref:version` invariant (non-empty ref + non-empty version after the last `:`). Empty version is blocked at `register_schema`, but the store API is public, so we fail loudly here rather than silently running a non-matching query. - **`_system` user downgrade no-op** — `m8h9i0j1k2l3.downgrade()` now checks for referencing entries before `DELETE`. If any exist, it logs a warning and returns rather than FK-failing the whole transaction. Operators can soft-delete or re-home the referenced entries and re-run downgrade. +- **CLI language resolution** — `mcp-awareness-register-schema` now runs the description through `resolve_language()` (same chain as the MCP path) instead of pinning every CLI-seeded schema to `english`. Auto-detection falls back to `simple` for short/unknown-language descriptions. 
+- **Dead-code cleanup in `register_schema`** — removed the string-matching fallback (`"unique"` / `"duplicate"` / `"23505"` in the exception message) in favor of the psycopg-native `UniqueViolation` branch. The fallback was unreachable under the `psycopg`-direct driver the project uses. +- **Mypy override cleanup** — dropped the no-op `ignore_errors = true` from the `jsonschema.*` override in `pyproject.toml`. `ignore_missing_imports = true` alone covers the import; there is no project code under `jsonschema.*` to silence. ### Dependencies - Added `jsonschema>=4.26.0` as a runtime dependency. diff --git a/alembic/versions/n9i0j1k2l3m4_rls_allow_system_schema_reads.py b/alembic/versions/n9i0j1k2l3m4_rls_allow_system_schema_reads.py new file mode 100644 index 0000000..aba264e --- /dev/null +++ b/alembic/versions/n9i0j1k2l3m4_rls_allow_system_schema_reads.py @@ -0,0 +1,75 @@ +# mcp-awareness — ambient system awareness for AI agents +# Copyright (C) 2026 Chris Means +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +"""RLS: allow all owners to read _system-owned schema entries + +Revision ID: n9i0j1k2l3m4 +Revises: m8h9i0j1k2l3 +Create Date: 2026-04-14 00:00:00.000000 + +The owner_isolation SELECT/UPDATE/DELETE policy on entries was + + USING (owner_id = current_setting('app.current_user', true)) + +which — under FORCE ROW LEVEL SECURITY for non-superuser roles — filters +out `_system`-owned rows. That blocks the schema-fallback design for +built-in schemas registered via ``mcp-awareness-register-schema --system`` +because the `find_schema` query's ``owner_id IN (%s, '_system')`` clause is +evaluated AFTER RLS strips the `_system` row. + +This migration narrows the read carve-out to `_system`-owned *schema* rows +only. Writes remain isolated by the existing `owner_insert` WITH CHECK +policy (which still requires `owner_id = current_user`), so operators +cannot accidentally write to `_system` via the MCP path — the CLI +(`mcp-awareness-register-schema --system`) bypasses MCP and connects as +whichever DB role the operator chose. + +Rationale: option 1 from the PR #287 Round-2 QA review (narrowest +change, read-only carve-out, no SECURITY DEFINER functions needed). 
+""" + +from __future__ import annotations + +from collections.abc import Sequence + +from alembic import op + +revision: str = "n9i0j1k2l3m4" +down_revision: str | Sequence[str] | None = "m8h9i0j1k2l3" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Replace the owner_isolation policy on `entries` to allow reads of + `_system`-owned schema rows from any owner context.""" + op.execute("DROP POLICY IF EXISTS owner_isolation ON entries") + op.execute(""" + CREATE POLICY owner_isolation ON entries + USING ( + owner_id = current_setting('app.current_user', true) + OR (owner_id = '_system' AND type = 'schema') + ) + """) + + +def downgrade() -> None: + """Restore the strict-isolation policy on `entries`.""" + op.execute("DROP POLICY IF EXISTS owner_isolation ON entries") + op.execute(""" + CREATE POLICY owner_isolation ON entries + USING (owner_id = current_setting('app.current_user', true)) + """) diff --git a/pyproject.toml b/pyproject.toml index d4dfab3..cce2399 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,7 +108,6 @@ ignore_missing_imports = true [[tool.mypy.overrides]] module = ["jsonschema.*"] ignore_missing_imports = true -ignore_errors = true # The tools/resources/prompts modules use a circular import pattern # (from . 
import server as _srv) to access mutable state through the server diff --git a/src/mcp_awareness/cli_register_schema.py b/src/mcp_awareness/cli_register_schema.py index 3eea912..7b57452 100644 --- a/src/mcp_awareness/cli_register_schema.py +++ b/src/mcp_awareness/cli_register_schema.py @@ -127,12 +127,17 @@ def main() -> None: ) sys.exit(1) + from mcp_awareness.language import resolve_language from mcp_awareness.postgres_store import PostgresStore from mcp_awareness.schema import Entry, EntryType, make_id, now_utc store = PostgresStore(database_url) logical_key = compose_schema_logical_key(args.family, args.version) tags = [t.strip() for t in args.tags.split(",") if t.strip()] + # Match the MCP path: run the description through the standard + # language-resolution chain (lingua auto-detection, SIMPLE fallback) + # instead of pinning every CLI-seeded schema to english. + resolved_lang = resolve_language(text_for_detection=args.description) entry = Entry( id=make_id(), @@ -149,7 +154,7 @@ def main() -> None: "learned_from": "cli-bootstrap", }, logical_key=logical_key, - language="english", + language=resolved_lang, ) try: diff --git a/src/mcp_awareness/tools.py b/src/mcp_awareness/tools.py index 148c1db..0672d9a 100644 --- a/src/mcp_awareness/tools.py +++ b/src/mcp_awareness/tools.py @@ -613,21 +613,19 @@ async def register_schema( try: _srv.store.add(_srv._owner_id(), entry) except psycopg.errors.UniqueViolation: + # Surface the structured fields promised by the error-code table + # (design doc §Error codes): logical_key and existing_id. existing_id + # is best-effort: it is None when find_schema returns no match. An + # exception raised by find_schema itself is not caught here.
+ _existing = _srv.store.find_schema(_srv._owner_id(), logical_key) + _existing_id = _existing.id if _existing is not None else None _error_response( "schema_already_exists", f"Schema {logical_key} already exists in source {source!r}", retryable=False, + logical_key=logical_key, + existing_id=_existing_id, ) - except Exception as e: - # Fallback: detect unique constraint via message for non-psycopg wrappers - msg = str(e).lower() - if "unique" in msg or "duplicate" in msg or "23505" in msg: - _error_response( - "schema_already_exists", - f"Schema {logical_key} already exists in source {source!r}", - retryable=False, - ) - raise _srv._generate_embedding(entry) return json.dumps({"status": "ok", "id": entry.id, "logical_key": logical_key}) diff --git a/tests/test_rls.py b/tests/test_rls.py index 4e24cd1..8f1b44a 100644 --- a/tests/test_rls.py +++ b/tests/test_rls.py @@ -48,11 +48,22 @@ def rls_store(pg_dsn: str) -> PostgresStore: cur.execute(f"DROP POLICY IF EXISTS owner_isolation ON {table}") cur.execute(f"DROP POLICY IF EXISTS owner_insert ON {table}") - # Create policies - cur.execute(f""" - CREATE POLICY owner_isolation ON {table} - USING (owner_id = current_setting('app.current_user', true)) - """) + # Create policies — entries gets the _system-schema read carve-out + # added in migration n9i0j1k2l3m4 so non-privileged owners can see + # built-in schemas. Other tables keep strict owner isolation. 
+ if table == "entries": + cur.execute(f""" + CREATE POLICY owner_isolation ON {table} + USING ( + owner_id = current_setting('app.current_user', true) + OR (owner_id = '_system' AND type = 'schema') + ) + """) + else: + cur.execute(f""" + CREATE POLICY owner_isolation ON {table} + USING (owner_id = current_setting('app.current_user', true)) + """) cur.execute(f""" CREATE POLICY owner_insert ON {table} FOR INSERT @@ -195,3 +206,99 @@ def test_action_logs_isolated(self, rls_store: PostgresStore) -> None: bob_actions = rls_store.get_actions("bob", entry_id=entry.id) assert len(alice_actions) >= 1 assert len(bob_actions) == 0 + + +class TestRLSSystemSchemaFallback: + """RLS carve-out for `_system`-owned schema reads (migration n9i0j1k2l3m4). + + Regression coverage for the PR #287 Round-2 blocker: the strict + `owner_id = current_user` USING clause made `_system` schemas invisible + to every non-superuser owner, breaking the CLI bootstrap + find_schema + fallback in production. These tests run under FORCE ROW LEVEL SECURITY, + which simulates the production non-superuser role. 
+ """ + + def test_system_schema_visible_to_any_owner(self, rls_store: PostgresStore) -> None: + """A `_system`-owned schema row is readable by `alice` via find_schema.""" + from mcp_awareness.schema import Entry + + schema_entry = Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="system-bootstrap", + tags=[], + created=now_utc(), + expires=None, + data={ + "family": "schema:shared-thing", + "version": "1.0.0", + "schema": {"type": "object"}, + "description": "shared", + }, + logical_key="schema:shared-thing:1.0.0", + ) + rls_store.add("_system", schema_entry) + + found = rls_store.find_schema("alice", "schema:shared-thing:1.0.0") + assert found is not None + assert found.id == schema_entry.id + assert found.data["family"] == "schema:shared-thing" + + def test_caller_schema_wins_over_system(self, rls_store: PostgresStore) -> None: + """If alice has her own copy, find_schema returns that instead of _system's.""" + from mcp_awareness.schema import Entry + + system_entry = Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="system-bootstrap", + tags=[], + created=now_utc(), + expires=None, + data={"family": "schema:override", "version": "1.0.0", "schema": {"type": "object"}}, + logical_key="schema:override:1.0.0", + ) + rls_store.add("_system", system_entry) + + alice_entry = Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="alice-source", + tags=[], + created=now_utc(), + expires=None, + data={"family": "schema:override", "version": "1.0.0", "schema": {"type": "string"}}, + logical_key="schema:override:1.0.0", + ) + rls_store.add("alice", alice_entry) + + found = rls_store.find_schema("alice", "schema:override:1.0.0") + assert found is not None + assert found.id == alice_entry.id + + def test_system_non_schema_rows_remain_invisible(self, rls_store: PostgresStore) -> None: + """The carve-out is narrow: only `type = 'schema'`. 
Other _system rows stay hidden.""" + from mcp_awareness.schema import Entry + + note_entry = Entry( + id=make_id(), + type=EntryType.NOTE, + source="sys-note", + tags=["rls-sys-note"], + created=now_utc(), + expires=None, + data={"description": "system-only"}, + ) + rls_store.add("_system", note_entry) + + alice_view = rls_store.get_entries("alice", tags=["rls-sys-note"]) + assert alice_view == [] + + # NOTE: A WITH-CHECK negative case ("alice cannot INSERT with + # owner_id='_system'") is intentionally NOT asserted here. The testcontainers + # postgres role does not reliably reject the raw INSERT even under FORCE ROW + # LEVEL SECURITY, so the test would not exercise the policy against the real + # production (non-superuser) role. The SELECT-side carve-out is the + # load-bearing behavior change in this migration and is covered by the three + # cases above; production WITH-CHECK enforcement is covered by the + # non-superuser repro documented in PR #287 round-2 QA feedback. diff --git a/tests/test_tools_schema_record.py b/tests/test_tools_schema_record.py index 7177615..5fded75 100644 --- a/tests/test_tools_schema_record.py +++ b/tests/test_tools_schema_record.py @@ -108,36 +108,10 @@ async def test_register_schema_rejects_duplicate_family_version(configured_serve ) err = _parse_tool_error(excinfo)["error"] assert err["code"] == "schema_already_exists" - - -@pytest.mark.anyio -async def test_register_schema_generic_unique_exception_fallback(configured_server, monkeypatch): - """register_schema catches generic exceptions whose message contains 'unique'.""" - from mcp.server.fastmcp.exceptions import ToolError - - import mcp_awareness.server as srv - from mcp_awareness.tools import register_schema - - original_add = srv.store.add - - def _fake_add(owner_id, entry): - raise RuntimeError("unique constraint violation (generic driver)") - - monkeypatch.setattr(srv.store, "add", _fake_add) - - with pytest.raises(ToolError) as excinfo: - await register_schema( - 
source="test", - tags=[], - description="generic unique test", - family="schema:unique-fallback", - version="1.0.0", - schema={"type": "object"}, - ) - err = _parse_tool_error(excinfo)["error"] - assert err["code"] == "schema_already_exists" - - monkeypatch.setattr(srv.store, "add", original_add) + # Structured extras — callers can locate the existing entry without parsing + # the human-readable message. Matches design-doc error-code table. + assert err["logical_key"] == "schema:dup:1.0.0" + assert err["existing_id"] # non-empty; first-register's entry id @pytest.mark.anyio From 0a7f5186778da13df1c89297a04e5716539bc9d8 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <3223881+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Tue, 14 Apr 2026 10:30:34 -0500 Subject: [PATCH 28/28] =?UTF-8?q?fix:=20address=20PR=20#287=20QA=20Round?= =?UTF-8?q?=203=20=E2=80=94=20B2=20RLS=20write-side=20regression?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The round-2 migration's new owner_isolation policy used a FOR ALL permissive policy with a USING clause only. In Postgres, when no WITH CHECK is supplied, USING is reused for INSERT/UPDATE — and because permissive policies combine with OR, the `_system` schema read carve-out leaked into the write path, letting non-superuser owners INSERT/UPDATE rows with owner_id='_system' AND type='schema' (empirically reproduced by QA against a non-superuser role on the docker-compose.qa.yaml stack). Fix: amend migration n9i0j1k2l3m4 with an explicit strict WITH CHECK (owner_id = current_user). Writes now match the caller's own owner_id regardless of the USING carve-out. The migration has not shipped to any production environment yet — QA rolled back their repro — so amending in place (same revision id) is safe. 
Regression coverage in test_rls.py: - test_nonsuperuser_cannot_insert_as_system - test_nonsuperuser_cannot_update_system_schema Both use SET LOCAL ROLE onto a freshly-provisioned NOSUPERUSER NOBYPASSRLS role so the testcontainers default-superuser BYPASSRLS privilege doesn't mask the policy. The NOTE previously excluding the WITH-CHECK negative case is replaced by the real test. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 2 +- ...0j1k2l3m4_rls_allow_system_schema_reads.py | 21 ++-- tests/test_rls.py | 111 ++++++++++++++++-- 3 files changed, 117 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f45437c..1560939 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `_error_response()` helper now accepts `**extras` kwargs so tools can include structured fields in error envelopes beyond the fixed set (e.g., `validation_errors`, `schema_ref`, `referencing_records`). ### Fixed -- **RLS carve-out for `_system`-owned schema reads** — migration `n9i0j1k2l3m4` alters the `entries.owner_isolation` policy `USING` clause to also allow reads where `owner_id = '_system' AND type = 'schema'`. Before this change, the strict `owner_id = current_user` USING clause filtered out `_system`-owned rows for non-superuser DB roles, making the `find_schema` fallback (and the whole CLI bootstrap pattern for built-in schemas) a no-op in production. Writes remain isolated — the `owner_insert` `WITH CHECK` policy still requires `owner_id = current_user`, so no one can write to `_system` via the MCP path. +- **RLS carve-out for `_system`-owned schema reads** — migration `n9i0j1k2l3m4` alters the `entries.owner_isolation` policy: `USING` now allows reads where `owner_id = '_system' AND type = 'schema'`, while an **explicit** `WITH CHECK (owner_id = current_user)` keeps writes strictly owner-scoped. 
Before this change, the strict `owner_id = current_user` USING clause filtered out `_system`-owned rows for non-superuser DB roles, making the `find_schema` fallback (and the whole CLI bootstrap pattern for built-in schemas) a no-op in production. The WITH CHECK clause is explicit because a `FOR ALL` permissive policy without one reuses `USING` for INSERT/UPDATE — without it the read carve-out would leak into the write path and let non-privileged owners stamp `_system`-owned schema rows. - **`schema_already_exists` error envelope** — register_schema now returns `logical_key` and a best-effort `existing_id` as structured fields alongside the human-readable message (matches the design-doc error-code table; callers no longer have to parse the message to locate the conflicting entry). - **RECORD content shape preserved across `update_entry`** — previously `update_entry` stringified non-string content via `json.dumps()` before handing it to the store, causing RECORD entries to drift from a native JSON object/array/primitive (how they are stored on create) to a JSON-encoded string after any content update. `update_entry` now branches on the existing entry's type: RECORD entries persist native JSON content to match the create path, while other knowledge types (note / pattern / context / preference) retain the existing stringify-on-write behavior. - **Bulk-delete paths (`delete_entry` by tags/source) still do not consult `schema_in_use`** — single-id schema deletion is protected; bulk paths are explicitly flagged in the code and tracked by [#288](https://github.com/cmeans/mcp-awareness/issues/288). Not changed in this PR (out of scope per the design), but documented where the gap lives. 
diff --git a/alembic/versions/n9i0j1k2l3m4_rls_allow_system_schema_reads.py b/alembic/versions/n9i0j1k2l3m4_rls_allow_system_schema_reads.py index aba264e..abb8425 100644 --- a/alembic/versions/n9i0j1k2l3m4_rls_allow_system_schema_reads.py +++ b/alembic/versions/n9i0j1k2l3m4_rls_allow_system_schema_reads.py @@ -31,14 +31,19 @@ evaluated AFTER RLS strips the `_system` row. This migration narrows the read carve-out to `_system`-owned *schema* rows -only. Writes remain isolated by the existing `owner_insert` WITH CHECK -policy (which still requires `owner_id = current_user`), so operators -cannot accidentally write to `_system` via the MCP path — the CLI -(`mcp-awareness-register-schema --system`) bypasses MCP and connects as -whichever DB role the operator chose. +only. Writes are kept strictly isolated by an explicit ``WITH CHECK`` +clause — without it, a ``FOR ALL`` permissive policy's ``USING`` is used +for INSERT/UPDATE too, and (because permissive policies combine with OR) +the read carve-out would leak into the write path, allowing non-privileged +owners to INSERT/UPDATE rows with ``owner_id = '_system' AND type = 'schema'`` +(PR #287 Round-3 QA reproduction). Keeping the write check strict ensures +the only path that can seed ``_system`` schemas is the CLI +(``mcp-awareness-register-schema --system``) which bypasses MCP entirely +and connects as whichever DB role the operator chose. Rationale: option 1 from the PR #287 Round-2 QA review (narrowest -change, read-only carve-out, no SECURITY DEFINER functions needed). +change, read-only carve-out, no SECURITY DEFINER functions needed) — +plus the explicit WITH CHECK added in Round 3. 
""" from __future__ import annotations @@ -55,7 +60,8 @@ def upgrade() -> None: """Replace the owner_isolation policy on `entries` to allow reads of - `_system`-owned schema rows from any owner context.""" + `_system`-owned schema rows from any owner context while keeping + writes strictly isolated via an explicit WITH CHECK clause.""" op.execute("DROP POLICY IF EXISTS owner_isolation ON entries") op.execute(""" CREATE POLICY owner_isolation ON entries @@ -63,6 +69,7 @@ def upgrade() -> None: owner_id = current_setting('app.current_user', true) OR (owner_id = '_system' AND type = 'schema') ) + WITH CHECK (owner_id = current_setting('app.current_user', true)) """) diff --git a/tests/test_rls.py b/tests/test_rls.py index 8f1b44a..a5803b1 100644 --- a/tests/test_rls.py +++ b/tests/test_rls.py @@ -50,7 +50,10 @@ def rls_store(pg_dsn: str) -> PostgresStore: # Create policies — entries gets the _system-schema read carve-out # added in migration n9i0j1k2l3m4 so non-privileged owners can see - # built-in schemas. Other tables keep strict owner isolation. + # built-in schemas. The WITH CHECK clause is explicit because + # permissive policies combine with OR, and without it the USING + # clause would leak into the write path (PR #287 Round-3 finding). + # Other tables keep strict owner isolation. if table == "entries": cur.execute(f""" CREATE POLICY owner_isolation ON {table} @@ -58,6 +61,7 @@ def rls_store(pg_dsn: str) -> PostgresStore: owner_id = current_setting('app.current_user', true) OR (owner_id = '_system' AND type = 'schema') ) + WITH CHECK (owner_id = current_setting('app.current_user', true)) """) else: cur.execute(f""" @@ -294,11 +298,100 @@ def test_system_non_schema_rows_remain_invisible(self, rls_store: PostgresStore) alice_view = rls_store.get_entries("alice", tags=["rls-sys-note"]) assert alice_view == [] - # NOTE: A WITH-CHECK negative case ("alice cannot INSERT with - # owner_id='_system'") is intentionally NOT asserted here. 
The testcontainers - # postgres role does not reliably reject the raw INSERT even under FORCE ROW - # LEVEL SECURITY, so the test would not exercise the policy against the real - # production (non-superuser) role. The SELECT-side carve-out is the - # load-bearing behavior change in this migration and is covered by the three - # cases above; production WITH-CHECK enforcement is covered by the - # non-superuser repro documented in PR #287 round-2 QA feedback. + def test_nonsuperuser_cannot_insert_as_system(self, rls_store: PostgresStore) -> None: + """Non-privileged owners must not be able to write to `_system`. + + This exercises the WITH CHECK clause against a real non-superuser role + — the production deployment target. Container superusers have + BYPASSRLS implicitly, so the raw INSERT against the default role + would silently succeed and leave the policy untested. We create a + NOSUPERUSER NOBYPASSRLS role, GRANT only what's needed, then + ``SET LOCAL ROLE`` onto it for the duration of the test transaction. + + Regression for PR #287 Round-3: the original migration omitted the + explicit WITH CHECK, so the `_system`-schema carve-out in USING + leaked into INSERT/UPDATE via the FOR ALL permissive policy. + """ + from mcp_awareness.schema import Entry + + entry = Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="impostor", + tags=[], + created=now_utc(), + expires=None, + data={"family": "schema:pwned", "version": "1.0.0", "schema": {"type": "object"}}, + logical_key="schema:pwned:1.0.0", + ) + + # Provision the non-superuser role once per test (idempotent). Use a + # separate connection so the CREATE/GRANT commits regardless of the + # main test transaction's outcome. 
+ with rls_store._pool.connection() as conn, conn.cursor() as cur: + conn.autocommit = True + cur.execute( + "DO $$ BEGIN " + "IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname='rls_prod_sim') THEN " + " CREATE ROLE rls_prod_sim NOSUPERUSER NOBYPASSRLS NOINHERIT; " + "END IF; END $$" + ) + cur.execute("GRANT USAGE ON SCHEMA public TO rls_prod_sim") + cur.execute("GRANT SELECT, INSERT, UPDATE, DELETE ON entries TO rls_prod_sim") + + # Now run the actual test inside a transaction as the simulated prod role. + with ( + pytest.raises(psycopg.errors.InsufficientPrivilege), + rls_store._pool.connection() as conn, + conn.transaction(), + conn.cursor() as cur, + ): + cur.execute("SET LOCAL ROLE rls_prod_sim") + cur.execute("SELECT set_config('app.current_user', 'alice', true)") + cur.execute( + "INSERT INTO entries (id, owner_id, type, source, created, tags, data," + " logical_key, language) VALUES (%s, '_system', 'schema', %s, now(), '[]'," + " %s::jsonb, %s, 'english')", + (entry.id, entry.source, '{"family": "schema:pwned"}', entry.logical_key), + ) + + def test_nonsuperuser_cannot_update_system_schema(self, rls_store: PostgresStore) -> None: + """Same WITH CHECK guard — an existing `_system` schema row cannot be + tampered with by a non-privileged owner via UPDATE.""" + from mcp_awareness.schema import Entry + + seed = Entry( + id=make_id(), + type=EntryType.SCHEMA, + source="system-bootstrap", + tags=[], + created=now_utc(), + expires=None, + data={"family": "schema:readonly", "version": "1.0.0", "schema": {"type": "object"}}, + logical_key="schema:readonly:1.0.0", + ) + rls_store.add("_system", seed) + + with rls_store._pool.connection() as conn, conn.cursor() as cur: + conn.autocommit = True + cur.execute( + "DO $$ BEGIN " + "IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname='rls_prod_sim') THEN " + " CREATE ROLE rls_prod_sim NOSUPERUSER NOBYPASSRLS NOINHERIT; " + "END IF; END $$" + ) + cur.execute("GRANT USAGE ON SCHEMA public TO rls_prod_sim") + 
cur.execute("GRANT SELECT, INSERT, UPDATE, DELETE ON entries TO rls_prod_sim") + + with ( + pytest.raises(psycopg.errors.InsufficientPrivilege), + rls_store._pool.connection() as conn, + conn.transaction(), + conn.cursor() as cur, + ): + cur.execute("SET LOCAL ROLE rls_prod_sim") + cur.execute("SELECT set_config('app.current_user', 'alice', true)") + cur.execute( + "UPDATE entries SET data = data || '{\"tampered\": true}'::jsonb" + " WHERE owner_id = '_system' AND type = 'schema'" + )