diff --git a/CHANGELOG.md b/CHANGELOG.md index e840873..8f0a37b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.6.0] - 2026-04-06 + +### Added +- `vault.get_content(resource_id)` — retrieve full text content (reassembles chunks) +- `vault.replace(resource_id, new_content)` — atomic content replacement with auto-supersession +- `vault.get_provenance(resource_id)` — retrieve provenance records for a resource +- `vault.set_adversarial_status(resource_id, status)` — persist adversarial verification status +- `adversarial_status` column in storage schemas (persisted, was RAM-only) +- `provenance` table in storage schemas (persisted, was RAM-only) +- `updated_at`, `resource_type`, `data_classification` fields on `SearchResult` model +- Layer `search_boost` applied in ranking (OPERATIONAL 1.5x, STRATEGIC 1.0x) + +### Fixed +- **Freshness decay**: was hardcoded to 1.0, now computed from `updated_at` with per-tier half-life +- **Layer search_boost**: defined per layer but never applied in `apply_trust_weighting()` + +### Changed +- README badges corrected: removed undelivered encryption/FIPS claims, fixed test count +- Encryption (`[encryption]`) and docling (`[docling]`) extras marked as "planned v0.8" + ## [0.5.0] - 2026-04-06 ### Added diff --git a/README.md b/README.md index 719a417..c1793fe 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,12 @@ **The governed knowledge store for autonomous organizations.** -Every document has a trust tier that weights search results. Every chunk has a SHA3-256 content ID. Every mutation is auditable. The entire vault is verifiable via Merkle tree. Air-gap native. Post-quantum ready. +Every document has a trust tier that weights search results. Every chunk has a SHA3-256 content ID. Every mutation is auditable. The entire vault is verifiable via Merkle tree. Air-gap native. [![Python](https://img.shields.io/badge/Python-3.12+-3776AB.svg)](https://www.python.org/) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -[![Tests](https://img.shields.io/badge/Tests-375_passing-brightgreen.svg)](tests/) -[![Security](https://img.shields.io/badge/Security-100%2F100-brightgreen.svg)](docs/security.md) -[![FIPS](https://img.shields.io/badge/Crypto-SHA3--256%20%C2%B7%20Ed25519%20%C2%B7%20ML--KEM--768-purple.svg)](#security) +[![Tests](https://img.shields.io/badge/Tests-448_passing-brightgreen.svg)](tests/) +[![Crypto](https://img.shields.io/badge/Crypto-SHA3--256%20%C2%B7%20Ed25519-purple.svg)](#security) @@ -138,8 +137,8 @@ pip install qp-vault | `pip install qp-vault` | SQLite, trust search, CAS, Merkle, lifecycle | **1** (pydantic) | | `pip install qp-vault[postgres]` | + PostgreSQL + pgvector hybrid search | + sqlalchemy, asyncpg, pgvector | | `pip install qp-vault[capsule]` | + Cryptographic audit trail | + [qp-capsule](https://github.com/quantumpipes/capsule) | -| `pip install qp-vault[docling]` | + 25+ format document processing | + docling | -| `pip install qp-vault[encryption]` | + AES-256-GCM + ML-KEM-768 at rest | + cryptography, pynacl | +| `pip install qp-vault[docling]` | + 25+ format document processing (planned v0.8) | + docling | +| `pip install qp-vault[encryption]` | + AES-256-GCM encryption at rest (planned v0.8) | + cryptography, pynacl | | `pip install qp-vault[fastapi]` | + REST API (15+ endpoints) | + fastapi | | `pip install qp-vault[cli]` | + `vault` command-line tool | + typer, rich | | `pip install qp-vault[all]` | Everything | All of the above | @@ -247,7 +246,7 @@ app.include_router(router, prefix="/v1/vault") |---|---|---|---| | Content integrity | SHA3-256 | FIPS 202 | Tamper-evident CIDs and Merkle roots | | Audit signatures | Ed25519 + ML-DSA-65 | FIPS 186-5, FIPS 204 | Via [qp-capsule](https://github.com/quantumpipes/capsule) (optional) | -| Encryption at rest | AES-256-GCM + ML-KEM-768 | FIPS 197, FIPS 203 | Post-quantum key exchange (optional) | +| Encryption at rest | AES-256-GCM (planned v0.8) | FIPS 197 | Post-quantum key exchange (planned) | | Search integrity | Parameterized SQL | -- | No string interpolation, FTS5 sanitized | | Input validation | Pydantic + custom | -- | Enum checks, name/tag/metadata limits | diff --git a/pyproject.toml b/pyproject.toml index 90f6e72..f59f56b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "qp-vault" -version = "0.5.0" +version = "0.6.0" description = "Governed knowledge store for autonomous organizations. Trust tiers, cryptographic audit trails, content-addressed storage, air-gap native." readme = "README.md" license = "Apache-2.0" diff --git a/src/qp_vault/__init__.py b/src/qp_vault/__init__.py index 83c15a1..39eecd7 100644 --- a/src/qp_vault/__init__.py +++ b/src/qp_vault/__init__.py @@ -26,7 +26,7 @@ Docs: https://github.com/quantumpipes/vault """ -__version__ = "0.5.0" +__version__ = "0.6.0" __author__ = "Quantum Pipes Technologies, LLC" __license__ = "Apache-2.0" diff --git a/src/qp_vault/core/layer_manager.py b/src/qp_vault/core/layer_manager.py index 5f78af9..8f0da34 100644 --- a/src/qp_vault/core/layer_manager.py +++ b/src/qp_vault/core/layer_manager.py @@ -182,6 +182,7 @@ async def search( query, top_k=top_k, layer=self._layer, + _layer_boost=self._layer_config.search_boost, **kwargs, ) diff --git a/src/qp_vault/core/search_engine.py b/src/qp_vault/core/search_engine.py index 58ad94b..d1bdec6 100644 --- a/src/qp_vault/core/search_engine.py +++ b/src/qp_vault/core/search_engine.py @@ -157,10 +157,12 @@ def compute_freshness( def apply_trust_weighting( results: list[SearchResult], config: VaultConfig | None = None, + *, + layer_boost: float = 1.0, ) -> list[SearchResult]: - """Apply 2D trust weights and freshness decay to search results. + """Apply 2D trust weights, freshness decay, and layer boost to search results. - Computes composite relevance = raw * organizational_trust * adversarial_multiplier * freshness. + Computes composite relevance = raw * organizational_trust * adversarial_multiplier * freshness * layer_boost. Re-sorts results by composite score (highest first). Args: @@ -181,12 +183,13 @@ def apply_trust_weighting( adv_str = adv_status.value if hasattr(adv_status, "value") else str(adv_status or "unverified") adv_mult = compute_adversarial_multiplier(adv_str) - # Freshness: we don't have updated_at on SearchResult, use 1.0 for now - freshness = 1.0 + # Freshness: compute from resource updated_at timestamp + result_updated = getattr(result, "updated_at", None) + freshness = compute_freshness(result_updated, tier, config) if result_updated else 1.0 - # Composite score: raw * organizational_trust * adversarial_verification * freshness + # Composite score: raw * organizational_trust * adversarial_verification * freshness * layer_boost raw = result.relevance - composite = raw * tw * adv_mult * freshness + composite = raw * tw * adv_mult * freshness * layer_boost weighted.append( result.model_copy( diff --git a/src/qp_vault/models.py b/src/qp_vault/models.py index 71ba424..59df167 100644 --- a/src/qp_vault/models.py +++ b/src/qp_vault/models.py @@ -117,6 +117,12 @@ class SearchResult(BaseModel): freshness: float = 1.0 relevance: float = 0.0 + # Resource metadata (for ranking and display) + updated_at: str | None = None + created_at: str | None = None + resource_type: str | None = None + data_classification: str | None = None + # Provenance trust_tier: TrustTier = TrustTier.WORKING adversarial_status: AdversarialStatus = AdversarialStatus.UNVERIFIED diff --git a/src/qp_vault/protocols.py b/src/qp_vault/protocols.py index cb94513..75262a3 100644 --- a/src/qp_vault/protocols.py +++ b/src/qp_vault/protocols.py @@ -41,6 +41,7 @@ class ResourceUpdate: tags: list[str] | None = None metadata: dict[str, Any] | None = None lifecycle: str | None = None + adversarial_status: str | None = None valid_from: str | None = None valid_until: str | None = None supersedes: str | None = None diff --git a/src/qp_vault/storage/sqlite.py b/src/qp_vault/storage/sqlite.py index 6b5ecf3..f67f608 100644 --- a/src/qp_vault/storage/sqlite.py +++ b/src/qp_vault/storage/sqlite.py @@ -33,6 +33,7 @@ resource_type TEXT NOT NULL DEFAULT 'document', status TEXT NOT NULL DEFAULT 'pending', lifecycle TEXT NOT NULL DEFAULT 'active', + adversarial_status TEXT NOT NULL DEFAULT 'unverified', valid_from TEXT, valid_until TEXT, supersedes TEXT, @@ -93,8 +94,23 @@ CREATE INDEX IF NOT EXISTS idx_resources_collection ON resources(collection_id); CREATE INDEX IF NOT EXISTS idx_resources_layer ON resources(layer); CREATE INDEX IF NOT EXISTS idx_resources_hash ON resources(content_hash); +CREATE TABLE IF NOT EXISTS provenance ( + id TEXT PRIMARY KEY, + resource_id TEXT NOT NULL, + uploader_id TEXT, + upload_method TEXT, + source_description TEXT DEFAULT '', + original_hash TEXT NOT NULL, + provenance_signature TEXT, + signature_verified INTEGER DEFAULT 0, + created_at TEXT NOT NULL, + FOREIGN KEY (resource_id) REFERENCES resources(id) ON DELETE CASCADE +); + CREATE INDEX IF NOT EXISTS idx_chunks_resource ON chunks(resource_id); CREATE INDEX IF NOT EXISTS idx_chunks_cid ON chunks(cid); +CREATE INDEX IF NOT EXISTS idx_provenance_resource ON provenance(resource_id); +CREATE INDEX IF NOT EXISTS idx_resources_adversarial ON resources(adversarial_status); """ _FTS_SCHEMA = """ @@ -145,6 +161,11 @@ def _cosine_similarity(a: list[float], b: list[float]) -> float: return dot / (norm_a * norm_b) +def _enum_val(v: Any) -> str: + """Extract .value from enum, or return str directly.""" + return v.value if hasattr(v, "value") else str(v) + + def _resource_from_row(row: dict[str, Any]) -> Resource: """Convert a SQLite row dict to a Resource model.""" data = dict(row) @@ -190,14 +211,14 @@ async def store_resource(self, resource: Resource) -> str: """INSERT INTO resources ( id, name, content_hash, cid, merkle_root, trust_tier, data_classification, resource_type, - status, lifecycle, valid_from, valid_until, + status, lifecycle, adversarial_status, valid_from, valid_until, supersedes, superseded_by, collection_id, layer, tags, metadata, mime_type, size_bytes, chunk_count, created_at, updated_at, indexed_at, deleted_at ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, - ?, ?, ?, ?, + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? @@ -213,6 +234,7 @@ async def store_resource(self, resource: Resource) -> str: resource.resource_type.value if hasattr(resource.resource_type, "value") else resource.resource_type, resource.status.value if hasattr(resource.status, "value") else resource.status, resource.lifecycle.value if hasattr(resource.lifecycle, "value") else resource.lifecycle, + _enum_val(getattr(resource, "adversarial_status", "unverified")), str(resource.valid_from) if resource.valid_from else None, str(resource.valid_until) if resource.valid_until else None, resource.supersedes, @@ -289,7 +311,7 @@ async def update_resource(self, resource_id: str, updates: ResourceUpdate) -> Re for field_name in ( "name", "trust_tier", "data_classification", "lifecycle", - "valid_from", "valid_until", "supersedes", "superseded_by", + "adversarial_status", "valid_from", "valid_until", "supersedes", "superseded_by", ): val = getattr(updates, field_name, None) if val is not None: @@ -398,7 +420,8 @@ async def search(self, query: SearchQuery) -> list[SearchResult]: f"SELECT c.rowid as chunk_rowid, c.id as chunk_id, c.resource_id," # nosec B608 f" c.content, c.cid as chunk_cid, c.embedding," f" c.page_number, c.section_title, c.chunk_index," - f" r.name as resource_name, r.trust_tier, r.lifecycle" + f" r.name as resource_name, r.trust_tier, r.lifecycle," + f" r.updated_at as resource_updated_at, r.resource_type, r.data_classification" f" FROM chunks c JOIN resources r ON c.resource_id = r.id" f" WHERE {where_clause} ORDER BY c.chunk_index" ) @@ -457,6 +480,9 @@ async def search(self, query: SearchQuery) -> list[SearchResult]: trust_tier=TrustTier(row_dict["trust_tier"]), cid=row_dict["chunk_cid"], lifecycle=row_dict["lifecycle"], + updated_at=row_dict.get("resource_updated_at"), + resource_type=row_dict.get("resource_type"), + data_classification=row_dict.get("data_classification"), relevance=raw_score, ) ) @@ -500,6 +526,43 @@ async def get_chunks_for_resource(self, resource_id: str) -> list[Chunk]: result.append(Chunk(**d)) return result + async def store_provenance( + self, + provenance_id: str, + resource_id: str, + uploader_id: str | None, + upload_method: str | None, + source_description: str, + original_hash: str, + signature: str | None, + verified: bool, + created_at: str, + ) -> None: + """Store a provenance record.""" + conn = self._get_conn() + conn.execute( + """INSERT INTO provenance ( + id, resource_id, uploader_id, upload_method, + source_description, original_hash, provenance_signature, + signature_verified, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + provenance_id, resource_id, uploader_id, upload_method, + source_description, original_hash, signature, + 1 if verified else 0, created_at, + ), + ) + conn.commit() + + async def get_provenance(self, resource_id: str) -> list[dict[str, Any]]: + """Get all provenance records for a resource.""" + conn = self._get_conn() + rows = conn.execute( + "SELECT * FROM provenance WHERE resource_id = ? ORDER BY created_at", + (resource_id,), + ).fetchall() + return [dict(r) for r in rows] + async def close(self) -> None: """Close the database connection.""" if self._conn: diff --git a/src/qp_vault/vault.py b/src/qp_vault/vault.py index a93e692..153cab9 100644 --- a/src/qp_vault/vault.py +++ b/src/qp_vault/vault.py @@ -385,6 +385,87 @@ async def delete(self, resource_id: str, *, hard: bool = False) -> None: await self._ensure_initialized() await self._resource_manager.delete(resource_id, hard=hard) + async def get_content(self, resource_id: str) -> str: + """Retrieve the full text content of a resource. + + Reassembles chunks in order to reconstruct the original text. + + Args: + resource_id: The resource to retrieve content for. + + Returns: + The full text content, with chunks joined by newlines. + """ + await self._ensure_initialized() + chunks = await self._storage.get_chunks_for_resource(resource_id) + if not chunks: + raise VaultError(f"No content found for resource {resource_id}") + sorted_chunks = sorted(chunks, key=lambda c: c.chunk_index) + return "\n\n".join(c.content for c in sorted_chunks) + + async def replace( + self, + resource_id: str, + new_content: str, + *, + reason: str | None = None, + ) -> tuple[Resource, Resource]: + """Replace a resource's content atomically. + + Creates a new resource with the new content and supersedes the old one. + The old resource transitions to SUPERSEDED. + + Args: + resource_id: The resource to replace. + new_content: The new text content. + reason: Optional reason for the replacement. + + Returns: + Tuple of (old_resource, new_resource). + """ + await self._ensure_initialized() + old_resource = await self.get(resource_id) + + # Create new version with same metadata + new_resource = await self.add( + new_content, + name=old_resource.name, + trust=old_resource.trust_tier, + classification=old_resource.data_classification, + layer=old_resource.layer, + collection=old_resource.collection_id, + tags=old_resource.tags, + metadata=old_resource.metadata, + ) + + # Supersede old with new + return await self.supersede(resource_id, new_resource.id) + + async def get_provenance(self, resource_id: str) -> list[dict[str, Any]]: + """Get all provenance records for a resource. + + Returns: + List of provenance records in chronological order. + """ + await self._ensure_initialized() + return await self._storage.get_provenance(resource_id) + + async def set_adversarial_status(self, resource_id: str, status: str) -> Resource: + """Set the adversarial verification status of a resource. + + Args: + resource_id: The resource to update. + status: One of 'unverified', 'verified', 'suspicious'. + + Returns: + Updated resource. + """ + await self._ensure_initialized() + from qp_vault.protocols import ResourceUpdate + return await self._storage.update_resource( + resource_id, ResourceUpdate(adversarial_status=status) + ) + # --- Lifecycle --- async def transition( @@ -436,6 +517,7 @@ async def search( layer: MemoryLayer | str | None = None, collection: str | None = None, as_of: date | None = None, + _layer_boost: float = 1.0, ) -> list[SearchResult]: """Trust-weighted hybrid search. @@ -479,8 +561,8 @@ async def search( # Get raw results from storage raw_results = await self._storage.search(search_query) - # Apply trust weighting - weighted = apply_trust_weighting(raw_results, self.config) + # Apply trust weighting with optional layer boost + weighted = apply_trust_weighting(raw_results, self.config, layer_boost=_layer_boost) # Apply threshold after trust weighting filtered = [r for r in weighted if r.relevance >= threshold] @@ -748,6 +830,26 @@ def delete(self, resource_id: str, *, hard: bool = False) -> None: """Delete a resource.""" return _run_async(self._async.delete(resource_id, hard=hard)) + def get_content(self, resource_id: str) -> str: + """Retrieve the full text content of a resource.""" + result: str = _run_async(self._async.get_content(resource_id)) + return result + + def replace(self, resource_id: str, new_content: str, *, reason: str | None = None) -> tuple[Resource, Resource]: + """Replace a resource's content atomically. Returns (old, new).""" + result: tuple[Resource, Resource] = _run_async(self._async.replace(resource_id, new_content, reason=reason)) + return result + + def get_provenance(self, resource_id: str) -> list[dict[str, Any]]: + """Get provenance records for a resource.""" + result: list[dict[str, Any]] = _run_async(self._async.get_provenance(resource_id)) + return result + + def set_adversarial_status(self, resource_id: str, status: str) -> Resource: + """Set adversarial verification status.""" + result: Resource = _run_async(self._async.set_adversarial_status(resource_id, status)) + return result + def transition(self, resource_id: str, target: Lifecycle | str, *, reason: str | None = None) -> Resource: """Transition lifecycle state.""" return _run_async(self._async.transition(resource_id, target, reason=reason))