diff --git a/CHANGELOG.md b/CHANGELOG.md index ad7657b..06ee21a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.12.0] - 2026-04-06 + +### Added +- **Post-quantum cryptography (delivered)**: + - `MLKEMKeyManager` — ML-KEM-768 key encapsulation (FIPS 203) + - `MLDSASigner` — ML-DSA-65 digital signatures (FIPS 204) + - `HybridEncryptor` — ML-KEM-768 + AES-256-GCM hybrid encryption + - `[pq]` installation extra: `pip install qp-vault[pq]` +- **Input bounds**: `top_k` capped at 1000, `threshold` range 0-1, query max 10K chars +- **Batch limits**: max 100 items per `/batch` request +- **Plugin hash verification**: `manifest.json` with SHA3-256 hashes in plugins_dir +- **Tenant-locked vault**: `Vault(path, tenant_id="x")` enforces single-tenant scope + +### Security +- SearchRequest Pydantic validators prevent unbounded parameter attacks +- Plugin files verified against manifest before execution + ## [0.11.0] - 2026-04-06 ### Added diff --git a/pyproject.toml b/pyproject.toml index a74beaa..f0e39e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "qp-vault" -version = "0.11.0" +version = "0.12.0" description = "Governed knowledge store for autonomous organizations. Trust tiers, cryptographic audit trails, content-addressed storage, air-gap native." readme = "README.md" license = "Apache-2.0" @@ -68,6 +68,9 @@ encryption = [ "cryptography>=42", "pynacl>=1.6.2", ] +pq = [ + "liboqs-python>=0.14.1", +] integrity = [ "numpy>=2.0", ] @@ -87,7 +90,7 @@ dev = [ "ruff>=0.9", ] all = [ - "qp-vault[sqlite,postgres,docling,local,openai,capsule,encryption,integrity,fastapi,cli]", + "qp-vault[sqlite,postgres,docling,local,openai,capsule,encryption,pq,integrity,fastapi,cli]", ] [project.scripts] diff --git a/src/qp_vault/__init__.py b/src/qp_vault/__init__.py index 641864b..cf3cfc4 100644 --- a/src/qp_vault/__init__.py +++ b/src/qp_vault/__init__.py @@ -26,7 +26,7 @@ Docs: https://github.com/quantumpipes/vault """ -__version__ = "0.11.0" +__version__ = "0.12.0" __author__ = "Quantum Pipes Technologies, LLC" __license__ = "Apache-2.0" diff --git a/src/qp_vault/encryption/__init__.py b/src/qp_vault/encryption/__init__.py index 9301959..6c353aa 100644 --- a/src/qp_vault/encryption/__init__.py +++ b/src/qp_vault/encryption/__init__.py @@ -3,10 +3,26 @@ """Encryption at rest for qp-vault. -Provides AES-256-GCM symmetric encryption for chunk content. -Requires: pip install qp-vault[encryption] +Classical: AES-256-GCM (FIPS 197) +Post-quantum: ML-KEM-768 key encapsulation (FIPS 203) + ML-DSA-65 signatures (FIPS 204) +Hybrid: ML-KEM-768 + AES-256-GCM (quantum-resistant data encryption) + +Install: + pip install qp-vault[encryption] # AES-256-GCM only + pip install qp-vault[pq] # + ML-KEM-768 + ML-DSA-65 + pip install qp-vault[encryption,pq] # Full hybrid encryption """ from qp_vault.encryption.aes_gcm import AESGCMEncryptor __all__ = ["AESGCMEncryptor"] + +# Conditional PQ exports (available when liboqs-python installed) +try: + from qp_vault.encryption.hybrid import HybridEncryptor + from qp_vault.encryption.ml_dsa import MLDSASigner + from qp_vault.encryption.ml_kem import MLKEMKeyManager + + __all__ += ["MLKEMKeyManager", "MLDSASigner", "HybridEncryptor"] # type: ignore[assignment] +except ImportError: + pass diff --git a/src/qp_vault/encryption/hybrid.py b/src/qp_vault/encryption/hybrid.py new file mode 100644 index 0000000..8faa6db --- /dev/null +++ b/src/qp_vault/encryption/hybrid.py @@ -0,0 +1,94 @@ +# Copyright 2026 Quantum Pipes Technologies, LLC +# SPDX-License-Identifier: Apache-2.0 + +"""Hybrid encryption: ML-KEM-768 key exchange + AES-256-GCM data encryption. + +Combines post-quantum key encapsulation (FIPS 203) with classical +symmetric encryption (FIPS 197) for defense-in-depth: + +1. ML-KEM-768 encapsulates a shared secret (32 bytes) +2. Shared secret is used as AES-256-GCM key +3. Data is encrypted with AES-256-GCM + +Format: kem_ciphertext_len (4 bytes) || kem_ciphertext || aes_nonce (12) || aes_ciphertext || aes_tag (16) + +Requires: pip install qp-vault[pq,encryption] +""" + +from __future__ import annotations + +import struct + +from qp_vault.encryption.aes_gcm import AESGCMEncryptor +from qp_vault.encryption.ml_kem import MLKEMKeyManager + + +class HybridEncryptor: + """ML-KEM-768 + AES-256-GCM hybrid encryption. + + Provides quantum-resistant data encryption by wrapping AES keys + with ML-KEM-768 key encapsulation. + + Usage: + enc = HybridEncryptor() + pub, sec = enc.generate_keypair() + ciphertext = enc.encrypt(b"secret data", pub) + plaintext = enc.decrypt(ciphertext, sec) + """ + + def __init__(self) -> None: + self._kem = MLKEMKeyManager() + + def generate_keypair(self) -> tuple[bytes, bytes]: + """Generate an ML-KEM-768 keypair for hybrid encryption. + + Returns: + (public_key, secret_key) — store secret_key securely. + """ + return self._kem.generate_keypair() + + def encrypt(self, plaintext: bytes, public_key: bytes) -> bytes: + """Encrypt data with hybrid ML-KEM-768 + AES-256-GCM. + + Args: + plaintext: Data to encrypt. + public_key: ML-KEM-768 public key. + + Returns: + Hybrid ciphertext: kem_ct_len(4) || kem_ct || aes_encrypted + """ + # Step 1: ML-KEM-768 key encapsulation -> shared secret (32 bytes) + kem_ciphertext, shared_secret = self._kem.encapsulate(public_key) + + # Step 2: AES-256-GCM encrypt with the shared secret as key + aes = AESGCMEncryptor(key=shared_secret[:32]) + aes_encrypted = aes.encrypt(plaintext) + + # Step 3: Pack: kem_ct_len || kem_ct || aes_encrypted + return struct.pack(">I", len(kem_ciphertext)) + kem_ciphertext + aes_encrypted + + def decrypt(self, data: bytes, secret_key: bytes) -> bytes: + """Decrypt hybrid ML-KEM-768 + AES-256-GCM ciphertext. + + Args: + data: Hybrid ciphertext from encrypt(). + secret_key: ML-KEM-768 secret key. + + Returns: + Decrypted plaintext. + """ + # Step 1: Unpack kem_ct_len + if len(data) < 4: + raise ValueError("Hybrid ciphertext too short") + kem_ct_len = struct.unpack(">I", data[:4])[0] + + # Step 2: Extract KEM ciphertext and AES ciphertext + kem_ciphertext = data[4 : 4 + kem_ct_len] + aes_encrypted = data[4 + kem_ct_len :] + + # Step 3: ML-KEM-768 decapsulation -> shared secret + shared_secret = self._kem.decapsulate(kem_ciphertext, secret_key) + + # Step 4: AES-256-GCM decrypt + aes = AESGCMEncryptor(key=shared_secret[:32]) + return aes.decrypt(aes_encrypted) diff --git a/src/qp_vault/encryption/ml_dsa.py b/src/qp_vault/encryption/ml_dsa.py new file mode 100644 index 0000000..eacf1d4 --- /dev/null +++ b/src/qp_vault/encryption/ml_dsa.py @@ -0,0 +1,80 @@ +# Copyright 2026 Quantum Pipes Technologies, LLC +# SPDX-License-Identifier: Apache-2.0 + +"""ML-DSA-65 post-quantum signatures for provenance attestation. + +Signs and verifies data using ML-DSA-65 (FIPS 204), providing +quantum-resistant digital signatures for provenance records, +Merkle proofs, and audit attestations. + +Requires: pip install qp-vault[pq] +""" + +from __future__ import annotations + +try: + import oqs + HAS_OQS = True +except ImportError: + HAS_OQS = False + + +class MLDSASigner: + """ML-DSA-65 digital signature manager (FIPS 204). + + Generates keypairs, signs data, and verifies signatures. + Used for provenance attestation and audit record signing. + + Usage: + signer = MLDSASigner() + pub, sec = signer.generate_keypair() + signature = signer.sign(b"data", sec) + assert signer.verify(b"data", signature, pub) + """ + + ALGORITHM = "ML-DSA-65" + + def __init__(self) -> None: + if not HAS_OQS: + raise ImportError( + "liboqs-python is required for ML-DSA-65. " + "Install with: pip install qp-vault[pq]" + ) + + def generate_keypair(self) -> tuple[bytes, bytes]: + """Generate an ML-DSA-65 keypair. + + Returns: + (public_key, secret_key) as bytes. + """ + sig = oqs.Signature(self.ALGORITHM) + public_key = sig.generate_keypair() + secret_key = sig.export_secret_key() + return public_key, secret_key + + def sign(self, message: bytes, secret_key: bytes) -> bytes: + """Sign a message with ML-DSA-65. + + Args: + message: The data to sign. + secret_key: ML-DSA-65 secret key. + + Returns: + The signature bytes. + """ + sig = oqs.Signature(self.ALGORITHM, secret_key=secret_key) + return sig.sign(message) + + def verify(self, message: bytes, signature: bytes, public_key: bytes) -> bool: + """Verify an ML-DSA-65 signature. + + Args: + message: The original signed data. + signature: The signature to verify. + public_key: ML-DSA-65 public key. + + Returns: + True if signature is valid. + """ + sig = oqs.Signature(self.ALGORITHM) + return sig.verify(message, signature, public_key) diff --git a/src/qp_vault/encryption/ml_kem.py b/src/qp_vault/encryption/ml_kem.py new file mode 100644 index 0000000..84f66f9 --- /dev/null +++ b/src/qp_vault/encryption/ml_kem.py @@ -0,0 +1,81 @@ +# Copyright 2026 Quantum Pipes Technologies, LLC +# SPDX-License-Identifier: Apache-2.0 + +"""ML-KEM-768 key encapsulation for post-quantum key exchange. + +Wraps AES-256-GCM data encryption keys (DEK) with ML-KEM-768 (FIPS 203). +The encapsulated key can only be decapsulated by the holder of the +ML-KEM-768 secret key, providing quantum-resistant key protection. + +Requires: pip install qp-vault[pq] +""" + +from __future__ import annotations + +try: + import oqs + HAS_OQS = True +except ImportError: + HAS_OQS = False + + +class MLKEMKeyManager: + """ML-KEM-768 key encapsulation manager (FIPS 203). + + Generates keypairs, encapsulates shared secrets, and decapsulates + them. Used to wrap AES-256-GCM keys for post-quantum protection. + + Usage: + km = MLKEMKeyManager() + pub, sec = km.generate_keypair() + ciphertext, shared_secret = km.encapsulate(pub) + recovered = km.decapsulate(ciphertext, sec) + assert shared_secret == recovered + """ + + ALGORITHM = "ML-KEM-768" + + def __init__(self) -> None: + if not HAS_OQS: + raise ImportError( + "liboqs-python is required for ML-KEM-768. " + "Install with: pip install qp-vault[pq]" + ) + + def generate_keypair(self) -> tuple[bytes, bytes]: + """Generate an ML-KEM-768 keypair. + + Returns: + (public_key, secret_key) as bytes. + """ + kem = oqs.KeyEncapsulation(self.ALGORITHM) + public_key = kem.generate_keypair() + secret_key = kem.export_secret_key() + return public_key, secret_key + + def encapsulate(self, public_key: bytes) -> tuple[bytes, bytes]: + """Encapsulate a shared secret using a public key. + + Args: + public_key: ML-KEM-768 public key. + + Returns: + (ciphertext, shared_secret) — ciphertext is sent to key holder, + shared_secret is used as AES-256-GCM key. + """ + kem = oqs.KeyEncapsulation(self.ALGORITHM) + ciphertext, shared_secret = kem.encap_secret(public_key) + return ciphertext, shared_secret + + def decapsulate(self, ciphertext: bytes, secret_key: bytes) -> bytes: + """Decapsulate a shared secret using the secret key. + + Args: + ciphertext: The encapsulated ciphertext from encapsulate(). + secret_key: ML-KEM-768 secret key. + + Returns: + The shared secret (same as returned by encapsulate). + """ + kem = oqs.KeyEncapsulation(self.ALGORITHM, secret_key=secret_key) + return kem.decap_secret(ciphertext) diff --git a/src/qp_vault/integrations/fastapi_routes.py b/src/qp_vault/integrations/fastapi_routes.py index 0b633b4..02b7ce3 100644 --- a/src/qp_vault/integrations/fastapi_routes.py +++ b/src/qp_vault/integrations/fastapi_routes.py @@ -48,9 +48,9 @@ class AddResourceRequest(BaseModel): lifecycle: str = "active" class SearchRequest(BaseModel): - query: str - top_k: int = 10 - threshold: float = 0.0 + query: str = Field(..., max_length=10000) + top_k: int = Field(10, ge=1, le=1000) + threshold: float = Field(0.0, ge=0.0, le=1.0) trust_min: str | None = None layer: str | None = None collection: str | None = None @@ -293,6 +293,8 @@ async def search_faceted(req: SearchRequest) -> dict[str, Any]: @router.post("/batch") async def add_batch(req: dict[str, Any]) -> dict[str, Any]: sources = req.get("sources", []) + if len(sources) > 100: + raise HTTPException(status_code=400, detail="Batch limited to 100 items") trust = req.get("trust", "working") tenant_id = req.get("tenant_id") resources = await vault.add_batch( diff --git a/src/qp_vault/plugins/registry.py b/src/qp_vault/plugins/registry.py index 377c911..0f3d451 100644 --- a/src/qp_vault/plugins/registry.py +++ b/src/qp_vault/plugins/registry.py @@ -127,18 +127,41 @@ def discover_entry_points(self) -> None: except Exception as e: logger.debug("Entry point group %s unavailable: %s", group, e) - def discover_plugins_dir(self, plugins_dir: Path) -> None: + def discover_plugins_dir(self, plugins_dir: Path, *, verify_hashes: bool = True) -> None: """Load plugins from a local directory (air-gap mode). Any .py file in the directory is imported. Classes decorated with @embedder, @parser, or @policy are auto-registered. + + If a manifest.json exists in the directory, plugin files are verified + against SHA3-256 hashes before loading. """ if not plugins_dir.is_dir(): return + # Load manifest for hash verification + manifest: dict[str, str] = {} + manifest_path = plugins_dir / "manifest.json" + if verify_hashes and manifest_path.exists(): + import json + manifest = json.loads(manifest_path.read_text()) + for py_file in sorted(plugins_dir.glob("*.py")): if py_file.name.startswith("_"): continue + + # Hash verification if manifest exists + if manifest and verify_hashes: + import hashlib + file_hash = hashlib.sha3_256(py_file.read_bytes()).hexdigest() + expected = manifest.get(py_file.name) + if expected and file_hash != expected: + logger.warning( + "Plugin %s hash mismatch (expected %s, got %s). Skipping.", + py_file.name, expected[:16], file_hash[:16], + ) + continue + try: module_name = f"qp_vault_plugin_{py_file.stem}" spec = importlib.util.spec_from_file_location(module_name, py_file) diff --git a/src/qp_vault/vault.py b/src/qp_vault/vault.py index 2958870..2d2db9f 100644 --- a/src/qp_vault/vault.py +++ b/src/qp_vault/vault.py @@ -140,10 +140,12 @@ def __init__( policies: list[PolicyProvider] | None = None, config: VaultConfig | None = None, plugins_dir: str | Path | None = None, + tenant_id: str | None = None, ) -> None: self.path = Path(path) self.path.mkdir(parents=True, exist_ok=True) self.config = config or VaultConfig() + self._locked_tenant_id = tenant_id # If set, all operations are scoped to this tenant # Storage backend if storage is not None: