Skip to content

Commit 6eae90d

Browse files
feat(vault): v1.1.0 — classification enforcement, lossless export, telemetry
GAP-1: DataClassification enforcement
- CONFIDENTIAL/RESTRICTED content rejects cloud embedders (is_local=False)
- is_local property added to EmbeddingProvider Protocol and all implementations
- RESTRICTED resource reads emit audit events

GAP-2: Import/export content preservation (was data loss)
- export_vault now includes chunk content in _chunks array
- import_vault reconstructs content from chunks (lossless roundtrip)
- Backward compatible: imports without _chunks fall back to name
- Added export_vault/import_vault sync wrappers (were missing)

GAP-3: Telemetry integration
- VaultTelemetry instantiated in AsyncVault.__init__
- add() and search() operations tracked with _tracked() helper
- Telemetry summary exposed in status() response
- Tracks count, errors, avg/last latency per operation

16 new tests. Verified: ruff 0, mypy strict 0, 602 tests passing.
1 parent 12dc398 commit 6eae90d

9 files changed

Lines changed: 303 additions & 9 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "qp-vault"
7-
version = "1.0.0"
7+
version = "1.1.0"
88
description = "Governed knowledge store for autonomous organizations. Trust tiers, cryptographic audit trails, content-addressed storage, air-gap native."
99
readme = "README.md"
1010
license = "Apache-2.0"

src/qp_vault/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
Docs: https://github.com/quantumpipes/vault
2727
"""
2828

29-
__version__ = "1.0.0"
29+
__version__ = "1.1.0"
3030
__author__ = "Quantum Pipes Technologies, LLC"
3131
__license__ = "Apache-2.0"
3232

src/qp_vault/core/resource_manager.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,19 @@ async def add(
139139
)
140140
)
141141

142+
# Enforce data classification: CONFIDENTIAL/RESTRICTED must use local embedder
143+
if self._embedder and data_class in (
144+
DataClassification.CONFIDENTIAL,
145+
DataClassification.RESTRICTED,
146+
):
147+
is_local = getattr(self._embedder, "is_local", True)
148+
if not is_local:
149+
from qp_vault.exceptions import VaultError
150+
raise VaultError(
151+
f"Cannot use cloud embedder for {data_class.value} content. "
152+
"Use a local embedder (SentenceTransformerEmbedder) or NoopEmbedder."
153+
)
154+
142155
# Generate embeddings if provider available
143156
if self._embedder and chunks:
144157
texts_to_embed = [c.content for c in chunks]

src/qp_vault/embeddings/noop.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,10 @@ class NoopEmbedder:
2121
def dimensions(self) -> int:
2222
return 0
2323

24+
@property
25+
def is_local(self) -> bool:
26+
return True
27+
2428
async def embed(self, texts: list[str]) -> list[list[float]]:
2529
"""Return empty embeddings (text-only mode)."""
2630
return [[] for _ in texts]

src/qp_vault/embeddings/openai.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,10 @@ def __init__(
4242
def dimensions(self) -> int:
4343
return self._dimensions
4444

45+
@property
46+
def is_local(self) -> bool:
47+
return False
48+
4549
async def embed(self, texts: list[str]) -> list[list[float]]:
4650
"""Generate embeddings via OpenAI API."""
4751
response = await self._client.embeddings.create(

src/qp_vault/embeddings/sentence.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,10 @@ def __init__(self, model_name: str = "all-MiniLM-L6-v2") -> None:
3838
def dimensions(self) -> int:
3939
return int(self._dimensions or 0)
4040

41+
@property
42+
def is_local(self) -> bool:
43+
return True
44+
4145
async def embed(self, texts: list[str]) -> list[list[float]]:
4246
"""Generate embeddings for a batch of texts."""
4347
embeddings = self._model.encode(texts, convert_to_numpy=True)

src/qp_vault/protocols.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -173,6 +173,15 @@ class EmbeddingProvider(Protocol):
173173
@property
174174
def dimensions(self) -> int: ...
175175

176+
@property
177+
def is_local(self) -> bool:
178+
"""Whether this embedder runs locally (no network calls).
179+
180+
Used to enforce DataClassification: CONFIDENTIAL/RESTRICTED content
181+
must not be sent to cloud embedding services.
182+
"""
183+
return True
184+
176185
async def embed(self, texts: list[str]) -> list[list[float]]: ...
177186

178187

src/qp_vault/vault.py

Lines changed: 63 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -221,6 +221,10 @@ def __init__(
221221

222222
self._initialized = False
223223

224+
# Telemetry: operation tracking
225+
from qp_vault.telemetry import VaultTelemetry
226+
self._telemetry = VaultTelemetry()
227+
224228
# TTL cache for expensive operations (health, status)
225229
self._cache: dict[str, tuple[float, Any]] = {}
226230

@@ -294,6 +298,20 @@ def _resolve_tenant(self, tenant_id: str | None) -> str | None:
294298
)
295299
return tenant_id
296300

301+
async def _tracked(self, operation: str, coro: Any) -> Any:
302+
"""Run a coroutine with telemetry tracking."""
303+
import time as _time
304+
start = _time.monotonic()
305+
error = False
306+
try:
307+
return await coro
308+
except Exception:
309+
error = True
310+
raise
311+
finally:
312+
duration = (_time.monotonic() - start) * 1000
313+
self._telemetry.record(operation, duration, error=error)
314+
297315
# --- Resource Operations ---
298316

299317
async def add(
@@ -436,7 +454,7 @@ async def add(
436454
if membrane_result.recommended_status.value == "quarantined":
437455
_quarantine = True
438456

439-
resource = await self._resource_manager.add(
457+
resource: Resource = await self._tracked("add", self._resource_manager.add(
440458
text,
441459
name=name,
442460
trust_tier=trust_tier,
@@ -449,7 +467,7 @@ async def add(
449467
valid_from=valid_from,
450468
valid_until=valid_until,
451469
tenant_id=tenant_id,
452-
)
470+
))
453471

454472
# If Membrane flagged content, mark as quarantined + suspicious
455473
if _quarantine:
@@ -468,7 +486,21 @@ async def add(
468486
async def get(self, resource_id: str) -> Resource:
469487
"""Get a single resource by ID."""
470488
await self._ensure_initialized()
471-
return await self._resource_manager.get(resource_id)
489+
resource = await self._resource_manager.get(resource_id)
490+
491+
# Audit reads on RESTRICTED resources
492+
if resource.data_classification == DataClassification.RESTRICTED:
493+
from qp_vault.enums import EventType
494+
from qp_vault.models import VaultEvent
495+
await self._auditor.record(VaultEvent(
496+
event_type=EventType.SEARCH,
497+
resource_id=resource_id,
498+
resource_name=resource.name,
499+
resource_hash=resource.content_hash,
500+
details={"classification": "restricted", "operation": "get"},
501+
))
502+
503+
return resource
472504

473505
async def get_multiple(self, resource_ids: list[str]) -> list[Resource]:
474506
"""Get multiple resources by ID in a single query.
@@ -819,7 +851,7 @@ async def search(
819851
)
820852

821853
# Get raw results from storage (with timeout protection)
822-
raw_results = await self._with_timeout(self._storage.search(search_query))
854+
raw_results = await self._tracked("search", self._with_timeout(self._storage.search(search_query)))
823855

824856
# Apply trust weighting with optional layer boost
825857
weighted = apply_trust_weighting(raw_results, self.config, layer_boost=_layer_boost)
@@ -1078,10 +1110,18 @@ async def export_vault(self, path: str | Path) -> dict[str, Any]:
10781110
await self._ensure_initialized()
10791111
self._check_permission("export_vault")
10801112
resources: list[Resource] = await self._list_all_bounded()
1113+
export_resources = []
1114+
for r in resources:
1115+
r_dict = r.model_dump(mode="json")
1116+
# Include chunk content for lossless roundtrip
1117+
chunks = await self._storage.get_chunks_for_resource(r.id)
1118+
sorted_chunks = sorted(chunks, key=lambda c: c.chunk_index)
1119+
r_dict["_chunks"] = [{"content": c.content, "cid": c.cid} for c in sorted_chunks]
1120+
export_resources.append(r_dict)
10811121
data = {
1082-
"version": "1.0.0",
1122+
"version": "1.1.0",
10831123
"resource_count": len(resources),
1084-
"resources": [r.model_dump(mode="json") for r in resources],
1124+
"resources": export_resources,
10851125
}
10861126
out = Path(path)
10871127
out.parent.mkdir(parents=True, exist_ok=True)
@@ -1103,11 +1143,17 @@ async def import_vault(self, path: str | Path) -> list[Resource]:
11031143
data = _json.loads(Path(path).read_text())
11041144
imported = []
11051145
for r_data in data.get("resources", []):
1106-
content = r_data.get("name", "imported")
1146+
# Reconstruct content from chunks (lossless) or fall back to name
1147+
chunks_data = r_data.get("_chunks", [])
1148+
if chunks_data:
1149+
content = "\n\n".join(c["content"] for c in chunks_data)
1150+
else:
1151+
content = r_data.get("name", "imported")
11071152
resource = await self.add(
11081153
content,
11091154
name=r_data.get("name", "imported"),
11101155
trust_tier=r_data.get("trust_tier", "working"),
1156+
classification=r_data.get("data_classification", "internal"),
11111157
tags=r_data.get("tags", []),
11121158
metadata=r_data.get("metadata", {}),
11131159
)
@@ -1164,6 +1210,7 @@ async def status(self) -> dict[str, Any]:
11641210
"layer_details": layer_stats,
11651211
"vault_path": str(self.path),
11661212
"backend": "sqlite",
1213+
"telemetry": self._telemetry.summary(),
11671214
}
11681215
self._cache_set(cache_key, result)
11691216
return result
@@ -1364,6 +1411,15 @@ def status(self) -> dict[str, Any]:
13641411
result: dict[str, Any] = _run_async(self._async.status())
13651412
return result
13661413

1414+
def export_vault(self, path: str | Path) -> dict[str, Any]:
1415+
"""Export the vault to a JSON file."""
1416+
result: dict[str, Any] = _run_async(self._async.export_vault(path))
1417+
return result
1418+
1419+
def import_vault(self, path: str | Path) -> list[Resource]:
1420+
"""Import resources from an exported vault JSON file."""
1421+
return cast("list[Resource]", _run_async(self._async.import_vault(path)))
1422+
13671423
def register_embedder(self, embedder: EmbeddingProvider) -> None:
13681424
"""Register a custom embedding provider."""
13691425
self._async.register_embedder(embedder)

0 commit comments

Comments (0)