diff --git a/Cargo.lock b/Cargo.lock index 6c947707..b310fe4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -328,6 +328,18 @@ version = "0.1.0" name = "gatos-policy" version = "0.1.0" +[[package]] +name = "gatos-privacy" +version = "0.1.0" +dependencies = [ + "anyhow", + "blake3", + "gatos-ledger-core", + "hex", + "serde", + "serde_json", +] + [[package]] name = "gatos-wasm-bindings" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 67934114..9279a17a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "crates/gatos-mind", "crates/gatos-echo", "crates/gatos-policy", + "crates/gatos-privacy", "crates/gatos-kv", "crates/gatosd", "bindings/wasm", diff --git a/Makefile b/Makefile index 5259946a..39b8f699 100644 --- a/Makefile +++ b/Makefile @@ -12,14 +12,14 @@ diagrams: @bash -lc 'scripts/mermaid/generate_all.sh' lint-md: - @bash -lc 'if command -v node >/dev/null 2>&1; then \ + @bash -lc 'if command -v node >/dev/null 2>&1; then \ npx -y markdownlint-cli "**/*.md" --config .markdownlint.json; \ elif command -v docker >/dev/null 2>&1; then \ docker run --rm -v "$$PWD:/work" -w /work node:20 bash -lc "npx -y markdownlint-cli \"**/*.md\" --config .markdownlint.json"; \ else echo "Need Node.js or Docker" >&2; exit 1; fi' fix-md: - @bash -lc 'if command -v node >/dev/null 2>&1; then \ + @bash -lc 'if command -v node >/dev/null 2>&1; then \ npx -y markdownlint-cli "**/*.md" --fix --config .markdownlint.json; \ elif command -v docker >/dev/null 2>&1; then \ docker run --rm -v "$$PWD:/work" -w /work node:20 bash -lc "npx -y markdownlint-cli \"**/*.md\" --fix --config .markdownlint.json"; \ @@ -44,7 +44,8 @@ schema-compile: npx -y ajv-cli@5 ajv compile --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/governance/grant.schema.json -r schemas/v1/common/ids.schema.json && \ npx -y ajv-cli@5 ajv compile --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/governance/revocation.schema.json -r schemas/v1/common/ids.schema.json && \ npx -y 
ajv-cli@5 ajv compile --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/governance/proof_of_consensus_envelope.schema.json -r schemas/v1/common/ids.schema.json && \ - npx -y ajv-cli@5 ajv compile --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/policy/governance_policy.schema.json' + npx -y ajv-cli@5 ajv compile --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/policy/governance_policy.schema.json && \ + npx -y ajv-cli@5 ajv compile --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/privacy/opaque_pointer.schema.json -r schemas/v1/common/ids.schema.json' schema-validate: @bash -lc 'set -euo pipefail; \ @@ -57,7 +58,8 @@ schema-validate: npx -y ajv-cli@5 ajv validate --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/governance/grant.schema.json -d examples/v1/governance/grant_min.json -r schemas/v1/common/ids.schema.json && \ npx -y ajv-cli@5 ajv validate --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/governance/revocation.schema.json -d examples/v1/governance/revocation_min.json -r schemas/v1/common/ids.schema.json && \ npx -y ajv-cli@5 ajv validate --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/governance/proof_of_consensus_envelope.schema.json -d examples/v1/governance/poc_envelope_min.json -r schemas/v1/common/ids.schema.json && \ - npx -y ajv-cli@5 ajv validate --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/policy/governance_policy.schema.json -d examples/v1/policy/governance_min.json' + npx -y ajv-cli@5 ajv validate --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/policy/governance_policy.schema.json -d examples/v1/policy/governance_min.json && \ + npx -y ajv-cli@5 ajv validate --spec=draft2020 --strict=true -c ajv-formats -s schemas/v1/privacy/opaque_pointer.schema.json -d examples/v1/privacy/opaque_pointer_min.json -r schemas/v1/common/ids.schema.json' schema-negative: @bash -lc 'set -euo pipefail; \ diff --git a/crates/gatos-privacy/Cargo.toml 
b/crates/gatos-privacy/Cargo.toml new file mode 100644 index 00000000..631ea04c --- /dev/null +++ b/crates/gatos-privacy/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "gatos-privacy" +version = "0.1.0" +edition = "2021" + +[dependencies] +# NOTE(review): src/lib.rs only uses serde and serde_json so far; confirm the other crates are needed. +anyhow = { workspace = true } +blake3 = { workspace = true } +gatos-ledger-core = { path = "../gatos-ledger-core" } +hex = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } diff --git a/crates/gatos-privacy/src/lib.rs b/crates/gatos-privacy/src/lib.rs new file mode 100644 index 00000000..28e54abb --- /dev/null +++ b/crates/gatos-privacy/src/lib.rs @@ -0,0 +1,42 @@ +//! gatos-privacy — Opaque Pointer types and helpers +//! +//! This crate defines the JSON-facing pointer envelope used by the +//! hybrid privacy model (ADR-0004). The struct mirrors the v1 schema +//! in `schemas/v1/privacy/opaque_pointer.schema.json`. +//! +//! Canonicalization: when computing content IDs or digests, callers +//! MUST serialize JSON using RFC 8785 JCS. This crate intentionally +//! does not take a dependency on a specific JCS implementation to +//! keep the workspace lean; higher layers may provide one. 
+ +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct OpaquePointer { + pub kind: Kind, + pub algo: Algo, + pub digest: String, // BLAKE3 digest of the plaintext + #[serde(skip_serializing_if = "Option::is_none")] + pub ciphertext_digest: Option<String>, // BLAKE3 digest of the stored ciphertext; omitted from JSON when None + #[serde(skip_serializing_if = "Option::is_none")] + pub size: Option<u64>, // blob size in bytes; omitted from JSON when None + pub location: String, // URI where the blob can be fetched + pub capability: String, // authz/decryption reference; MUST NOT embed secrets + #[serde(skip_serializing_if = "Option::is_none")] + pub extensions: Option<Value>, // forward-compatible free-form JSON; omitted when None +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum Kind { + OpaquePointer, // serialized as "opaque_pointer" +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum Algo { + Blake3, // serialized as "blake3" +} + diff --git a/docs/FEATURES.md b/docs/FEATURES.md index 9b655fc2..5706e3ee 100644 --- a/docs/FEATURES.md +++ b/docs/FEATURES.md @@ -141,29 +141,6 @@ Each feature includes user stories per relevant stakeholders (format requested), --- -## F6 — Opaque Pointers & CAS - -### F6-US-DML - -| | | -|--|--| -| **As a...** | Data/ML Engineer | -| **I want..** | encrypted artifacts with verifiable pointers | -| **So that...** | I can ship models across untrusted storage | - -#### Acceptance Criteria - -- [ ] Pointer includes plaintext hash, ciphertext hash, cipher meta -- [ ] Rekey operation available - -#### Test Plan - -- [ ] Golden: decrypt with correct key → match plaintext hash -- [ ] Edge: wrong bytes → hash mismatch -- [ ] Failure: rekey without authorization → deny - ---- - ## F7 — Epochs & Compaction ### F7-US-PENG @@ -208,3 +185,30 @@ Each feature includes user stories per relevant stakeholders (format requested), - [ ] Golden: metrics show non-zero counters post workload - [ ] Edge: cache stale → doctor recommends rebuild - [ ] Failure: FF-only violation → doctor flags critical +--- + +## F9 — Hybrid Privacy Model + +See also: 
[ADR-0004](./decisions/ADR-0004/DECISION.md). + +### F9-US-DEV + +### F9-US-SEC + +| | | +|--|--| +| **As a...** | Security/Compliance | +| **I want..** | to audit the separation of public and private data | +| **So that...** | I can verify that sensitive data is properly isolated and access is controlled | + +#### Acceptance Criteria + +- [ ] Opaque Pointer resolution fails without a valid capability. +- [ ] Private blob digest matches the digest in the public pointer. +- [ ] Commit trailers (`Privacy-Redactions`, `Privacy-Pointers`) accurately report the number of redactions/pointers. + +#### Test Plan + +- [ ] Golden: project a unified state, resolve pointer, and verify content matches original. +- [ ] Edge: attempt to resolve a pointer with an invalid capability URI → DENY. +- [ ] Failure: tamper with a private blob → digest mismatch on resolution. diff --git a/docs/SPEC.md b/docs/SPEC.md index 041cf0bd..728c4544 100644 --- a/docs/SPEC.md +++ b/docs/SPEC.md @@ -71,7 +71,7 @@ graph TD end subgraph "Job Plane" - Compute("gatos-compute"); + Compute("gatos-compute (planned)"); end subgraph "Ledger Plane" @@ -128,6 +128,7 @@ graph TD A1 --> B6(audit) A1 --> B7(cache) A1 --> B8(epoch) + A1 --> B9(private) C(notes) --> C1(gatos) end subgraph Workspace @@ -148,6 +149,8 @@ The normative layout is as follows: │ └── gatos/ │ ├── journal/ │ ├── state/ +│ ├── private/ +│ │ └── / # e.g., the actor's ed25519 public key │ ├── mbus/ │ ├── mbus-ack/ │ ├── jobs/ @@ -282,28 +285,80 @@ On **DENY**, the gate **MUST** append an audit decision to `refs/gatos/audit/pol --- -## 7. Blob Pointers & Opaque Storage +## 7. Privacy and Opaque Pointers -Large or sensitive data is stored out-of-band in a content-addressed store and referenced via pointers. +See also: [ADR‑0004](./decisions/ADR-0004/DECISION.md). + +GATOS supports a hybrid privacy model where state can be separated into a verifiable public projection and a confidential private overlay. 
This is achieved by applying a deterministic **Projection Functor** during the state fold process, which replaces sensitive or large data with **Opaque Pointers**. + +### 7.1 Projection Model + +The State Engine (`gatos-echo`) can be configured with privacy rules. When folding history, it first computes a `UnifiedState` containing all data. It then applies the privacy rules to produce a `PublicState` and a set of `PrivateBlobs`. + +- **`PublicState`**: Contains only public data and Opaque Pointers. This is committed to the public `refs/gatos/state/public/...` namespace and is globally verifiable. +- **`PrivateBlobs`**: The raw data that was redacted or pointerized. This data is stored in a separate, private store (e.g., a local directory, a private object store) and is addressed by its content hash. + +Any commit that is the result of a privacy projection **MUST** include trailers indicating the number of redactions and pointers created. + +```text +Privacy-Redactions: 5 +Privacy-Pointers: 2 +``` + +### 7.2 Opaque Pointers + +An Opaque Pointer is a canonical JSON object that acts as a verifiable, addressable link to a private blob. It replaces the sensitive data in the `PublicState`. ```mermaid classDiagram - class BlobPointer { - +String kind: "blobptr" - +String algo - +String hash - +Number size - } class OpaquePointer { - +String kind: "opaque" - +String algo - +String hash - +String ciphertext_hash - +Object cipher_meta + +string kind: "opaque_pointer" + +string algo: "blake3" + +string digest: "blake3:" // plaintext digest + +string ciphertext_digest: "blake3:" // optional + +int size // bytes; SHOULD be present + +string location + +string capability // MUST NOT embed secrets + +object extensions // forward-compatible } ``` -Pointers **MUST** refer to bytes in `gatos/objects//`. For opaque objects, no plaintext **MAY** be stored in Git. +- `digest`: The **REQUIRED** `blake3` hash of the plaintext. 
For low‑entropy privacy classes, the public pointer MUST NOT expose this value. +- `ciphertext_digest`: The `blake3` hash of the stored ciphertext. For low‑entropy privacy classes, this field MUST be present in the public pointer. +- `size`: The size of the private blob in bytes (RECOMMENDED). +- `location`: A **REQUIRED** stable URI indicating where the blob can be fetched (e.g., `gatos-node://ed25519:`, `s3://bucket/key`). Do not embed pre‑signed tokens. +- `capability`: A **REQUIRED** reference to the authn/z + decryption mechanism (e.g., `gatos-key://...`, `kms://...`). It MUST NOT embed secrets; resolution occurs at the policy layer. + +The pointer itself is canonicalized via RFC 8785 JCS and its `content_id` is `blake3(JCS(pointer_json))`. + +### 7.3 Pointer Resolution + +Endpoint and AuthN: +- Clients MUST resolve via `POST /gatos/private/blobs/resolve` with body `{ "digest": "blake3:", "want": "plaintext"|"ciphertext" }` and `Authorization: Bearer `. +- Tokens MUST include standard claims (`sub`, `aud`, `method`, `path`, `exp`, `nbf`); skew tolerance ±300s. 401 for authn failures; 403 for policy denials. + +Verification Steps: +1. Fetch the ciphertext blob from `location` via the node’s resolver endpoint. +2. Acquire the necessary keys via the `capability` reference (policy-driven; no secrets in the pointer). +3. Decrypt. Compute `blake3(ciphertext)` and compare with `ciphertext_digest` when present; compute `blake3(plaintext)` and compare with `digest` when exposed. Any mismatch MUST yield `DigestMismatch`. +4. Servers SHOULD return `X-BLAKE3-Digest` and `Digest: sha-256=…` headers for response integrity. + +Error Taxonomy: +- `Unauthorized` (401), `Forbidden` (403), `NotFound` (404), `DigestMismatch` (422), `CapabilityUnavailable` (503), `PolicyDenied` (403). 
+ +Optional HTTP Message Signatures profile (RFC 9421): +- As an alternative to JWT, clients MAY sign `@method`, `@target-uri`, `date`, `host`, `content-digest` and send `Signature-Input`/`Signature` headers. Servers SHOULD still emit `Digest` and `X-BLAKE3-Digest` response headers. + +Pointer Rotation (Rekey): +1) fetch ciphertext; 2) decrypt; 3) re‑encrypt per new capability; 4) store new ciphertext; 5) emit rotation event updating pointer fields (capability/location). `digest` (plaintext) MUST remain stable. Add trailer `Privacy-Pointer-Rotations: `. + +Namespacing: +- `refs/gatos/private//…` holds private overlay indices/metadata only; workspace mirror is `gatos/private//…`. Blobs live in external stores keyed by digest. + +Canonicalization: +- All JSON labeled as canonical MUST use RFC 8785 JCS; non‑JSON maps MUST be ordered lexicographically by lowercase UTF‑8 keys. + +This process guarantees that even though the data is stored privately, its integrity is verifiable against the public ledger. --- @@ -624,7 +679,7 @@ Proposal → Approvals (N‑of‑M) → Grant. Quorum groups (e.g., `@leads`) MU Proposal-Id: blake3: Approval-Id: blake3: Signer: ed25519: - Expires-At: # OPTIONAL + Expires-At: # OPTIONAL. If present, the approval is only valid until this time. It cannot extend the proposal's expiration. ``` - Grant (at `refs/gatos/grants/…`): @@ -640,10 +695,10 @@ Proposal → Approvals (N‑of‑M) → Grant. Quorum groups (e.g., `@leads`) MU `Proof-Of-Consensus` is the BLAKE3 of a canonical JSON envelope containing: - The canonical proposal envelope (by value or `Proposal-Id`). -- A sorted list (by `Signer`) of all valid approvals used to reach quorum (by value or `Approval-Id`). +- A lexicographically sorted list of approvals ordered by the lowercase ASCII of each approval's `Signer` value (the `ed25519:` string). Each approval is included by value or via `Approval-Id`. - The governance rule id (`Policy-Rule`) and effective quorum parameters. 
-PoC envelope SHOULD be stored canonically under `refs/gatos/audit/proofs/governance/`; the Grant’s `Proof-Of-Consensus` trailer MUST equal `blake3(envelope_bytes)`. +PoC envelope MUST be stored canonically under `refs/gatos/audit/proofs/governance/`; the Grant’s `Proof-Of-Consensus` trailer MUST equal `blake3(envelope_bytes)`. ### 20.4 Lifecycle States diff --git a/docs/TECH-SPEC.md b/docs/TECH-SPEC.md index 4f991b54..ecddb6b9 100644 --- a/docs/TECH-SPEC.md +++ b/docs/TECH-SPEC.md @@ -101,10 +101,10 @@ graph TD | `gatos-ledger-git` | `std`-dependent storage backend using `libgit2`. | | `gatos-ledger` | Composes ledger components via feature flags. | | `gatos-mind` | Asynchronous, commit-backed message bus (pub/sub). | -| `gatos-echo` | Deterministic state engine for processing events ("folds"). | -| `gatos-policy` | Deterministic policy engine for executing compiled rules and managing the Consensus Governance lifecycle. | -| `gatos-kv` | Git-backed key-value state cache. | -| `gatosd` | Main binary for the CLI and the JSONL RPC daemon. | +| `gatos-echo` | Deterministic state engine for processing events ("folds"). Privacy projection logic. | +| `gatos-policy` | Deterministic policy engine for executing compiled rules, managing Consensus Governance, and privacy rule evaluation. | +| `gatos-kv` | Git-backed key-value state cache, used for materializing and indexing queryable views of folded state. | +| `gatosd` | Main binary for the CLI, JSONL RPC daemon, and Opaque Pointer resolution endpoint. | | `gatos-compute` | Worker that discovers and executes jobs from the Job Plane. | | `gatos-wasm-bindings`| WASM bindings for browser and Node.js environments. | | `gatos-ffi-bindings` | C-compatible FFI for integration with other languages. | @@ -160,22 +160,67 @@ sequenceDiagram --- -## 6. Opaque Pointers +## 6. Privacy Projection and Resolution -The `rekey` command allows updating the encryption key for an opaque blob. 
+See also: [ADR‑0004](./decisions/ADR-0004/DECISION.md). + +The implementation of the hybrid privacy model involves a coordinated effort between the state, policy, and daemon components. + +### 6.1 Projection Implementation + +The projection from a `UnifiedState` to a `PublicState` is handled by `gatos-echo` with rules supplied by `gatos-policy`. ```mermaid sequenceDiagram - participant User - participant GATOS - - User->>GATOS: gatos blob rekey --to - GATOS->>GATOS: Create new Opaque Pointer - GATOS->>GATOS: Encrypt data with new pubkey - GATOS->>GATOS: Store new ciphertext in CAS - GATOS->>GATOS: Atomically update references + participant Echo as gatos-echo + participant Policy as gatos-policy + participant Ledger as gatos-ledger + participant PrivateStore as PrivateStore (Interface) + + Echo->>Echo: 1. Fold event history to produce UnifiedState + Echo->>Policy: 2. Request privacy rules for the current context + Policy-->>Echo: 3. Return `select` and `action` rules +loop for each field path in the UnifiedState tree + Echo->>Echo: 4. Match field path against rules + alt rule matches (e.g., "pointerize") + Echo->>Echo: 5. Generate Opaque Pointer envelope + Echo->>PrivateStore: 6. Store original node value as private blob, keyed by its blake3 digest + Echo->>Echo: 7. Replace node in state tree with pointer + end + end + Echo->>Ledger: 8. Commit the final PublicState tree ``` +The `PrivateStore` is a pluggable trait, allowing for backends like a local filesystem, S3, or another GATOS node. + +### 6.2 Resolution Implementation + +The `gatosd` daemon exposes a secure endpoint for resolving Opaque Pointers. + +- Endpoint: `POST /gatos/private/blobs/resolve` +- Content-Type: `application/json` +- Request body (JCS canonical JSON): + ```json + { "digest": "blake3:<hex>", "want": "plaintext" } + ``` + - `want` OPTIONAL: `"plaintext" | "ciphertext"` (default `"plaintext"`). 
+- Authentication: `Authorization: Bearer <JWT>` + - Claims (example): `iss`, `sub` (`ed25519:<pubkey>`), `aud` (`"gatos-node:<id>"`), `exp`, `nbf`, `jti`, `method` ("POST"), `path` ("/gatos/private/blobs/resolve"), `digest` (MUST match body.digest). + - Clock skew tolerance: ±300 seconds. +- Authorization: Node evaluates policy for `<sub>` on `<digest>`. +- Response (200 OK): + - Headers: `Digest: sha-256=<base64>`, `X-BLAKE3-Digest: blake3:<hex>` + - Body: requested bytes (ciphertext or plaintext). + +Errors: 401 Unauthorized, 403 Forbidden, 404 Not Found, 422 DigestMismatch, 503 CapabilityUnavailable. + +Optional profile (HTTP Message Signatures, RFC 9421): +- Clients MAY authenticate by signing components: `@method`, `@target-uri`, `date`, `host`, `content-digest` (SHA-256 over request body) and sending `Signature-Input: sig1=...` and `Signature: sig1=:<base64>:`. +- Servers STILL apply policy and SHOULD return `Digest` and `X-BLAKE3-Digest` headers. + +Pointer Rotation (Rekey): +- Implement a rotation that: (1) fetches; (2) decrypts; (3) re‑encrypts; (4) stores; (5) emits an audit event updating pointer fields while keeping plaintext `digest` stable. Add trailer `Privacy-Pointer-Rotations: <count>` when a projection commit includes rotations. + --- ## 7. JSONL Protocol @@ -236,8 +281,11 @@ graph TD C --> C1(Golden Vectors); C --> C2(Torture Tests); C --> C3(Reconcile Harness); + C --> C4(Projection Determinism); ``` +- **Projection Determinism**: Verifies that applying the same privacy policy to the same `UnifiedState` on different platforms (Linux, macOS, Windows) produces a byte-for-byte identical `PublicState` and the same set of private blobs. + --- ## 10. Security @@ -281,7 +329,7 @@ Tuning batch size is a trade-off between latency and commit churn. 
```mermaid xychart-beta - title "Batch Size Trade-off" + title "Batch Size Trade-off (Illustrative)" x-axis "Batch Size" y-axis "Metric" line "Latency" [50, 40, 35, 32, 30] diff --git a/docs/USE-CASES.md b/docs/USE-CASES.md index f3c52cb7..26e7100c 100644 --- a/docs/USE-CASES.md +++ b/docs/USE-CASES.md @@ -83,3 +83,13 @@ This document illustrates practical scenarios where GATOS provides unique value. |**Goal** | Signed toggles with audit and rollbacks. | | **How** | KV‑style events + index refs; push‑gate for enforcement. | | **Why GATOS** | Auditable configuration without a new database. | + +--- + +## 9) Verifiable, Compliant PII Management + +| | | +|---|---| +|**Goal** | Manage customer data (PII) in a way that is both auditable and privacy-preserving. | +| **How** | A privacy policy projects the unified state into a public state with PII replaced by Opaque Pointers. The private data lives in an actor-anchored, encrypted blob store. | +| **Why GATOS** | Provides a verifiable public audit trail ("a user's data was accessed") without ever exposing the private data ("the user's address is...") to the public ledger. Access is gated by cryptographic capabilities. | \ No newline at end of file diff --git a/docs/decisions/ADR-0003/DECISION.md b/docs/decisions/ADR-0003/DECISION.md index 45f29beb..3e4683ee 100644 --- a/docs/decisions/ADR-0003/DECISION.md +++ b/docs/decisions/ADR-0003/DECISION.md @@ -86,10 +86,10 @@ Define a system for gating specific GATOS actions (e.g., locking a file, publish 7. Proof‑Of‑Consensus (normative) - The `Proof-Of-Consensus` digest MUST be the BLAKE3 of a canonical envelope that includes (see schema: [`schemas/v1/governance/proof_of_consensus_envelope.schema.json`](../../../schemas/v1/governance/proof_of_consensus_envelope.schema.json)): - The canonical proposal envelope (by value or by `Proposal-Id`). - - A sorted list (by `Signer`) of all valid approvals used to reach quorum (each by value or `Approval-Id`). 
+ - A lexicographically sorted list of approvals by the lowercase ASCII of each approval's `Signer` value (the `ed25519:` string). Each approval is included by value or via `Approval-Id`. - The governance rule id (`Policy-Rule`) and effective quorum parameters. - Implementations MUST use canonical JSON (UTF‑8, sorted keys, no insignificant whitespace) to build this envelope before hashing. All hex encodings MUST be lowercase. Ordering by signer is an application‑level MUST; JSON Schema cannot enforce sort order. - - Storage: The canonical PoC envelope JSON SHOULD be persisted as a blob referenced under `refs/gatos/audit/proofs/governance/`; the `Proof-Of-Consensus` trailer MUST equal `blake3(envelope_bytes)`. + - Storage: The canonical PoC envelope JSON MUST be persisted as a blob referenced under `refs/gatos/audit/proofs/governance/`; the `Proof-Of-Consensus` trailer MUST equal `blake3(envelope_bytes)`. 8. Governance schema (policy integration) - Extend `.gatos/policy.yaml` to declare governance rules (JSON Schema: [`schemas/v1/policy/governance_policy.schema.json`](../../../schemas/v1/policy/governance_policy.schema.json)): diff --git a/docs/decisions/ADR-0004/DECISION.md b/docs/decisions/ADR-0004/DECISION.md new file mode 100644 index 00000000..151b5275 --- /dev/null +++ b/docs/decisions/ADR-0004/DECISION.md @@ -0,0 +1,286 @@ +--- +Status: Accepted +Date: 2025-11-10 +ADR: ADR-0004 +Authors: [flyingrobots, gemini-agent] +Requires: [ADR-0001] +Related: [ADR-0002, ADR-0003] +Tags: [Privacy, Projection, Opaque Pointers, Morphology Calculus] +Schemas: + - schemas/v1/privacy/opaque_pointer.schema.json +--- + +# ADR‑0004: Hybrid Privacy Model (Public Projection + Private Overlay) + +## Scope + +This ADR defines a **hybrid privacy model** for the GATOS operating surface. It formalizes the separation of state into a public, verifiable component and a private, actor-anchored overlay. 
This is achieved by introducing a **Projection Functor** that transforms a unified state into a public projection, leaving sensitive data in a private store referenced by **Opaque Pointers**. + +## Rationale + +GATOS's core value proposition is its verifiable, deterministic public ledger. However, many real-world applications require storing sensitive or large data (PII, secrets, large binaries) without committing it to the public history. The previous ad-hoc approach of using local, out-of-repo storage lacks the formal guarantees required by the GATOS Morphology Calculus. + +This ADR makes the hybrid model **normative, deterministic, and provable**. It ensures that public state remains globally verifiable while private data is securely addressable, auditable, and tied to the GATOS identity and policy model. + +## Mathematical Foundation (Morphology Calculus) + +This model is a direct application of the GATOS Morphology Calculus. + +1. **Shape Categories**: We define three categories of shapes: + * `Sh_Unified`: The category of shapes containing both public and private data. + * `Sh_Public`: The category of shapes containing only public data and opaque pointers. + * `Sh_Private`: The category of shapes containing only the private data blobs. + +2. **Projection as a Functor**: The privacy model is implemented as a functor, `Proj`, which maps shapes and morphisms from the unified category to the public category. + `Proj: Sh_Unified -> Sh_Public` + + This functor applies the privacy policy rules (`redact`, `pointerize`) to transform a unified shape into its public projection. The private data is extracted into `Sh_Private` during this process. 
+ + ```mermaid + graph TD + subgraph Sh_Unified + U1("Unified Shape 1") + U2("Unified Shape 2") + U1 -- "Commit c" --> U2 + end + + subgraph Sh_Public + P1("Public Shape 1") + P2("Public Shape 2") + P1 -- "Proj(c)" --> P2 + end + + subgraph Sh_Private + B1("Private Blobs 1") + B2("Private Blobs 2") + end + + U1 -- "Proj" --> P1 + U2 -- "Proj" --> P2 + + U1 -- "Extract" --> B1 + U2 -- "Extract" --> B2 + + style P1 fill:#cde,stroke:#333 + style P2 fill:#cde,stroke:#333 + ``` + +This ensures that the transformation is structure-preserving and that the public history remains a valid, deterministic projection of the complete history. + +## Decision + +### 1. Actor-Anchored Private Namespace (Normative) + +Private data overlays are fundamentally tied to an actor's identity, not an ephemeral session. This anchors private data within the GATOS trust graph. + +- **Actor ID:** The canonical identifier for an actor, e.g., `ed25519:`. +- **Private Refs:** Private data is stored under refs namespaced by the actor ID. + ``` + refs/gatos/private/// + ``` +- **Public Refs:** The corresponding public projection lives in the main state namespace. + ``` + refs/gatos/state/public// + ``` + +### 2. Opaque Pointers (Normative) + +When private data is elided from the `PublicState`, a canonical JSON **Opaque Pointer** envelope is inserted in its place. + +```mermaid +classDiagram + class OpaquePointer { + +string kind: "opaque_pointer" + +string algo: "blake3" + +string digest: "blake3:" // plaintext digest + +string ciphertext_digest: "blake3:" // MAY be present + +int size // SHOULD be present (bytes) + +string location + +string capability // MUST NOT embed secrets + +object extensions // forward-compatible + } +``` + +- **`digest`**: The content-address of the private plaintext (`blake3(plaintext_bytes)`). This is the immutable link between the public and private worlds. +- **`ciphertext_digest`**: The content-address of the stored ciphertext (`blake3(ciphertext_bytes)`). 
For low‑entropy privacy classes (see Policy Hooks), the public pointer **MUST** include `ciphertext_digest` and policy **MUST NOT** expose the plaintext digest publicly. +- **`location`**: A URI indicating where to resolve the blob. Supported schemes include: + - `gatos-node://ed25519:`: Resolve via the GATOS trust graph. + - `https://...`, `s3://...`, `ipfs://...`: Standard distributed storage. + - `file:///...`: For local development and testing. +- **`capability`**: A reference identifying the authorization and decryption mechanism required to access the blob. It **MUST NOT** embed secrets or pre‑signed tokens. It SHOULD be a stable identifier (e.g., `gatos-key://v1/aes-256-gcm/` or `kms://...`) that can be resolved privately at the policy layer. + - Pointers MAY publish a non‑sensitive label and keep resolver details private via policy. Implementations MAY also place auxiliary hints inside `extensions`. + +The canonical `content_id` of the pointer itself is `blake3(JCS(pointer_json))`, where `JCS(…)` denotes RFC 8785 JSON Canonicalization Scheme applied to UTF‑8 bytes. This rule is normative for all canonical JSON in GATOS (pointers, governance envelopes, any JSON state snapshots). + +**Schema:** `schemas/v1/privacy/opaque_pointer.schema.json` + +### 3. The Projection Function (Normative) + +The State Engine (`gatos-echo`) is responsible for executing the projection. + +1. It computes a **UnifiedState** by folding the complete event history. +2. It consults the **Privacy Policy** (`.gatos/policy.yaml`). +3. It traverses the `UnifiedState` tree, applying `redact` or `pointerize` rules. + - `redact`: The field is removed from the public state. + - `pointerize`: The field's value is stored as a private blob, and an Opaque Pointer is substituted in the public state. +4. The resulting `PublicState` is committed to the public refs, and the `Private Blobs` are persisted to their specified `location`. 
+ +Determinism Requirements: +- All JSON artifacts produced during projection (including Opaque Pointers) MUST be canonicalized with RFC 8785 JCS prior to hashing. +- When non‑JSON maps are materialized (e.g., Git tree entries), keys MUST be ordered lexicographically by their lowercase UTF‑8 bytes. + +```mermaid +sequenceDiagram + participant E as State Engine (gatos-echo) + participant Pol as Policy Engine + participant L as Ledger (Git) + participant PS as Private Store + + E->>E: 1. Fold history into UnifiedState + E->>Pol: 2. Fetch privacy rules + Pol-->>E: 3. Return rules (redact/pointerize) + E->>E: 4. Apply rules to create PublicState + PrivateBlobs + E->>L: 5. Commit PublicState to public refs + E->>PS: 6. Store PrivateBlobs by digest +``` + +### 4. Pointer Resolution Protocol (Normative) + +Authentication semantics are aligned with HTTP. We adopt a simple, interoperable model (JWT default; HTTP Message Signatures optional): + +- **Endpoint**: `POST /gatos/private/blobs/resolve` +- **Request Body (application/json; JCS canonical form)**: + `{ "digest": "blake3:", "want": "plaintext"|"ciphertext" }` +- **Authorization**: `Authorization: Bearer ` + - Claims MUST include: `sub` (ed25519:), `aud` (node id or URL), `method` ("POST"), `path` ("/gatos/private/blobs/resolve"), `exp`, and `nbf`. + - Clock skew tolerance: ±300 seconds. + - On missing/invalid token: `401 Unauthorized`. On policy denial: `403 Forbidden`. + +A client resolving an Opaque Pointer **MUST** follow this protocol: + +1. **Parse Pointer**: Extract `digest`, optional `ciphertext_digest`, `location`, and `capability`. +2. **Fetch Blob**: + - If `gatos-node://`, resolve the actor's endpoint from the trust graph, then `POST /gatos/private/blobs/resolve` with the body above. + - The node **MUST** verify the bearer token and enforce policy before returning the blob. +3. **Acquire Capability**: + - Resolve the `capability` reference via the configured key system (KMS, key server). 
Secrets MUST NOT be embedded in the pointer. +4. **Decrypt and Verify**: + - Decrypt the fetched blob using the resolved key and AAD parameters (see Security Notes). + - Compute `blake3(plaintext)` and compare to `digest` if published; compute `blake3(ciphertext)` and compare to `ciphertext_digest` if published. A mismatch **MUST** produce `DigestMismatch`. + +Response headers on success: +``` +Content-Type: application/octet-stream +X-BLAKE3-Digest: blake3: +Digest: sha-256= +``` + +Optional HTTP Message Signatures profile (RFC 9421): +- Clients MAY authenticate by signing `@method`, `@target-uri`, `date`, `host`, `content-digest` (SHA‑256 of the JSON body) and sending `Signature-Input` and `Signature` headers. +- Servers SHOULD still return `Digest` and `X-BLAKE3-Digest` headers for response integrity. + +```mermaid +sequenceDiagram + participant C as Client + participant PN as Private GATOS Node + participant KMS as Key Management Service + + C->>C: 1. Read OpaquePointer + C->>PN: 2. POST /gatos/private/blobs/resolve (Authorization: Bearer ) + PN->>PN: 3. Check policy (is C allowed?) + alt Authorized + PN-->>C: 4. Return encrypted blob + C->>KMS: 5. Request key for {capability} + KMS-->>C: 6. Return decryption key + C->>C: 7. Decrypt blob + C->>C: 8. Verify blake3(decrypted) == digest + else Unauthorized + PN-->>C: 4. Return 401/403 + end +``` + +### 5. Policy Hooks (Normative) + +The privacy policy is defined in `.gatos/policy.yaml` and extends the policy engine's domain. 
+ +```yaml +privacy: + classes: + pii_low_entropy: + min_entropy_bits: 40 + publish_plaintext_digest: false + require_ciphertext_digest: true + rules: + - select: "path.to.sensitive.data" + action: "pointerize" + class: "pii_low_entropy" + capability: "gatos-key://v1/aes-256-gcm/ops-key-01" + location: "gatos-node://ed25519:<pubkey>" + - select: "path.to.transient.data" + action: "redact" +``` + +The `select` syntax will use a simple path-matching language (e.g., glob patterns) defined by the policy engine. + +### 6. Auditability and Trailers (Normative) + +To make privacy operations transparent and auditable, any commit that creates a `PublicState` from a projection **MUST** include the following trailers: + +``` +Privacy-Redactions: 3 +Privacy-Pointers: 12 +Privacy-Pointer-Rotations: 1 +``` + +This provides a simple, top-level indicator that a projection has occurred, prompting auditors to look deeper if necessary. + +## Consequences + +### Pros + +- **Provable Privacy**: The model is grounded in the Morphology Calculus, making it verifiable. +- **Decoupled Storage**: Private data can live in any storage system (S3, IPFS, local disk) without affecting the public ledger's logic. +- **Integrated Auth/Authz**: By tying pointers to actor identities and capabilities, access to private data is governed by the existing GATOS trust and policy model. +- **Preserves Verifiability**: The `PublicState` remains globally verifiable, as pointers are just content-addressed links. + +### Cons + +- **Increased Complexity**: Resolution requires network requests and interaction with key management systems, adding latency and potential points of failure. +- **Operational Overhead**: Operators must manage the private blob stores and ensure their availability and security. + +## Feature Payoff + +- **Secure PII/Secret Storage**: Store sensitive data off-chain while retaining an auditable link to it.
+- **Large Artifact Management**: Handle large binaries (ML models, videos) without bloating the Git repository. +- **Compliant Data Sharing**: Share a public, redacted dataset with third parties while retaining private access to the full, unified view. +- **Federated Learning**: Different actors can hold private models locally, referenced by pointers in a public "training plan" shape. + +--- + +## Namespacing and Storage (Normative) + +- Private overlays are actor‑anchored: `refs/gatos/private/<actor-id>/<ns>/<channel>` holds index metadata. The local workspace mirror is `gatos/private/<actor-id>/<ns>/<channel>`. +- Private blobs themselves are NOT stored under Git refs. They live in pluggable blob stores and are addressed by their `ciphertext_digest`/`digest`. + +## Security & Privacy Notes (Normative) + +- Capability references in pointers MUST NOT contain secrets or pre‑signed tokens. Use stable identifiers and resolve sensitive data via policy. +- AES‑256‑GCM (if used) MUST include AAD composed of: actor id, pointer `content_id`, and policy version; nonces MUST be 96‑bit, randomly generated, and never reused per key. +- Right‑to‑be‑forgotten: deleting private blobs breaks pointer resolution but does not remove the public pointer. Implement erasure as a tombstone event plus an audit record. + +### Algorithm variants (experimental; private attestations only) + +- Implementations MAY use a keyed BLAKE3 variant for private attestation envelopes (not for public Opaque Pointers): `algo = "blake3-keyed"` with parameters encoded in an envelope or pointer `extensions` field. +- Recommended KDF: `hkdf-sha256`; context string `"gatos:ptr:priv:"`; derive `key = HKDF(policy_key, salt = actor_pubkey, info = context)`. +- Public pointers MUST continue to use `algo = "blake3"` for third‑party verifiability.
+ +## Error Taxonomy (Normative) + +Implementations SHOULD use a stable set of error codes with JSON problem details: + +- `Unauthorized` (401) +- `Forbidden` (403) +- `NotFound` (404) +- `DigestMismatch` (422) +- `CapabilityUnavailable` (503) +- `PolicyDenied` (403) diff --git a/docs/decisions/README.md b/docs/decisions/README.md index 94b0994e..b61db3c4 100644 --- a/docs/decisions/README.md +++ b/docs/decisions/README.md @@ -20,3 +20,4 @@ Each ADR will have a status, typically one of the following: | [ADR-0001](./ADR-0001/DECISION.md) | Split gatos-ledger into no_std Core and std Backends | Accepted | 2025-11-08 | | [ADR-0002](./ADR-0002/DECISION.md) | Distributed Compute via a Job Plane | Accepted | 2025-11-08 | | [ADR-0003](./ADR-0003/DECISION.md) | Consensus Governance for Gated Actions | Accepted | 2025-11-08 | +| [ADR-0004](./ADR-0004/DECISION.md) | Hybrid Privacy Model (Public Projection + Private Overlay) | Accepted | 2025-11-10 | diff --git a/examples/v1/policy/privacy_min.json b/examples/v1/policy/privacy_min.json new file mode 100644 index 00000000..c510c3b9 --- /dev/null +++ b/examples/v1/policy/privacy_min.json @@ -0,0 +1,21 @@ +{ + "privacy": { + "classes": { + "pii_low_entropy": { + "min_entropy_bits": 40, + "publish_plaintext_digest": false, + "require_ciphertext_digest": true + } + }, + "rules": [ + { + "select": "user.email", + "action": "pointerize", + "class": "pii_low_entropy", + "capability": "gatos-key://v1/aes-256-gcm/ops-key-01", + "location": "gatos-node://ed25519:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + } + ] + } +} + diff --git a/examples/v1/privacy/opaque_pointer_min.json b/examples/v1/privacy/opaque_pointer_min.json new file mode 100644 index 00000000..eb3cf057 --- /dev/null +++ b/examples/v1/privacy/opaque_pointer_min.json @@ -0,0 +1,9 @@ +{ + "kind": "opaque_pointer", + "algo": "blake3", + "digest": "blake3:0000000000000000000000000000000000000000000000000000000000000000", + "ciphertext_digest": 
"blake3:1111111111111111111111111111111111111111111111111111111111111111", + "size": 0, + "location": "gatos-node://ed25519:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + "capability": "gatos-key://v1/aes-256-gcm/test-key-01" +} diff --git a/schemas/v1/policy/governance_policy.schema.json b/schemas/v1/policy/governance_policy.schema.json index 5c738b7d..42ff6baa 100644 --- a/schemas/v1/policy/governance_policy.schema.json +++ b/schemas/v1/policy/governance_policy.schema.json @@ -46,5 +46,39 @@ } } } + , + "privacy": { + "type": "object", + "additionalProperties": false, + "properties": { + "classes": { + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": false, + "properties": { + "min_entropy_bits": { "type": "integer", "minimum": 0 }, + "publish_plaintext_digest": { "type": "boolean" }, + "require_ciphertext_digest": { "type": "boolean" } + } + } + }, + "rules": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "select": { "type": "string" }, + "action": { "type": "string", "enum": ["redact", "pointerize"] }, + "class": { "type": "string" }, + "capability": { "type": "string", "format": "uri" }, + "location": { "type": "string", "format": "uri" } + }, + "required": ["select", "action"] + } + } + } + } } } diff --git a/schemas/v1/privacy/opaque_pointer.schema.json b/schemas/v1/privacy/opaque_pointer.schema.json new file mode 100644 index 00000000..9e899fc9 --- /dev/null +++ b/schemas/v1/privacy/opaque_pointer.schema.json @@ -0,0 +1,32 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "GATOS Opaque Pointer", + "description": "Canonical pointer to a private blob used in public projections.", + "type": "object", + "properties": { + "kind": { "type": "string", "const": "opaque_pointer" }, + "algo": { "type": "string", "const": "blake3" }, + "digest": { "type": "string", "pattern": "^blake3:[a-f0-9]{64}$" }, + "ciphertext_digest": { 
"type": "string", "pattern": "^blake3:[a-f0-9]{64}$" }, + "size": { "type": "integer", "minimum": 0 }, + "location": { "type": "string", "format": "uri" }, + "capability": { "type": "string", "format": "uri" }, + "extensions": { "type": "object" } + }, + "required": ["kind","algo","location","capability"], + "anyOf": [ + { + "required": ["digest"], + "properties": { + "digest": { "type": "string", "pattern": "^blake3:[a-f0-9]{64}$" } + } + }, + { + "required": ["ciphertext_digest"], + "properties": { + "ciphertext_digest": { "type": "string", "pattern": "^blake3:[a-f0-9]{64}$" } + } + } + ], + "additionalProperties": false +} diff --git a/scripts/validate_schemas.sh b/scripts/validate_schemas.sh index 6f0233d1..3771b34b 100755 --- a/scripts/validate_schemas.sh +++ b/scripts/validate_schemas.sh @@ -18,6 +18,7 @@ SCHEMAS=( "schemas/v1/governance/revocation.schema.json" "schemas/v1/governance/proof_of_consensus_envelope.schema.json" "schemas/v1/policy/governance_policy.schema.json" + "schemas/v1/privacy/opaque_pointer.schema.json" ) for schema in "${SCHEMAS[@]}"; do @@ -38,6 +39,7 @@ declare -A EXAMPLES=( ["schemas/v1/governance/grant.schema.json"]="examples/v1/governance/grant_min.json" ["schemas/v1/governance/revocation.schema.json"]="examples/v1/governance/revocation_min.json" ["schemas/v1/governance/proof_of_consensus_envelope.schema.json"]="examples/v1/governance/poc_envelope_min.json" + ["schemas/v1/privacy/opaque_pointer.schema.json"]="examples/v1/privacy/opaque_pointer_min.json" ) for schema in "${!EXAMPLES[@]}"; do @@ -52,6 +54,8 @@ done echo " - ajv validate: examples/v1/policy/governance_min.json against schemas/v1/policy/governance_policy.schema.json" ajv validate "${AJV_BASE_ARGS[@]}" -s schemas/v1/policy/governance_policy.schema.json -d examples/v1/policy/governance_min.json +echo " - ajv validate: examples/v1/policy/privacy_min.json against schemas/v1/policy/governance_policy.schema.json" +ajv validate "${AJV_BASE_ARGS[@]}" -s 
schemas/v1/policy/governance_policy.schema.json -d examples/v1/policy/privacy_min.json echo "[schemas] Additional encoding tests (ed25519 base64url forms)…" # Root schemas that reference defs using the canonical $id for proper resolution