From b3f3e700e1ca39a90d985795a2c9e235e26445e5 Mon Sep 17 00:00:00 2001 From: thepagent Date: Sat, 14 Mar 2026 02:11:28 +0000 Subject: [PATCH 1/3] docs: add gatekeeper sidecar design doc --- docs/gatekeeper.md | 146 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 docs/gatekeeper.md diff --git a/docs/gatekeeper.md b/docs/gatekeeper.md new file mode 100644 index 0000000..659f215 --- /dev/null +++ b/docs/gatekeeper.md @@ -0,0 +1,146 @@ +# Gatekeeper Sidecar Design + +## Overview + +Gatekeeper is a sidecar container that runs alongside the OpenClaw main container within the same Kubernetes pod. It acts as the sole holder of secrets, enforcing human-in-the-loop approval via Telegram before returning any secret to OpenClaw. + +OpenClaw itself holds **zero secrets** — no env vars, no files, no keys. + +## Architecture + +``` +┌─────────────────── K3s / K8s Cluster ─────────────────────────┐ +│ │ +│ ┌── AWS Secrets Manager ──┐ │ +│ │ openclaw/tokens │ │ +│ │ TELEGRAM_TOKEN_1 │ │ +│ │ TELEGRAM_TOKEN_2 │ │ +│ │ GATEWAY_TOKEN │ │ +│ └────────────┬─────────────┘ │ +│ │ IAM Role (sidecar SA only) │ +│ ▼ │ +│ ┌──────────────────── OpenClaw Pod ────────────────────────┐ │ +│ │ │ │ +│ │ ┌─────────────────────┐ ┌──────────────────────┐ │ │ +│ │ │ main (OpenClaw) │ │ gatekeeper (sidecar) │ │ │ +│ │ │ │ │ │ │ │ +│ │ │ ❌ no secrets │ │ ✅ IAM Role │ │ │ +│ │ │ ❌ no env keys │ │ ✅ fetches from AWS │ │ │ +│ │ │ ❌ no AWS access │ │ ✅ Telegram approval │ │ │ +│ │ │ │ │ ✅ rate limiting │ │ │ +│ │ │ exec: curl unix ───────► /tmp/gatekeeper.sock │ │ │ +│ │ │ socket → get secret│ │ │ │ │ │ +│ │ │ │ │ ▼ │ │ │ +│ │ │ │ │ 📱 Telegram notify │ │ │ +│ │ │ │ │ [✅ Approve][❌ Deny]│ │ │ +│ │ │ │ │ │ │ │ │ +│ │ │ ◄── secret returned────────────── ▼ │ │ │ +│ │ │ (stored in memory) │ │ fetch from AWS SM │ │ │ +│ │ └─────────────────────┘ └──────────────────────┘ │ │ +│ │ shared emptyDir volume (/tmp/gatekeeper.sock) │ │ +│ 
└──────────────────────────────────────────────────────────┘ │ +│ │ +└────────────────────────────────────────────────────────────────┘ + + 📱 Your Phone + ┌──────────────────────┐ + │ 🔐 Gatekeeper Alert │ + │ │ + │ OpenClaw requests │ + │ secret access │ + │ Time: 09:58 │ + │ │ + │ [✅ Approve][❌ Deny] │ + └──────────────────────┘ +``` + +## Security Model + +| Threat | Mitigation | +|--------|-----------| +| Agent reads env vars | No secrets in main container env | +| Agent reads sidecar filesystem | Different container — filesystem isolated | +| Agent calls unix socket directly | Sidecar sends Telegram alert — you deny | +| Agent runs `aws secretsmanager get-secret-value` | Main container SA has no IAM permissions | +| `kubectl exec` into sidecar | Requires cluster RBAC — agent doesn't have it | +| Brute-force socket requests | Rate limit: max 1 request per 5 minutes; alerts on repeated attempts | + +## Components + +### 1. AWS Secrets Manager +- Stores all tokens and keys under a single path (e.g. `openclaw/tokens`) +- Supports automatic rotation +- Every access is logged in CloudTrail + +### 2. Gatekeeper Sidecar +- Minimal container (Alpine-based) +- Listens on `/tmp/gatekeeper.sock` (shared `emptyDir` volume) +- On request: + 1. Sends Telegram approval message with Approve / Deny buttons + 2. Waits up to `approvalTimeoutSeconds` for response + 3. On approval: fetches secret from AWS Secrets Manager via IAM Role, returns to caller + 4. On denial or timeout: returns error +- Rate limit: configurable, default 1 request per 5 minutes +- Alerts on anomalous repeated requests + +### 3. 
IAM / RBAC Separation +- The pod runs under a dedicated `ServiceAccount` annotated with an IAM Role (IRSA or EKS Pod Identity); Kubernetes assigns a `ServiceAccount` per pod, not per container, so the sidecar cannot have one of its own +- Main container must be excluded from AWS credential injection (with IRSA, e.g. via the `eks.amazonaws.com/skip-containers` pod annotation) so that it has **no AWS permissions** +- RBAC: the SA has no `exec` rights into pods + +## Helm Values + +```yaml +gatekeeper: + enabled: false # opt-in + image: + repository: ghcr.io/thepagent/openclaw-gatekeeper + tag: latest + aws: + region: ap-northeast-1 + secretsManagerPath: openclaw/tokens + telegram: + approvalTimeoutSeconds: 60 + rateLimitMinutes: 5 + serviceAccount: + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT_ID:role/openclaw-gatekeeper +``` + +## Repository Layout + +``` +openclaw-helm/ +├── gatekeeper/ # sidecar source code (Rust) +│ ├── Dockerfile +│ ├── Cargo.toml +│ └── src/main.rs +├── templates/ +│ ├── deployment.yaml # injects sidecar when gatekeeper.enabled=true +│ └── serviceaccount.yaml # dedicated SA carrying the IAM role annotation +├── docs/ +│ └── gatekeeper.md # this document +└── .github/workflows/ + └── gatekeeper-image.yml # CI: build & push gatekeeper image on change +``` + +## CI / CD + +A GitHub Actions workflow (`gatekeeper-image.yml`) will: +1. Trigger on changes to `gatekeeper/**` +2. Build the Docker image +3. Push to `ghcr.io/thepagent/openclaw-gatekeeper` with the commit SHA tag and `latest` + +## Deployment Flow + +1. Create IAM Role with `secretsmanager:GetSecretValue` on `openclaw/tokens` +2. Store secrets in AWS Secrets Manager +3. Set `gatekeeper.enabled: true` and configure `values.yaml` +4. `helm upgrade --install openclaw oci://ghcr.io/thepagent/openclaw-helm -f values.yaml` +5. 
On first OpenClaw startup, approve the Telegram request on your phone + +## Future Considerations + +- **Audit log**: persist approval/denial events to a local file or CloudWatch +- **Multi-secret support**: allow OpenClaw to request individual named secrets rather than the full bundle +- **mTLS over socket**: replace plain unix socket with mTLS for stronger channel integrity From 346a33de252b84d38e054f80864e5a7f8090bf7b Mon Sep 17 00:00:00 2001 From: thepagent Date: Sat, 14 Mar 2026 02:12:18 +0000 Subject: [PATCH 2/3] docs: add problem statement to gatekeeper design doc --- docs/gatekeeper.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/gatekeeper.md b/docs/gatekeeper.md index 659f215..13b61e1 100644 --- a/docs/gatekeeper.md +++ b/docs/gatekeeper.md @@ -1,5 +1,24 @@ # Gatekeeper Sidecar Design +## Problem + +OpenClaw is an AI agent gateway. The agent can execute arbitrary tools — bash commands, Python scripts, HTTP calls — as part of its reasoning loop. This creates a fundamental tension: + +> The agent needs secrets (API tokens, bot keys) to function, but giving the agent access to its own secrets means a compromised or misbehaving agent can exfiltrate them. + +Current risk with naive approaches: + +- Secrets in env vars → agent reads `process.env` or `/proc/self/environ` +- Secrets in K8s Secret mounted as files → agent reads the file directly +- Secrets in K8s Secret as env vars → same problem +- No audit trail → you don't know when or why a secret was accessed + +The core problem: **the agent and its secrets live in the same trust boundary.** + +## Goal + +Move secrets out of the agent's trust boundary entirely, while still allowing the agent to function — with a human approval gate on every secret access. + ## Overview Gatekeeper is a sidecar container that runs alongside the OpenClaw main container within the same Kubernetes pod. 
It acts as the sole holder of secrets, enforcing human-in-the-loop approval via Telegram before returning any secret to OpenClaw. From 4a6a92804f9354e1bae07c9f57b0cefbd2616108 Mon Sep 17 00:00:00 2001 From: thepagent Date: Sat, 14 Mar 2026 02:14:37 +0000 Subject: [PATCH 3/3] feat: add gatekeeper sidecar in Rust + CI workflow --- .github/workflows/gatekeeper-image.yml | 35 +++++ gatekeeper/Cargo.toml | 21 +++ gatekeeper/Dockerfile | 11 ++ gatekeeper/src/main.rs | 197 +++++++++++++++++++++++++ 4 files changed, 264 insertions(+) create mode 100644 .github/workflows/gatekeeper-image.yml create mode 100644 gatekeeper/Cargo.toml create mode 100644 gatekeeper/Dockerfile create mode 100644 gatekeeper/src/main.rs diff --git a/.github/workflows/gatekeeper-image.yml b/.github/workflows/gatekeeper-image.yml new file mode 100644 index 0000000..4091695 --- /dev/null +++ b/.github/workflows/gatekeeper-image.yml @@ -0,0 +1,35 @@ +name: gatekeeper-image + +on: + push: + branches: [main] + paths: [gatekeeper/**] + pull_request: + paths: [gatekeeper/**] + +env: + IMAGE: ghcr.io/${{ github.repository_owner }}/openclaw-gatekeeper + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v4 + + - uses: docker/login-action@v3 + if: github.event_name == 'push' + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - uses: docker/build-push-action@v5 + with: + context: gatekeeper + push: ${{ github.event_name == 'push' }} + tags: | + ${{ env.IMAGE }}:latest + ${{ env.IMAGE }}:${{ github.sha }} diff --git a/gatekeeper/Cargo.toml b/gatekeeper/Cargo.toml new file mode 100644 index 0000000..9b68bf9 --- /dev/null +++ b/gatekeeper/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "gatekeeper" +version = "0.1.0" +edition = "2021" + +[dependencies] +tokio = { version = "1", features = ["full"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +aws-config = { 
version = "1", features = ["behavior-version-latest"] }
+aws-sdk-secretsmanager = "1"
+reqwest = { version = "0.12", features = ["json"] }
+zeroize = { version = "1", features = ["derive"] }
+tracing = "0.1"
+tracing-subscriber = "0.3"
+anyhow = "1"
+
+[profile.release]
+opt-level = "z"
+lto = true
+strip = true
diff --git a/gatekeeper/Dockerfile b/gatekeeper/Dockerfile
new file mode 100644
index 0000000..7cb93ce
--- /dev/null
+++ b/gatekeeper/Dockerfile
@@ -0,0 +1,11 @@
+FROM rust:1.77-alpine AS builder
+RUN apk add --no-cache musl-dev pkgconfig openssl-dev
+WORKDIR /build
+COPY Cargo.toml Cargo.lock* ./
+COPY src ./src
+RUN cargo build --release
+
+FROM alpine:3.19
+RUN apk add --no-cache ca-certificates
+COPY --from=builder /build/target/release/gatekeeper /usr/local/bin/gatekeeper
+ENTRYPOINT ["/usr/local/bin/gatekeeper"]
diff --git a/gatekeeper/src/main.rs b/gatekeeper/src/main.rs
new file mode 100644
index 0000000..6a989c1
--- /dev/null
+++ b/gatekeeper/src/main.rs
@@ -0,0 +1,315 @@
+//! Gatekeeper sidecar: hands out AWS Secrets Manager secrets over a unix socket, but only
+//! after an operator approves the request in Telegram.
+//!
+//! Protocol: the client writes one JSON line (`{"name": "<secret id>"}`) and reads one JSON
+//! line back: `{"value": "..."}` on success, `{"error": "..."}` otherwise.
+
+use anyhow::{bail, Context, Result};
+use aws_sdk_secretsmanager::Client as SmClient;
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
+use tokio::net::UnixListener;
+use tokio::sync::Mutex;
+use tracing::{info, warn};
+use zeroize::{Zeroize, ZeroizeOnDrop, Zeroizing};
+
+const SOCKET_PATH: &str = "/tmp/gatekeeper.sock";
+const RATE_LIMIT_SECS: u64 = 300; // 5 minutes
+/// How long the operator has to press a button before the request counts as denied.
+const APPROVAL_TIMEOUT_SECS: u64 = 60;
+/// Long-poll duration of a single Telegram `getUpdates` call.
+const LONG_POLL_SECS: u64 = 5;
+/// Upper bound on the request line; the peer is untrusted.
+const MAX_REQUEST_BYTES: u64 = 4096;
+/// Upper bound on a secret id (name or ARN).
+const MAX_SECRET_NAME_LEN: usize = 2048;
+
+/// Request sent by the client.
+#[derive(Deserialize)]
+struct SecretRequest {
+    /// Secrets Manager secret id to fetch.
+    name: String,
+}
+
+/// Reply written back to the client; the constructors set exactly one field.
+///
+/// Wiped on drop so the secret does not linger in this process's heap.
+#[derive(Serialize, Zeroize, ZeroizeOnDrop)]
+struct SecretResponse {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    value: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    error: Option<String>,
+}
+
+impl SecretResponse {
+    fn ok(value: String) -> Self {
+        Self { value: Some(value), error: None }
+    }
+
+    fn err(msg: &str) -> Self {
+        Self { value: None, error: Some(msg.to_owned()) }
+    }
+}
+
+/// Shared configuration plus the rate-limit timestamp.
+struct State {
+    sm_client: SmClient,
+    tg_bot_token: String,
+    tg_chat_id: String,
+    /// Start of the most recent accepted request. Only this field is locked, so a pending
+    /// approval does not keep other callers waiting for their "rate limited" reply.
+    last_request: Mutex<Option<Instant>>,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    tracing_subscriber::fmt::init();
+
+    let tg_bot_token =
+        std::env::var("GATEKEEPER_TG_BOT_TOKEN").context("GATEKEEPER_TG_BOT_TOKEN required")?;
+    let tg_chat_id =
+        std::env::var("GATEKEEPER_TG_CHAT_ID").context("GATEKEEPER_TG_CHAT_ID required")?;
+
+    let aws_cfg = aws_config::load_from_env().await;
+    let sm_client = SmClient::new(&aws_cfg);
+
+    let state = Arc::new(State {
+        sm_client,
+        tg_bot_token,
+        tg_chat_id,
+        last_request: Mutex::new(None),
+    });
+
+    // A stale socket from a previous run would make `bind` fail; a missing file is fine.
+    let _ = std::fs::remove_file(SOCKET_PATH);
+    let listener = UnixListener::bind(SOCKET_PATH)?;
+    info!("gatekeeper listening on {}", SOCKET_PATH);
+
+    loop {
+        let (stream, _) = listener.accept().await?;
+        let state = Arc::clone(&state);
+        tokio::spawn(async move {
+            if let Err(e) = handle(stream, state).await {
+                warn!("handle error: {e}");
+            }
+        });
+    }
+}
+
+/// Returns true if `name` is non-empty, at most `MAX_SECRET_NAME_LEN` bytes, and made only
+/// of ASCII alphanumerics and `/_+=.@-` (the AWS secret-name alphabet) plus `:` for ARNs.
+/// Anything else could break or spoof the Markdown approval message.
+fn is_valid_secret_name(name: &str) -> bool {
+    !name.is_empty()
+        && name.len() <= MAX_SECRET_NAME_LEN
+        && name
+            .bytes()
+            .all(|b| b.is_ascii_alphanumeric() || b"/_+=.@-:".contains(&b))
+}
+
+/// Seconds left in the rate-limit window given the time since the last request, or `None`
+/// once the window has passed. Works from a single `elapsed` reading so the subtraction
+/// cannot underflow.
+fn rate_limit_remaining(elapsed: Duration) -> Option<u64> {
+    (elapsed < Duration::from_secs(RATE_LIMIT_SECS)).then(|| RATE_LIMIT_SECS - elapsed.as_secs())
+}
+
+/// Records a new request if the window is free; otherwise returns the seconds to wait.
+async fn claim_rate_limit_slot(state: &State) -> std::result::Result<(), u64> {
+    let mut last = state.last_request.lock().await;
+    if let Some(remaining) = (*last).and_then(|t| rate_limit_remaining(t.elapsed())) {
+        return Err(remaining);
+    }
+    *last = Some(Instant::now());
+    Ok(())
+}
+
+/// Serves one connection: reads a request line, validates it, applies the rate limit, runs
+/// the approval flow and writes back a single JSON line.
+async fn handle(stream: tokio::net::UnixStream, state: Arc<State>) -> Result<()> {
+    let (reader, mut writer) = stream.into_split();
+    let mut line = String::new();
+    // `take` caps memory use if the peer never sends a newline.
+    BufReader::new(reader.take(MAX_REQUEST_BYTES))
+        .read_line(&mut line)
+        .await?;
+
+    let resp = match serde_json::from_str::<SecretRequest>(line.trim()) {
+        Err(e) => {
+            warn!("malformed request: {e}");
+            SecretResponse::err("malformed request")
+        }
+        Ok(req) if !is_valid_secret_name(&req.name) => {
+            warn!("rejected request with invalid secret name");
+            SecretResponse::err("invalid secret name")
+        }
+        Ok(req) => match claim_rate_limit_slot(&state).await {
+            Ok(()) => fetch_with_approval(&state, &req.name).await,
+            Err(remaining) => {
+                warn!("rate limited — {} seconds remaining", remaining);
+                SecretResponse::err(&format!("rate limited, retry in {remaining}s"))
+            }
+        },
+    };
+
+    // The serialized reply contains the secret too, so wipe that buffer as well.
+    let out = Zeroizing::new(serde_json::to_string(&resp)?);
+    writer.write_all(out.as_bytes()).await?;
+    writer.write_all(b"\n").await?;
+    Ok(())
+}
+
+/// Asks the operator for approval and, if granted, fetches `secret_name` from AWS Secrets
+/// Manager. Failures are reduced to a short error string for the caller; details are only
+/// logged.
+async fn fetch_with_approval(state: &State, secret_name: &str) -> SecretResponse {
+    let approved =
+        match request_approval(&state.tg_bot_token, &state.tg_chat_id, secret_name).await {
+            Ok(v) => v,
+            Err(e) => {
+                warn!("telegram error: {e}");
+                return SecretResponse::err("telegram error");
+            }
+        };
+
+    if !approved {
+        warn!("request denied by operator or timed out");
+        return SecretResponse::err("denied");
+    }
+
+    let fetched = state
+        .sm_client
+        .get_secret_value()
+        .secret_id(secret_name)
+        .send()
+        .await;
+    match fetched {
+        Ok(out) => match out.secret_string() {
+            Some(secret) => {
+                info!("secret '{}' returned to caller", secret_name);
+                SecretResponse::ok(secret.to_owned())
+            }
+            // No string form (e.g. a binary secret): report it instead of returning "".
+            None => {
+                warn!("secret '{}' has no string value", secret_name);
+                SecretResponse::err("secret has no string value")
+            }
+        },
+        Err(e) => {
+            warn!("secrets manager error: {e}");
+            SecretResponse::err("aws error")
+        }
+    }
+}
+
+/// Sends a Telegram message with Approve/Deny inline buttons and polls for the answer.
+///
+/// Returns `Ok(true)` only if Approve is pressed on that message within
+/// `APPROVAL_TIMEOUT_SECS`; Deny, any other callback data, or a timeout yield `Ok(false)`.
+/// `secret_name` is interpolated into Markdown, so callers must validate it first.
+///
+/// # Errors
+/// Fails if Telegram is unreachable or rejects a call. URLs are stripped from HTTP errors
+/// because they embed the bot token.
+async fn request_approval(bot_token: &str, chat_id: &str, secret_name: &str) -> Result<bool> {
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(LONG_POLL_SECS + 10))
+        .build()
+        .map_err(reqwest::Error::without_url)?;
+    let base = format!("https://api.telegram.org/bot{bot_token}");
+
+    let body = serde_json::json!({
+        "chat_id": chat_id,
+        "text": format!("🔐 Gatekeeper: secret access requested\n\nSecret: `{secret_name}`\n\nApprove?"),
+        "parse_mode": "Markdown",
+        "reply_markup": {
+            "inline_keyboard": [[
+                {"text": "✅ Approve", "callback_data": "approve"},
+                {"text": "❌ Deny", "callback_data": "deny"}
+            ]]
+        }
+    });
+
+    let sent: serde_json::Value = client
+        .post(format!("{base}/sendMessage"))
+        .json(&body)
+        .send()
+        .await
+        .map_err(reqwest::Error::without_url)?
+        .json()
+        .await
+        .map_err(reqwest::Error::without_url)?;
+    ensure_ok(&sent, "sendMessage")?;
+
+    // Without these ids no callback could ever match, so fail now instead of waiting out
+    // the whole timeout.
+    let message_id = sent["result"]["message_id"]
+        .as_i64()
+        .context("sendMessage reply has no message_id")?;
+    let sent_chat_id = sent["result"]["chat"]["id"]
+        .as_i64()
+        .context("sendMessage reply has no chat id")?;
+
+    let deadline = Instant::now() + Duration::from_secs(APPROVAL_TIMEOUT_SECS);
+    let mut offset: i64 = 0;
+
+    loop {
+        let remaining = deadline.saturating_duration_since(Instant::now());
+        if remaining.is_zero() {
+            return Ok(false); // timeout = deny
+        }
+        // Do not long-poll (much) past the deadline; 1 s is the smallest real long poll.
+        let poll_secs = remaining.as_secs().clamp(1, LONG_POLL_SECS);
+
+        let updates: serde_json::Value = client
+            .get(format!("{base}/getUpdates"))
+            .query(&[
+                ("timeout", poll_secs.to_string()),
+                // The Bot API expects a JSON-serialized list here, not a bare string.
+                ("allowed_updates", r#"["callback_query"]"#.to_string()),
+                ("offset", offset.to_string()),
+            ])
+            .send()
+            .await
+            .map_err(reqwest::Error::without_url)?
+            .json()
+            .await
+            .map_err(reqwest::Error::without_url)?;
+        // An error reply comes back immediately; bail rather than retry in a tight loop.
+        ensure_ok(&updates, "getUpdates")?;
+
+        for update in updates["result"].as_array().into_iter().flatten() {
+            if let Some(id) = update["update_id"].as_i64() {
+                offset = id + 1;
+            }
+            let cb = &update["callback_query"];
+            // Message ids are only unique within a chat, so match the chat as well.
+            let is_ours = cb["message"]["message_id"].as_i64() == Some(message_id)
+                && cb["message"]["chat"]["id"].as_i64() == Some(sent_chat_id);
+            if !is_ours {
+                continue;
+            }
+            let data = cb["data"].as_str().unwrap_or("");
+            let cb_id = cb["id"].as_str().unwrap_or("");
+            // Best-effort acknowledgement; the decision stands even if this call fails.
+            let _ = client
+                .post(format!("{base}/answerCallbackQuery"))
+                .json(&serde_json::json!({"callback_query_id": cb_id}))
+                .send()
+                .await;
+            return Ok(data == "approve");
+        }
+    }
+}
+
+/// Turns a Telegram `{"ok": false, ...}` reply into an error naming the failed method.
+fn ensure_ok(reply: &serde_json::Value, method: &str) -> Result<()> {
+    if reply["ok"].as_bool() == Some(true) {
+        return Ok(());
+    }
+    let why = reply["description"].as_str().unwrap_or("no description");
+    bail!("{method} rejected by Telegram: {why}")
+}