From a6bf66998f54151bb08cc5cc835a147b5b22ba95 Mon Sep 17 00:00:00 2001
From: thelastpoet <emcie4@gmail.com>
Date: Thu, 19 Feb 2026 21:12:41 +0300
Subject: [PATCH] fix: harden public deployment surfaces

---
 README.md                                     | 11 ++++
 contracts/api/openapi.yaml                    |  1 +
 .../schemas/moderation-request.schema.json    |  3 +-
 docker-compose.hardened.yml                   | 59 +++++++++++++++++++
 docs/deployment.md                            |  2 +
 docs/integration-guide.md                     |  2 +-
 infra/nginx/sentinel_public.conf              | 47 +++++++++++++++
 pyproject.toml                                |  2 +-
 src/sentinel_api/main.py                      | 24 +++++++-
 src/sentinel_api/rate_limit.py                | 14 ++++-
 tests/test_api.py                             | 17 ++++++
 11 files changed, 175 insertions(+), 7 deletions(-)
 create mode 100644 docker-compose.hardened.yml
 create mode 100644 infra/nginx/sentinel_public.conf

diff --git a/README.md b/README.md
index 9a13161..b4f3ee3 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,17 @@ export SENTINEL_API_KEY='your-key-here' && make run
 
 See the full [Quickstart guide](docs/quickstart.md) for detailed instructions.
 
+### Hardened Docker Compose deployment (recommended for public hosting)
+
+If you're deploying Sentinel on a host that is reachable from the public internet, run it behind a reverse proxy and do **not** expose operator endpoints (`/metrics*`, `/admin/*`, `/internal/*`) publicly.
+
+This repo includes a hardened Compose file that exposes only `/health` and `/v1/moderate` on port 8000:
+
+```bash
+export SENTINEL_API_KEY='replace-with-a-strong-key'
+docker compose -f docker-compose.hardened.yml up -d --build
+```
+
 ## Project maturity
 
 Sentinel ships with a **7-term demonstration seed lexicon**. This is enough to validate the system works end-to-end, but production deployment requires building out your own lexicon with domain-expert annotation.
diff --git a/contracts/api/openapi.yaml b/contracts/api/openapi.yaml
index 15bba47..99597b5 100644
--- a/contracts/api/openapi.yaml
+++ b/contracts/api/openapi.yaml
@@ -140,6 +140,7 @@ components:
         request_id:
           type: string
           maxLength: 128
+          pattern: '^[A-Za-z0-9][A-Za-z0-9._:-]{0,127}$'
     ModerationResponse:
       type: object
       additionalProperties: false
diff --git a/contracts/schemas/moderation-request.schema.json b/contracts/schemas/moderation-request.schema.json
index fad4272..7a4bbcd 100644
--- a/contracts/schemas/moderation-request.schema.json
+++ b/contracts/schemas/moderation-request.schema.json
@@ -22,7 +22,8 @@
     },
     "request_id": {
       "type": "string",
-      "maxLength": 128
+      "maxLength": 128,
+      "pattern": "^[A-Za-z0-9][A-Za-z0-9._:-]{0,127}$"
     }
   }
 }
diff --git a/docker-compose.hardened.yml b/docker-compose.hardened.yml
new file mode 100644
index 0000000..845718a
--- /dev/null
+++ b/docker-compose.hardened.yml
@@ -0,0 +1,59 @@
+services:
+  proxy:
+    image: nginx:1.27-alpine
+    ports:
+      - "8000:80"
+    depends_on:
+      api:
+        condition: service_started
+    volumes:
+      - ./infra/nginx/sentinel_public.conf:/etc/nginx/conf.d/default.conf:ro
+
+  api:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    environment:
+      SENTINEL_API_KEY: ${SENTINEL_API_KEY:?SENTINEL_API_KEY must be set}
+      SENTINEL_RATE_LIMIT_PER_MINUTE: 120
+      SENTINEL_DATABASE_URL: postgresql://sentinel:sentinel@postgres:5432/sentinel
+      SENTINEL_REDIS_URL: redis://redis:6379/0
+      SENTINEL_ELECTORAL_PHASE: ${SENTINEL_ELECTORAL_PHASE:-}
+      SENTINEL_LEXICON_PATH: /app/data/lexicon_seed.json
+      SENTINEL_POLICY_CONFIG_PATH: /app/config/policy/default.json
+    expose:
+      - "8000"
+    depends_on:
+      postgres:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+
+  postgres:
+    image: pgvector/pgvector:pg16
+    environment:
+      POSTGRES_DB: sentinel
+      POSTGRES_USER: sentinel
+      POSTGRES_PASSWORD: sentinel
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U sentinel -d sentinel"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    volumes:
+      - pgdata:/var/lib/postgresql/data
+      - ./infra/postgres-init.sql:/docker-entrypoint-initdb.d/01-init.sql:ro
+
+  redis:
+    image: redis:7-alpine
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 3s
+      retries: 5
+    volumes:
+      - redisdata:/data
+
+volumes:
+  pgdata:
+  redisdata:
diff --git a/docs/deployment.md b/docs/deployment.md
index 20e9372..341431d 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -338,6 +338,8 @@ curl http://localhost:8000/metrics/prometheus
 
 The metrics endpoint returns action counts, HTTP status counts, latency histogram buckets, and validation error counts.
 
+For public deployments, do not expose these endpoints on the public internet. Use network controls or a reverse proxy to restrict `/metrics*` to internal monitoring systems. The repository includes `docker-compose.hardened.yml`, which routes only `/health` and `/v1/moderate` through a proxy and blocks operator surfaces by default.
+
 ### Structured logging
 
 Sentinel propagates `X-Request-ID` headers through all requests. If the client provides one, Sentinel uses it; otherwise one is generated. Use this ID to correlate logs across your infrastructure.
diff --git a/docs/integration-guide.md b/docs/integration-guide.md
index 51d3cea..92d00a7 100644
--- a/docs/integration-guide.md
+++ b/docs/integration-guide.md
@@ -39,7 +39,7 @@ Keep this key server-side. If it leaks to a client, rotate it immediately.
 | `context.source` | string | No | Max 100 chars | Where the content came from (e.g., "forum-post", "comment") |
 | `context.locale` | string | No | Max 20 chars | Geographic locale (e.g., "ke" for Kenya) |
 | `context.channel` | string | No | Max 50 chars | Content channel or category |
-| `request_id` | string | No | Max 128 chars | Client-provided idempotency/correlation ID |
+| `request_id` | string | No | Max 128 chars; must match `^[A-Za-z0-9][A-Za-z0-9._:-]{0,127}$` | Client-provided idempotency/correlation ID |
 
 If you don't provide `request_id`, Sentinel generates one and returns it in the `X-Request-ID` response header.
 
diff --git a/infra/nginx/sentinel_public.conf b/infra/nginx/sentinel_public.conf
new file mode 100644
index 0000000..5cd6650
--- /dev/null
+++ b/infra/nginx/sentinel_public.conf
@@ -0,0 +1,56 @@
+upstream sentinel_api {
+  server api:8000;
+  # Cache of idle upstream connections. This only takes effect because the
+  # proxied locations below use HTTP/1.1 and clear the Connection header.
+  # NOTE(review): confirm the API server's keep-alive timeout is longer than
+  # nginx's upstream keepalive_timeout, otherwise reused sockets may be stale.
+  keepalive 16;
+}
+
+server {
+  listen 80;
+  server_name _;
+
+  # Keep payloads bounded; API enforces a much smaller max text size.
+  client_max_body_size 64k;
+
+  add_header X-Content-Type-Options "nosniff" always;
+  add_header X-Frame-Options "DENY" always;
+  add_header Referrer-Policy "no-referrer" always;
+
+  location = /health {
+    proxy_http_version 1.1;
+    # Without this nginx sends "Connection: close" upstream and the keepalive
+    # pool configured in the upstream block is never used.
+    proxy_set_header Connection "";
+    proxy_set_header Host $host;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_pass http://sentinel_api;
+  }
+
+  location = /v1/moderate {
+    proxy_http_version 1.1;
+    # Clear "Connection" so upstream keepalive connections can be reused.
+    proxy_set_header Connection "";
+    proxy_set_header Host $host;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_pass http://sentinel_api;
+  }
+
+  # Hardened-by-default: do not expose operator surfaces to the public internet.
+  # If you need these endpoints, route them via a separate internal listener/service
+  # or add auth + IP allowlisting here.
+  location ~ ^/(metrics|metrics/prometheus)$ {
+    return 403;
+  }
+
+  location ~ ^/(admin|internal)/ {
+    return 404;
+  }
+
+  location / {
+    return 404;
+  }
+}
diff --git a/pyproject.toml b/pyproject.toml
index 38b4375..31fb89d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,7 @@ ml = [
 ]
 
 [tool.pytest.ini_options]
-pythonpath = ["src"]
+pythonpath = ["src", "."]
 testpaths = ["tests"]
 
 [tool.ruff]
diff --git a/src/sentinel_api/main.py b/src/sentinel_api/main.py
index 11cfb26..3352e44 100644
--- a/src/sentinel_api/main.py
+++ b/src/sentinel_api/main.py
@@ -2,6 +2,7 @@
 
 import json
 import os
+import re
 import secrets
 import time
 from collections.abc import AsyncIterator, Sequence
@@ -60,6 +61,20 @@
 logger = get_logger("sentinel.api")
 CLASSIFIER_SHADOW_ENABLED_ENV = "SENTINEL_CLASSIFIER_SHADOW_ENABLED"
 SHADOW_PREDICTIONS_PATH_ENV = "SENTINEL_SHADOW_PREDICTIONS_PATH"
+_REQUEST_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:-]{0,127}$")
+
+
+def _coerce_request_id(value: str | None) -> str | None:
+    """Return the stripped request ID if it is well-formed, otherwise ``None``.
+
+    ``None``, blank, over-long and pattern-violating values all yield ``None``.
+    """
+    candidate = (value or "").strip()
+    # The pattern requires 1-128 characters, so it also rejects empty and
+    # over-long input; ``strip()`` leaves no trailing newline for ``$`` to skip.
+    if _REQUEST_ID_RE.match(candidate) is None:
+        return None
+    return candidate
 
 
 @asynccontextmanager
@@ -94,12 +109,12 @@ class AdminProposalReviewResponse(BaseModel):
 
 @app.middleware("http")
 async def request_context_middleware(request: Request, call_next):  # type: ignore[no-untyped-def]
-    request_id = request.headers.get("X-Request-ID", str(uuid4()))
+    request_id = _coerce_request_id(request.headers.get("X-Request-ID")) or str(uuid4())
     request.state.request_id = request_id
     start = time.perf_counter()
     response = await call_next(request)
     duration_ms = int((time.perf_counter() - start) * 1000)
-    resolved_request_id = response.headers.get("X-Request-ID", request_id)
+    resolved_request_id = _coerce_request_id(response.headers.get("X-Request-ID")) or request_id
     response.headers["X-Request-ID"] = resolved_request_id
     metrics.record_http_status(response.status_code)
     logger.info(
@@ -470,6 +485,13 @@ def moderate_text(
     _: None = Depends(require_api_key),
     __: None = Depends(enforce_rate_limit),
 ) -> ModerationResponse:
+    # Compare with the raw value: _coerce_request_id() strips whitespace, but the
+    # unstripped request_id is what becomes the effective request ID below.
+    if request.request_id is not None and (
+        _coerce_request_id(request.request_id) != request.request_id
+    ):
+        detail = "request_id contains invalid characters"
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=detail)
     effective_request_id = request.request_id or http_request.state.request_id
     runtime = resolve_policy_runtime()
     result = moderate(request.text, runtime=runtime)
diff --git a/src/sentinel_api/rate_limit.py b/src/sentinel_api/rate_limit.py
index c2f5f8e..c284100 100644
--- a/src/sentinel_api/rate_limit.py
+++ b/src/sentinel_api/rate_limit.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import hashlib
 import logging
 import os
 import time
@@ -18,6 +19,14 @@
 
 logger = logging.getLogger(__name__)
 
+_RATE_LIMIT_KEY_PREFIX = "sentinel-rate-limit:"
+
+
+def _rate_limit_bucket_key(key: str) -> str:
+    """Return the hex SHA-256 digest of *key* for use as a rate-limit bucket ID."""
+    # Hashing keeps raw API keys out of the in-memory buckets and Redis keyspace.
+    return hashlib.sha256(key.encode("utf-8")).hexdigest()
+
 
 @dataclass(frozen=True)
 class RateLimitDecision:
@@ -40,7 +49,8 @@ def _cleanup(self, bucket: deque[float], now: float) -> None:
 
     def check(self, key: str) -> RateLimitDecision:
         now = time.time()
-        bucket = self._events[key]
+        bucket_key = _rate_limit_bucket_key(key)
+        bucket = self._events[bucket_key]
         self._cleanup(bucket, now)
 
         if not bucket:
@@ -102,7 +112,7 @@ def check(self, key: str) -> RateLimitDecision:
         # Preserve existing response contract while shifting enforcement to
         # distributed limits storage (Redis/memcached/etc.).
         now = time.time()
-        normalized_key = f"sentinel-rate-limit:{key}"
+        normalized_key = f"{_RATE_LIMIT_KEY_PREFIX}{_rate_limit_bucket_key(key)}"
         item = self._rate_limit_item_cls(self.per_minute)
         try:
             allowed = bool(self._limiter.hit(item, normalized_key))
diff --git a/tests/test_api.py b/tests/test_api.py
index 6851f14..f563945 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -64,6 +64,23 @@ def test_moderate_uses_body_request_id_for_header() -> None:
     assert response.headers["X-Request-ID"] == "client-123"
 
 
+def test_moderate_rejects_invalid_body_request_id() -> None:
+    """A body ``request_id`` containing a space is rejected with HTTP 400."""
+    body = {"text": "This is peaceful speech", "request_id": "bad id"}
+    auth_headers = {"X-API-Key": TEST_API_KEY}
+
+    response = client.post("/v1/moderate", json=body, headers=auth_headers)
+
+    assert response.status_code == 400
+    assert response.json()["error_code"] == "HTTP_400"
+
+
+def test_middleware_ignores_invalid_header_request_id() -> None:
+    resp = client.get("/health", headers={"X-Request-ID": "bad id"})  # space is invalid
+    assert resp.status_code == 200
+    assert resp.headers["X-Request-ID"] != "bad id"  # replaced, not echoed back
+
+
 def test_moderate_block_path() -> None:
     response = client.post(
         "/v1/moderate",