From 6c4a82a2c81d7290621683fdfa97a8decc86da57 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 13:29:57 +0200
Subject: [PATCH 01/12] docs: update architecture documentation with telemetry
 and model details

- Add C3 connection in container architecture flowchart
- Rename "Azure Monitor" to "Application Insights" in telemetry diagram
- Add Prometheus as a telemetry sink with implementation details
- Document retention policies for Application Insights
- Update matrix gateway JSON example with new field names
- Fix confidence threshold in matrix-gateway from 0.70 to 0.75
- Update SOP suggestion threshold from 0.78 to 0.8
- Fix code block formatting in multiple files
---
 .../architecture/02-container-architecture.md |  1 +
 .../04-observability-telemetry.md             | 27 ++++++++++++++++++-
 docs/architecture/reference/matrix-gateway.md | 10 +++----
 .../architecture/reference/matrix-rooivalk.md |  4 +--
 .../reference/slm-management-plan.md          |  2 +-
 5 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/docs/architecture/02-container-architecture.md b/docs/architecture/02-container-architecture.md
index 2b66950..b34021d 100644
--- a/docs/architecture/02-container-architecture.md
+++ b/docs/architecture/02-container-architecture.md
@@ -57,6 +57,7 @@ flowchart TB
 
     C1 --> G1
     C2 --> G1
+    C3 --> G1
     C4 --> G1
 
     G1 --> G2
diff --git a/docs/architecture/04-observability-telemetry.md b/docs/architecture/04-observability-telemetry.md
index 3afe313..79e7f9f 100644
--- a/docs/architecture/04-observability-telemetry.md
+++ b/docs/architecture/04-observability-telemetry.md
@@ -34,8 +34,9 @@ flowchart TB
 
     subgraph Ingest
         I1[OpenTelemetry]
-        I2[Azure Monitor]
+        I2[Application Insights]
         I3[Blob Export]
+        I4[Prometheus]
     end
 
     subgraph Analytics
@@ -62,15 +63,39 @@ flowchart TB
     S4 --> I1
     S5 --> I2
     S6 --> I3
+    S5 --> I4
 
     I1 --> A1
     I2 --> A1
     I3 --> A1
+    I4 --> V1
 
     A1 --> V1
     V1 --> V2
 ```
 
+### Telemetry Sinks
+
+LiteLLM enables Prometheus metrics via `success_callback` and `failure_callback` containing "prometheus". The Prometheus exporter exposes a `/metrics` endpoint that scrapes application metrics. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration.
+
+The primary telemetry sinks are:
+
+- **OpenTelemetry**: Traces and spans
+- **Application Insights**: Azure Monitor implementation using `APPLICATIONINSIGHTS_CONNECTION_STRING` env var for OTEL exporter
+- **Blob Export**: Raw event storage
+- **Prometheus**: Application metrics via `/metrics` endpoint
+
+## Retention Policies
+
+Application Insights retention defaults:
+
+- **Production**: 90 days
+- **Non-production (dev/staging)**: 30 days
+
+These are environment-specific settings configured in the Application Insights resource. Operators can adjust retention in the Azure Portal under Application Insights resource settings.
+
+Include retention expectations in operational runbooks to align cost and data availability expectations.
+
 ## Key Metrics
 
 ### Gateway
diff --git a/docs/architecture/reference/matrix-gateway.md b/docs/architecture/reference/matrix-gateway.md
index 4551887..55b8dbb 100644
--- a/docs/architecture/reference/matrix-gateway.md
+++ b/docs/architecture/reference/matrix-gateway.md
@@ -41,12 +41,12 @@ flowchart TD
 
 ```json
 {
-  "intent": "code_review",
+  "request_id": "req_abc123",
+  "label": "code_review",
   "complexity": "medium",
   "tool_candidate": true,
-  "recommended_target": "codeflow-engine",
-  "recommended_model_tier": "small",
-  "escalation_required": false,
+  "recommended_tier": "slm",
+  "cacheable": true,
   "confidence": 0.93
 }
 ```
@@ -91,7 +91,7 @@ interface PolicyScreenOutput {
 | Condition                        | Action                 |
 | -------------------------------- | ---------------------- |
 | `policy-screen.allowed == false` | Block or redact        |
-| `confidence < 0.70`              | Escalate to LLM        |
+| `confidence < 0.75`              | Escalate to LLM        |
 | Tool suggested but no mapping    | Send to general LLM    |
 | Tagging fails                    | Mark telemetry partial |
 
diff --git a/docs/architecture/reference/matrix-rooivalk.md b/docs/architecture/reference/matrix-rooivalk.md
index 29f20da..147c7b0 100644
--- a/docs/architecture/reference/matrix-rooivalk.md
+++ b/docs/architecture/reference/matrix-rooivalk.md
@@ -24,7 +24,7 @@ flowchart TD
 
 ## CRITICAL: SLM is for Reporting Only
 
-```
+```text
 ┌─────────────────────────────────────────────────────────┐
 │                   IMPORTANT - SAFETY BOUNDARY            │
 ├─────────────────────────────────────────────────────────┤
@@ -109,7 +109,7 @@ interface SuggestSopOutput {
 ```typescript
 const DEFAULT_THRESHOLDS = {
   operator_summary: { direct_use: 0.8, facts_only: 0.65 },
-  sop_suggestion: { direct_suggest: 0.78, manual_lookup: 0.65 },
+  sop_suggestion: { direct_suggest: 0.8, manual_lookup: 0.65 },
 };
 ```
 
diff --git a/docs/architecture/reference/slm-management-plan.md b/docs/architecture/reference/slm-management-plan.md
index 7c116c5..d2b0f2e 100644
--- a/docs/architecture/reference/slm-management-plan.md
+++ b/docs/architecture/reference/slm-management-plan.md
@@ -40,7 +40,7 @@ Maintain a tiered model portfolio:
 
 Implement cost controls at each layer:
 
-```
+```text
 Cost Control Layers
 ┌─────────────────────────────────────┐
 │ 1. Budget caps per project          │

From 0e69a2f3d6622f207bf6c20878d945c40e9b1c31 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 14:14:45 +0200
Subject: [PATCH 02/12] feat(workflows): add GitHub environment support to
 deployment workflows

Add environment input parameter to deploy-environment.yaml workflow to specify GitHub environment, improving deployment control and security. Replace hardcoded environment settings in deploy.yaml with the new parameter. Also fix code fences in documentation to use text format and update various documentation details.
---
 .github/workflows/deploy-environment.yaml     | 43 ++++++++++++++++++-
 .github/workflows/deploy.yaml                 | 42 ++++++++++++++++--
 .../reference/slm-implementation-matrix.md    |  8 ++--
 .../reference/slm-management-plan.md          |  6 +--
 .../strategic/07-deployment-model.md          | 16 +++----
 docs/architecture/systems/agentkit-forge.md   |  2 +-
 docs/architecture/systems/ai-gateway.md       | 30 ++++++++++++-
 docs/architecture/systems/codeflow-engine.md  |  4 +-
 docs/architecture/systems/cognitive-mesh.md   |  4 +-
 docs/architecture/systems/phoenix-rooivalk.md |  2 +-
 docs/planning/request_to_token_attribution.md | 12 +++---
 11 files changed, 136 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/deploy-environment.yaml b/.github/workflows/deploy-environment.yaml
index 1485e2e..c35a239 100644
--- a/.github/workflows/deploy-environment.yaml
+++ b/.github/workflows/deploy-environment.yaml
@@ -38,7 +38,26 @@ on:
         type: boolean
         default: false
         description: Include AOAI endpoint host validation
+      environment:
+        required: false
+        type: string
+        default: ""
+        description: GitHub environment to use
     secrets:
+      AZURE_CLIENT_ID:
+        required: true
+      AZURE_TENANT_ID:
+        required: true
+      AZURE_SUBSCRIPTION_ID:
+        required: true
+      TF_BACKEND_RG:
+        required: true
+      TF_BACKEND_SA:
+        required: true
+      TF_BACKEND_CONTAINER:
+        required: true
+      EXPECTED_AOAI_ENDPOINT_HOST:
+        required: false
       AZURE_OPENAI_ENDPOINT:
         required: true
       AZURE_OPENAI_API_KEY:
@@ -49,8 +68,25 @@ on:
         required: true
       AIGATEWAY_KEY:
         required: true
+      STATE_SERVICE_CONTAINER_IMAGE:
+        required: false
+      STATE_SERVICE_SHARED_TOKEN:
+        required: false
+      STATE_SERVICE_REGISTRY_PASSWORD:
+        required: false
+      DASHBOARD_CONTAINER_IMAGE:
+        required: false
+      GRAFANA_URL:
+        required: false
 
 env:
+  AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+  AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+  AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+  TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+  TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+  TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+  EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
   TF_VAR_env: ${{ inputs.env_name }}
   TF_VAR_projname: "aigateway"
   TF_VAR_location: "southafricanorth"
@@ -64,10 +100,15 @@ env:
   TF_VAR_codex_api_version: ${{ inputs.codex_api_version }}
   TF_VAR_embedding_deployment: "text-embedding-3-large"
   TF_VAR_embeddings_api_version: "2024-02-01"
+  TF_VAR_state_service_container_image: ${{ secrets.STATE_SERVICE_CONTAINER_IMAGE }}
+  TF_VAR_secrets_expiration_date: "2027-03-31T00:00:00Z"
+  TF_VAR_dashboard_container_image: ${{ secrets.DASHBOARD_CONTAINER_IMAGE || 'ghcr.io/phoenixvc/ai-gateway-dashboard:latest' }}
+  TF_VAR_grafana_url: ${{ secrets.GRAFANA_URL }}
 
 jobs:
   deploy:
     runs-on: ubuntu-latest
+    environment: ${{ inputs.environment || inputs.env_name }}
     defaults:
       run:
         working-directory: ${{ inputs.terraform_working_directory }}
@@ -208,7 +249,7 @@ jobs:
           aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
           max_attempts: "3"
           retry_sleep: ${{ inputs.smoke_retry_sleep }}
-          models_wait_attempts: ${{ if(inputs.env_name == 'prod', '3', '1') }}
+          models_wait_attempts: ${{ inputs.env_name == 'prod' && '3' || '1' }}
           models_wait_sleep: ${{ inputs.smoke_models_wait_sleep }}
 
       - name: Smoke test shared state API (dashboard proxy)
diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
index 59ece73..97c8e2d 100644
--- a/.github/workflows/deploy.yaml
+++ b/.github/workflows/deploy.yaml
@@ -150,7 +150,6 @@ jobs:
     name: Deploy dev
     needs: plan
     if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev'
-    environment: dev
     uses: ./.github/workflows/deploy-environment.yaml
     with:
       env_name: dev
@@ -161,18 +160,30 @@ jobs:
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
       include_aoai_host_check: false
+      environment: dev
     secrets:
+      AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+      AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+      AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+      TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+      TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+      EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
       AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
       AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
       AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
       AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
+      STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
+      STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
+      STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
+      DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
+      GRAFANA_URL: ${{ secrets.GRAFANA_URL }}
 
   deploy-staging:
     name: Deploy staging
     needs: plan
     if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-staging')
-    environment: staging
     uses: ./.github/workflows/deploy-environment.yaml
     with:
       env_name: staging
@@ -183,18 +194,30 @@ jobs:
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
       include_aoai_host_check: false
+      environment: staging
     secrets:
+      AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+      AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+      AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+      TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+      TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+      EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
       AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
       AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
       AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
       AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
+      STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
+      STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
+      STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
+      DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
+      GRAFANA_URL: ${{ secrets.GRAFANA_URL }}
 
   deploy-prod:
     name: Deploy prod
     needs: plan
     if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
-    environment: prod
     uses: ./.github/workflows/deploy-environment.yaml
     with:
       env_name: prod
@@ -205,11 +228,24 @@ jobs:
       smoke_retry_sleep: "15"
       smoke_models_wait_sleep: "30"
       include_aoai_host_check: true
+      environment: prod
     secrets:
+      AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+      AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+      AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+      TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+      TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+      EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
       AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
       AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
       AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
       AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
+      STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
+      STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
+      STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
+      DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
+      GRAFANA_URL: ${{ secrets.GRAFANA_URL }}
 
   # Legacy inline deployments removed - see deploy-environment.yaml
diff --git a/docs/architecture/reference/slm-implementation-matrix.md b/docs/architecture/reference/slm-implementation-matrix.md
index eb1fc6e..ff50192 100644
--- a/docs/architecture/reference/slm-implementation-matrix.md
+++ b/docs/architecture/reference/slm-implementation-matrix.md
@@ -15,13 +15,13 @@ This document provides a repo-by-repo implementation matrix showing SLM endpoint
 
 ## Documentation Structure
 
-```
+```text
 reference/
 ├── slm-implementation-matrix.md      # This file
 ├── matrix-gateway.md                  # AI Gateway details
 ├── matrix-cognitive-mesh.md          # Cognitive Mesh details
-├── matrix-codeflow.md                # CodeFlow Engine details
-├── matrix-agentkit.md                # AgentKit Forge details
+├── matrix-codeflow.md                 # CodeFlow Engine details
+├── matrix-agentkit.md                 # AgentKit Forge details
 ├── matrix-rooivalk.md                # PhoenixRooivalk details
 └── matrix-mystira.md                 # Mystira details
 ```
@@ -251,7 +251,7 @@ This is a practical role map, not a vendor mandate.
 
 ### Standard Fallback Pattern
 
-```
+```text
 1. SLM timeout → Deterministic rules
 2. Low confidence → LLM escalation
 3. Safety critical → Block immediately
diff --git a/docs/architecture/reference/slm-management-plan.md b/docs/architecture/reference/slm-management-plan.md
index d2b0f2e..92c164d 100644
--- a/docs/architecture/reference/slm-management-plan.md
+++ b/docs/architecture/reference/slm-management-plan.md
@@ -172,7 +172,7 @@ async def security_pipeline(request: Request) -> SecurityResult:
 
 ### Fallback Hierarchy
 
-```
+```text
 Request
    │
    ▼ Primary SLM
@@ -216,7 +216,7 @@ Request
 
 ### Model Lifecycle
 
-```
+```text
 Discovery → Testing → Staging → Production → Deprecated → Retired
     │           │         │          │            │
     ▼           ▼         ▼          ▼            ▼
@@ -271,4 +271,4 @@ Discovery → Testing → Staging → Production → Deprecated → Retired
 6. [ ] Define fallback hierarchies
 7. [ ] Implement observability stack
 8. [ ] Document model lifecycle process
-9. [ ] **Add explicit safety boundary for PhoenixRooivalk**
+9. [x] Add explicit safety boundary for PhoenixRooivalk
diff --git a/docs/architecture/reference/strategic/07-deployment-model.md b/docs/architecture/reference/strategic/07-deployment-model.md
index e4e6f8d..7e03967 100644
--- a/docs/architecture/reference/strategic/07-deployment-model.md
+++ b/docs/architecture/reference/strategic/07-deployment-model.md
@@ -26,14 +26,14 @@ flowchart TD
 
 ## Decision Matrix
 
-| System          | Best SLM Jobs              | Less Suitable                  |
-| --------------- | -------------------------- | ------------------------------ |
-| AI Gateway      | routing, screening, cost   | Nuanced synthesis              |
-| Cognitive Mesh  | routing, decomposition     | Final judgment                 |
-| CodeFlow        | PR triage, log analysis    | Root cause across dependencies |
-| AgentKit        | tool selection, extraction | Multi-step planning            |
-| PhoenixRooivalk | summaries, alerts          | Sole threat authority          |
-| Mystira         | safety, continuity         | Rich narrative                 |
+| System          | Best SLM Jobs                                                 | Less Suitable                  |
+| --------------- | ------------------------------------------------------------- | ------------------------------ |
+| AI Gateway      | routing, screening, cost                                      | Nuanced synthesis              |
+| Cognitive Mesh  | routing, decomposition                                        | Final judgment                 |
+| CodeFlow        | PR classification, CI failure triage, release-note extraction | Root cause across dependencies |
+| AgentKit        | tool selection, extraction                                    | Multi-step planning            |
+| PhoenixRooivalk | summaries, alerts                                             | Sole threat authority          |
+| Mystira         | safety, continuity                                            | Rich narrative                 |
 
 ## Practical Gateway Flow
 
diff --git a/docs/architecture/systems/agentkit-forge.md b/docs/architecture/systems/agentkit-forge.md
index c84a182..e61cf67 100644
--- a/docs/architecture/systems/agentkit-forge.md
+++ b/docs/architecture/systems/agentkit-forge.md
@@ -4,7 +4,7 @@ AgentKit Forge builds AI agents and orchestration workflows. SLMs help when agen
 
 ## Architecture
 
-```
+```text
 Agent Task
       │
       ▼
diff --git a/docs/architecture/systems/ai-gateway.md b/docs/architecture/systems/ai-gateway.md
index 5a288f0..751f9fc 100644
--- a/docs/architecture/systems/ai-gateway.md
+++ b/docs/architecture/systems/ai-gateway.md
@@ -4,7 +4,7 @@ AI Gateway sits between applications and multiple AI providers. The SLM acts as
 
 ## Architecture
 
-```
+```text
 Client Request
       │
       ▼
@@ -144,3 +144,31 @@ Track per routing decision:
 - [ ] Add security prefiltering (injection, PII, secrets)
 - [ ] Set up cost tracking per tier
 - [ ] Configure latency alerts
+
+## v1 API Routing
+
+The gateway routes `/v1/responses` and `/v1/embeddings` requests to Azure OpenAI via LiteLLM provider configuration.
+
+### Routing Rules
+
+| Endpoint         | Provider               | Notes                     |
+| ---------------- | ---------------------- | ------------------------- |
+| `/v1/responses`  | LiteLLM → Azure OpenAI | Responses API requests    |
+| `/v1/embeddings` | LiteLLM → Azure OpenAI | Text embedding generation |
+
+### Example LiteLLM Config
+
+```yaml
+model_list:
+  - model_name: gpt-4.1
+    litellm_params:
+      model: azure/gpt-4.1
+      api_base: https://<resource>.openai.azure.com
+      api_key: os.environ/AZURE_OPENAI_API_KEY
+      api_version: "2025-04-01-preview"
+```
+
+### Response vs Embeddings Handling
+
+- **Responses**: Model selection based on complexity/classification; supports streaming
+- **Embeddings**: Batched processing; fixed deployment mapping
diff --git a/docs/architecture/systems/codeflow-engine.md b/docs/architecture/systems/codeflow-engine.md
index 30f5dfd..c1de01e 100644
--- a/docs/architecture/systems/codeflow-engine.md
+++ b/docs/architecture/systems/codeflow-engine.md
@@ -4,7 +4,7 @@ CodeFlow Engine is a DevOps and CI/CD intelligence system. **This is one of the
 
 ## Architecture
 
-```
+```text
 Git Push / PR Event
       │
       ▼
@@ -96,7 +96,7 @@ async def select_tests(change_type: str, impacted_files: list[str]) -> TestPlan:
 Type: {change_type}
 Files: {', '.join(impacted_files)}
 
-Output: { "run_unit": bool, "run_integration": bool, "run_e2e": bool, "skip_reason": str|null }"""
+Output: {{ "run_unit": bool, "run_integration": bool, "run_e2e": bool, "skip_reason": str|null }}"""
 
     return await slm_completion(prompt)
 ```
diff --git a/docs/architecture/systems/cognitive-mesh.md b/docs/architecture/systems/cognitive-mesh.md
index d4f2c96..dedc502 100644
--- a/docs/architecture/systems/cognitive-mesh.md
+++ b/docs/architecture/systems/cognitive-mesh.md
@@ -4,7 +4,7 @@ Cognitive Mesh architectures orchestrate multiple AI agents and tools. The SLM i
 
 ## Architecture
 
-```
+```text
 User Query
       │
       ▼
@@ -143,7 +143,7 @@ async def compress_context(messages: list[Message]) -> Compressed:
 
 | Pros                            | Cons                                            |
 | ------------------------------- | ----------------------------------------------- |
-| Very large token savings        | Decomposition quality can bottleneck workflow   |
+| Large token savings             | Decomposition quality can bottleneck workflow   |
 | Better determinism              | Brittle routing if taxonomy is poor             |
 | Easier specialist orchestration | Harder debugging if confidence handling is weak |
 | Improved auditability           |                                                 |
diff --git a/docs/architecture/systems/phoenix-rooivalk.md b/docs/architecture/systems/phoenix-rooivalk.md
index dedaf96..5c0e0a7 100644
--- a/docs/architecture/systems/phoenix-rooivalk.md
+++ b/docs/architecture/systems/phoenix-rooivalk.md
@@ -4,7 +4,7 @@ PhoenixRooivalk is an edge AI counter-UAS (Unmanned Aerial System) system. **SLM
 
 ## Architecture
 
-```
+```text
 Sensors
   │
   ▼
diff --git a/docs/planning/request_to_token_attribution.md b/docs/planning/request_to_token_attribution.md
index d5790c1..b0b5cd0 100644
--- a/docs/planning/request_to_token_attribution.md
+++ b/docs/planning/request_to_token_attribution.md
@@ -122,9 +122,9 @@ Start with downstream aggregation in pvc-costops-analytics - the cheapest and fa
 
 ### 1. cognitive-mesh (Upstream Caller)
 
-**Required:** Pass correlation metadata in request body when calling gateway. There are two methods:
+**Recommended:** Pass correlation metadata in request body when calling gateway. There are two methods:
 
-**Method A: Via Request Metadata (Recommended)**
+**Method A: Via Request Metadata (Preferred)**
 Pass correlation IDs in the request body `metadata` field:
 
 ```json
@@ -142,7 +142,7 @@ Pass correlation IDs in the request body `metadata` field:
 }
 ```
 
-**Method B: Via HTTP Headers**
+**Method B: Via HTTP Headers** (alternative - requires additional LiteLLM configuration or middleware)
 
 - x-request-id
 - x-session-id
@@ -151,8 +151,6 @@ Pass correlation IDs in the request body `metadata` field:
 - x-stage-name
 - x-user-id
 
-_Note: Method B requires additional LiteLLM configuration or middleware._
-
 ### 2. pvc-costops-analytics (Downstream Analytics)
 
 **Required:** KQL queries and dashboards to:
@@ -218,7 +216,7 @@ _Note: Method B requires additional LiteLLM configuration or middleware._
 
 - cognitive-mesh: Pass correlation metadata in request body
 - pvc-costops-analytics: Must create KQL queries for new event shape
-- infra: Application Insights resource + APPLICATIONINSIGHTS_CONNECTION_STRING wiring added; trace export requires custom LiteLLM image (with azure-monitor-opentelemetry) or explicit OTEL_EXPORTER_OTLP_ENDPOINT configuration (currently empty by default)
+- infra: Application Insights resource created; APPLICATIONINSIGHTS_CONNECTION_STRING stored in Key Vault and wired to container app via secret reference; trace export requires custom LiteLLM image (with azure-monitor-opentelemetry) or explicit OTEL_EXPORTER_OTLP_ENDPOINT configuration (currently empty by default)
 
 ## Action Items
 
@@ -226,7 +224,7 @@ _Note: Method B requires additional LiteLLM configuration or middleware._
 
 1. ✅ ai-gateway: Add OTEL callback for token telemetry (Phase 1)
 2. ✅ ai-gateway: Document correlation ID requirements (Phase 2)
-3. ✅ ai-gateway: Add Application Insights connection string wiring (Phase 1b - trace export requires custom image or OTLP collector)
+3. ✅ ai-gateway: Add Application Insights connection string wiring via Key Vault (Phase 1b - trace export requires custom image or OTLP collector)
 
 ### Pending
 

From 02a8bb7dac0b386c21ef2c663cb48afcab1e77a6 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 14:17:57 +0200
Subject: [PATCH 03/12] fix(workflows): rename smoke_models_wait_sleep to
 smoke_models_wait_attempts and update default value

---
 .github/workflows/deploy-environment.yaml | 8 ++++----
 .github/workflows/deploy.yaml             | 3 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/deploy-environment.yaml b/.github/workflows/deploy-environment.yaml
index c35a239..2a1d739 100644
--- a/.github/workflows/deploy-environment.yaml
+++ b/.github/workflows/deploy-environment.yaml
@@ -28,11 +28,11 @@ on:
         type: string
         default: "10"
         description: Retry sleep for smoke tests
-      smoke_models_wait_sleep:
+      smoke_models_wait_attempts:
         required: false
         type: string
-        default: "15"
-        description: Wait sleep for model registration
+        default: "1"
+        description: Number of attempts to wait for models to become available
       include_aoai_host_check:
         required: false
         type: boolean
@@ -249,7 +249,7 @@ jobs:
           aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
           max_attempts: "3"
           retry_sleep: ${{ inputs.smoke_retry_sleep }}
-          models_wait_attempts: ${{ inputs.env_name == 'prod' && '3' || '1' }}
+          models_wait_attempts: ${{ inputs.smoke_models_wait_attempts }}
           models_wait_sleep: ${{ inputs.smoke_models_wait_sleep }}
 
       - name: Smoke test shared state API (dashboard proxy)
diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
index 97c8e2d..7877e43 100644
--- a/.github/workflows/deploy.yaml
+++ b/.github/workflows/deploy.yaml
@@ -159,6 +159,7 @@ jobs:
       terraform_working_directory: infra/env/dev
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
+      smoke_models_wait_attempts: "1"
       include_aoai_host_check: false
       environment: dev
     secrets:
@@ -193,6 +194,7 @@ jobs:
       terraform_working_directory: infra/env/staging
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
+      smoke_models_wait_attempts: "1"
       include_aoai_host_check: false
       environment: staging
     secrets:
@@ -227,6 +229,7 @@ jobs:
       terraform_working_directory: infra/env/prod
       smoke_retry_sleep: "15"
       smoke_models_wait_sleep: "30"
+      smoke_models_wait_attempts: "3"
       include_aoai_host_check: true
       environment: prod
     secrets:

From 0e4fff23b4f93cedb6453fe65538f0d226847fd7 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 14:19:12 +0200
Subject: [PATCH 04/12] refactor(infra): replace Application Insights connection
 string output with resource name

## Summary

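- What changed? Replaced the sensitive `application_insights_connection_string` output with `application_insights_name`.
- Why was it needed? The connection string is now retrieved from the Key Vault secret `appinsights-connection-string` rather than from a Terraform output.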

## Validation

- [ ] Local checks run (if applicable)
- [ ] Relevant workflow/jobs observed

## Deployment Notes

- [ ] No environment/config changes required
- [ ] Environment/config changes required (describe below)

## UAT Toggle (PRs to `main`)

- Add label `run-uat` to this PR to enable UAT deployment (`deploy-uat`).
- Remove label `run-uat` to skip UAT deployment.

## Risk / Rollback

- Risk level: low / medium / high
- Rollback plan:
---
 infra/modules/aigateway_aca/outputs.tf | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/infra/modules/aigateway_aca/outputs.tf b/infra/modules/aigateway_aca/outputs.tf
index a8dfe6b..e6b8ff3 100644
--- a/infra/modules/aigateway_aca/outputs.tf
+++ b/infra/modules/aigateway_aca/outputs.tf
@@ -30,8 +30,7 @@ output "container_app_environment_id" {
   value       = azurerm_container_app_environment.cae.id
 }
 
-output "application_insights_connection_string" {
-  value       = azurerm_application_insights.ai.connection_string
-  description = "Application Insights connection string for OTEL export."
-  sensitive   = true
+output "application_insights_name" {
+  description = "Application Insights resource name. Retrieve connection string from Key Vault secret 'appinsights-connection-string'."
+  value       = azurerm_application_insights.ai.name
 }

From 27e172419cad954e2b8e41ae8c1728e80cf56e10 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 14:47:47 +0200
Subject: [PATCH 05/12] feat(workflow): add smoke_models_wait_sleep parameter
 and update docs

- Add new workflow parameter to control sleep between model availability checks
- Update container architecture diagram to include Webhook Auth component
- Improve Prometheus metrics documentation clarity
- Add migration note for Matrix Gateway response contract changes in v1.0.0
---
 .github/workflows/deploy-environment.yaml       |  5 +++++
 docs/architecture/02-container-architecture.md  |  4 +++-
 docs/architecture/04-observability-telemetry.md |  2 +-
 docs/architecture/reference/matrix-gateway.md   | 10 ++++++++++
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/deploy-environment.yaml b/.github/workflows/deploy-environment.yaml
index 2a1d739..5d39d8e 100644
--- a/.github/workflows/deploy-environment.yaml
+++ b/.github/workflows/deploy-environment.yaml
@@ -28,6 +28,11 @@ on:
         type: string
         default: "10"
         description: Retry sleep for smoke tests
+      smoke_models_wait_sleep:
+        required: false
+        type: string
+        default: "15"
+        description: Sleep seconds between model availability checks
       smoke_models_wait_attempts:
         required: false
         type: string
diff --git a/docs/architecture/02-container-architecture.md b/docs/architecture/02-container-architecture.md
index b34021d..544639c 100644
--- a/docs/architecture/02-container-architecture.md
+++ b/docs/architecture/02-container-architecture.md
@@ -27,6 +27,7 @@ flowchart TB
         G4[Budget Router]
         G5[Semantic Cache]
         G6[Escalation Judge]
+        W[Webhook Auth]
     end
 
     subgraph Mesh
@@ -57,7 +58,8 @@ flowchart TB
 
     C1 --> G1
     C2 --> G1
-    C3 --> G1
+    C3 --> W
+    W --> G1
     C4 --> G1
 
     G1 --> G2
diff --git a/docs/architecture/04-observability-telemetry.md b/docs/architecture/04-observability-telemetry.md
index 79e7f9f..8c095da 100644
--- a/docs/architecture/04-observability-telemetry.md
+++ b/docs/architecture/04-observability-telemetry.md
@@ -76,7 +76,7 @@ flowchart TB
 
 ### Telemetry Sinks
 
-LiteLLM enables Prometheus metrics via `success_callback` and `failure_callback` containing "prometheus". The Prometheus exporter exposes a `/metrics` endpoint that scrapes application metrics. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration.
+LiteLLM enables Prometheus metrics via `success_callback` and `failure_callback` containing "prometheus". The Prometheus exporter exposes a `/metrics` endpoint which is scraped by Prometheus for application metrics collection. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration.
 
 The primary telemetry sinks are:
 
diff --git a/docs/architecture/reference/matrix-gateway.md b/docs/architecture/reference/matrix-gateway.md
index 55b8dbb..395719c 100644
--- a/docs/architecture/reference/matrix-gateway.md
+++ b/docs/architecture/reference/matrix-gateway.md
@@ -51,6 +51,16 @@ flowchart TD
 }
 ```
 
+> **Migration Note (v1.0.0)**: The response contract has been updated. Legacy field names `intent`, `recommended_target`, `recommended_model_tier`, and `escalation_required` are deprecated. Update clients to use the new fields:
+>
+> - `intent` → `label`
+> - `recommended_target` → no direct replacement (use `recommended_tier` for routing)
+> - `recommended_model_tier` → `recommended_tier`
+> - `escalation_required` → derive from `confidence < 0.75` threshold
+> - `cacheable` is a new field (previously not returned)
+>
+> **Deprecation window**: Legacy fields will be removed in v1.2.0. Clients should update by then. For backwards compatibility, implement fallback logic checking both old and new field names.
+
 ## Contract Shapes
 
 ```typescript

From 19759df0ac7763e852cefe1d7ee487409c3553a0 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 15:00:41 +0200
Subject: [PATCH 06/12] fix(infra): use trimspace() for shared token checks

---
 infra/modules/dashboard_aca/main.tf     | 2 +-
 infra/modules/state_service_aca/main.tf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/infra/modules/dashboard_aca/main.tf b/infra/modules/dashboard_aca/main.tf
index 65f86c8..ce740f4 100644
--- a/infra/modules/dashboard_aca/main.tf
+++ b/infra/modules/dashboard_aca/main.tf
@@ -12,7 +12,7 @@ terraform {
 locals {
   prefix           = "pvc-${var.env}-${var.projname}"
   ca_name          = "${local.prefix}-dashboard-${var.location_short}"
-  use_shared_token = trim(var.state_service_shared_token) != ""
+  use_shared_token = trimspace(var.state_service_shared_token) != ""
 
   tags = merge({
     env     = var.env
diff --git a/infra/modules/state_service_aca/main.tf b/infra/modules/state_service_aca/main.tf
index da86391..d768198 100644
--- a/infra/modules/state_service_aca/main.tf
+++ b/infra/modules/state_service_aca/main.tf
@@ -13,7 +13,7 @@ locals {
   prefix            = "pvc-${var.env}-${var.projname}"
   ca_name           = "${local.prefix}-state-${var.location_short}"
   use_registry_auth = var.registry_username != "" && var.registry_password != ""
-  use_shared_token  = trim(var.state_service_shared_token) != ""
+  use_shared_token  = trimspace(var.state_service_shared_token) != ""
 
   tags = merge({
     env     = var.env

From 9be91e54da5bf43f57dda3bddbe523ee51000501 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 16:37:06 +0200
Subject: [PATCH 07/12] docs: add AGENTS.md with guidance for AI coding agents

Add comprehensive documentation for AI coding agents working in this repository, including:
- Project overview and tech stack
- Build/lint/test commands for each component
- Code style guidelines for Python, JavaScript, and Terraform
- Architecture overview and key files
- Prerequisites and pre-commit checks
---
 AGENTS.md | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 AGENTS.md

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..3fd1618
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,227 @@
+# AGENTS.md - Guidance for AI Coding Agents
+
+This file provides guidance for AI coding agents operating in this repository.
+
+## Project Overview
+
+**ai-gateway** — OpenAI-compatible AI gateway built on LiteLLM, deployed to Azure Container Apps. Routes `/v1/responses` and `/v1/embeddings` to Azure OpenAI.
+
+### Tech Stack
+
+- **Gateway**: LiteLLM (Python)
+- **Dashboard**: Node.js/pnpm (in `dashboard/`)
+- **Infrastructure**: Terraform (>= 1.14.0) in `infra/`
+- **State Service**: Python/FastAPI in `state-service/`
+- **Type Checking**: mypy
+- **Scripts**: Deployment/setup scripts in `scripts/`
+
+---
+
+## Build / Lint / Test Commands
+
+### Dashboard (Node.js/pnpm)
+
+```bash
+cd dashboard
+pnpm install          # Install dependencies
+pnpm dev              # Start dev server
+pnpm format           # Format code with prettier
+pnpm format:check     # Check formatting only
+pnpm lint             # Run format check
+```
+
+### Python (State Service)
+
+```bash
+# Type checking
+mypy .                # Run mypy on entire project
+
+# Running a single Python test (if tests exist)
+python -m pytest scripts/test_specific.py::TestClass::test_method
+
+# Individual script execution
+python scripts/integration_test.py
+python scripts/check_aoai_embeddings.py
+```
+
+### Terraform (Infrastructure)
+
+```bash
+cd infra
+
+# Initialize and plan
+terraform init
+terraform plan
+
+# Format check
+terraform fmt -check -recursive
+
+# Apply
+terraform apply
+```
+
+### Combined Checks
+
+```bash
+# Run all checks (format + terraform)
+pnpm check
+```
+
+---
+
+## Code Style Guidelines
+
+### Python (state-service/)
+
+**Imports**
+
+- Use absolute imports within packages: `from .routes import router`
+- Group imports: stdlib → third-party → local
+- Use `import os`, `from typing import Optional`, etc.
+
+**Formatting**
+
+- Follow PEP 8
+- Use 4 spaces for indentation
+- Maximum line length: 100 characters
+
+**Types (mypy)**
+
+- Python version: 3.13 (see `mypy.ini`)
+- Use type hints for function parameters and return values
+- Run `mypy .` before committing
+
+**Naming**
+
+- Variables/functions: `snake_case`
+- Classes: `PascalCase`
+- Constants: `UPPER_SNAKE_CASE`
+- Private members: prefix with `_`
+
+**Error Handling**
+
+- Use custom exceptions with descriptive names
+- Catch specific exceptions, not bare `except:`
+- Include context in error messages
+
+```python
+def selection_key(user_id: str) -> str:
+    if not user_id or not user_id.strip():
+        raise ValueError("user_id must be a non-empty string")
+    # ...
+```
+
+### JavaScript (dashboard/)
+
+**Formatting**
+
+- Use Prettier for formatting (configured in `package.json`)
+- Run `pnpm format` before committing
+
+**Naming**
+
+- Variables/functions: `camelCase`
+- Constants: `UPPER_SNAKE_CASE` or `camelCase` with const
+- Classes: `PascalCase`
+
+**General JS Style**
+
+- Use `const` by default, `let` when reassignment needed
+- Prefer template literals over string concatenation
+- Use strict equality (`===`) not loose equality (`==`)
+
+```javascript
+const MAX_POINTS = 20;
+const reqHistory = { labels: [], datasets: [...] };
+```
+
+### Terraform (infra/)
+
+**Formatting**
+
+- Use `terraform fmt` to format files
+- Run `terraform fmt -check -recursive` in CI
+
+**Naming**
+
+- Resources: `snake_case`
+- Variables: `snake_case`
+- Outputs: `snake_case`
+
+**General**
+
+- Use local values for repeated expressions
+- Tag all resources with `env`, `project`
+- Constrain provider versions with a minimum bound: `version = ">= 4.62.0"`
+
+### GitHub Actions (`.github/workflows/`)
+
+**Formatting**
+
+- Use Prettier for YAML files
+- Run `pnpm format` to format workflow files
+
+**Naming**
+
+- Job names: descriptive, lowercase with hyphens
+- Step names: descriptive
+
+### Documentation (docs/)
+
+**Formatting**
+
+- Use Prettier for Markdown files
+- Run `pnpm format` to format docs
+
+**General**
+
+- Use ATX-style headers (`#`, `##`, etc.)
+- Keep lines under 100 characters when practical
+- Include code blocks with language identifiers
+
+---
+
+## Architecture Overview
+
+```
+docs/architecture/
+├── systems/          # Individual system documentation
+├── reference/        # Reference and planning docs
+│   └── strategic/   # Strategic guidance
+└── 01-*-*.md       # ADR-style documents
+
+dashboard/           # Admin UI (Node.js/pnpm)
+infra/              # Terraform IaC
+scripts/            # Deployment automation
+state-service/      # FastAPI state service
+```
+
+---
+
+## Key Files
+
+| File                                  | Purpose               |
+| ------------------------------------- | --------------------- |
+| `CLAUDE.md`                           | Claude Code guidance  |
+| `dashboard/app.js`                    | Dashboard UI          |
+| `infra/modules/aigateway_aca/main.tf` | Main infrastructure   |
+| `state-service/state_service/`        | FastAPI state service |
+| `.github/workflows/deploy.yaml`       | CI/CD pipeline        |
+
+---
+
+## Prerequisites
+
+- Azure CLI (`az login`)
+- Terraform >= 1.14.0
+- Node.js + pnpm
+- Python 3.13+
+
+---
+
+## Before Committing
+
+1. Run formatting: `pnpm format`
+2. Run type checks: `mypy .` (if Python changed)
+3. Run terraform fmt: `terraform fmt -check -recursive`
+4. Test locally if possible

From 72c004c37cf053f732173cc5c093db1291917a20 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 17:23:25 +0200
Subject: [PATCH 08/12] feat(infra): add state service configuration to dev
 environment

---
 infra/env/dev/terraform.tfvars | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/infra/env/dev/terraform.tfvars b/infra/env/dev/terraform.tfvars
index aaec5e8..e2d08c8 100644
--- a/infra/env/dev/terraform.tfvars
+++ b/infra/env/dev/terraform.tfvars
@@ -23,3 +23,8 @@ tags = {
 }
 
 enable_redis_cache = true
+
+# State Service
+state_service_container_image   = "ghcr.io/phoenixvc/ai-gateway-state-service:latest"
+state_service_registry_username = "phoenixvc"
+state_service_registry_password = "ghp_xxx"

From 89b65606073cd4ee6f40b833ffd8ac07a6b03d36 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sat, 21 Mar 2026 04:42:03 +0200
Subject: [PATCH 09/12] docs(readme): rename ai-gateway references to sluice

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 CLAUDE.md | 4 ++--
 README.md | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index c0c4322..84f26a9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ## Project
 
-**ai-gateway** — OpenAI-compatible AI gateway built on LiteLLM, deployed to Azure Container Apps. Routes `/v1/responses` and `/v1/embeddings` to Azure OpenAI.
+**sluice** — OpenAI-compatible AI gateway built on LiteLLM, deployed to Azure Container Apps. Routes `/v1/responses` and `/v1/embeddings` to Azure OpenAI.
 
 ## Tech Stack
 
@@ -44,4 +44,4 @@ python update_env_main.py # Update environment config
 
 ## AgentKit Forge
 
-This project has not yet been onboarded to [AgentKit Forge](https://github.com/phoenixvc/agentkit-forge). To request onboarding, [create a ticket](https://github.com/phoenixvc/agentkit-forge/issues/new?title=Onboard+ai-gateway&labels=onboarding).
+This project has not yet been onboarded to [retort](https://github.com/phoenixvc/retort). To request onboarding, [create a ticket](https://github.com/phoenixvc/retort/issues/new?title=Onboard+sluice&labels=onboarding).
diff --git a/README.md b/README.md
index dabd2de..319edbf 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# ai-gateway
+# sluice
 
 OpenAI-compatible AI Gateway (LiteLLM) on Azure Container Apps. Routes `/v1/responses` and `/v1/embeddings` to Azure OpenAI.
 

From b84ee04369de36fe9b744201c29f70359086a060 Mon Sep 17 00:00:00 2001
From: Jurie Smit <smit.jurie@gmail.com>
Date: Sat, 28 Mar 2026 14:32:10 +0200
Subject: [PATCH 10/12] docs: add ecosystem section linking to phoenixvc
 platform repos

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/README.md b/README.md
index 319edbf..5c3551d 100644
--- a/README.md
+++ b/README.md
@@ -107,3 +107,18 @@ pnpm format
 - [CI/CD Runbook](docs/CI_CD.md) – workflow behavior, staging toggle, smoke tests
 - [Azure OIDC Setup](docs/AZURE_OIDC_SETUP.md) – GitHub Actions OIDC configuration
 - [Secrets Checklist](docs/SECRETS.md) – Copy/paste setup for GitHub environment secrets
+
+---
+
+## Ecosystem
+
+sluice is the AI data plane for the phoenixvc platform. All org AI traffic should route through sluice.
+
+| Repo | Role |
+|---|---|
+| [`docket`](https://github.com/phoenixvc/docket) | Consumes sluice usage telemetry (OTEL spans) for LLM spend tracking, FOCUS exports, and cost attribution per project |
+| [`cognitive-mesh`](https://github.com/phoenixvc/cognitive-mesh) | Routes all LLM calls through sluice for unified observability and model switching |
+| [`mystira-workspace`](https://github.com/phoenixvc/mystira-workspace) | Primary consumer — story generation, publisher, admin LLM calls all route through sluice |
+| [`phoenix-flow`](https://github.com/phoenixvc/phoenix-flow) | AI-assisted task routing calls route through sluice |
+| [`codeflow-engine`](https://github.com/phoenixvc/codeflow-engine) | AutoPR AI analysis calls route through sluice |
+| [`org-meta`](https://github.com/phoenixvc/org-meta) | Documents sluice as the org-wide AI gateway; sluice configuration lives alongside the registry |

From b737f84e9d5753f06ad783a7202670400b659841 Mon Sep 17 00:00:00 2001
From: Jurie Smit <smit.jurie@gmail.com>
Date: Wed, 1 Apr 2026 00:24:26 +0200
Subject: [PATCH 11/12] docs: rewrite README with architecture overview and
 component breakdown

- Lead with what sluice is and why it exists (central AI data plane vs
  scattered direct Azure OpenAI calls)
- Document all three components: Gateway, State Service, Dashboard
- Add endpoint tables for gateway and state service
- Add observability signals table (Prometheus, OTEL, Langfuse, Log Analytics)
- Add environment table with default models per env
- Move prerequisites to bottom; tighten secrets table
- Add architecture ADRs to documentation table
- Add retort to ecosystem table

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md | 183 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 117 insertions(+), 66 deletions(-)

diff --git a/README.md b/README.md
index 5c3551d..8460995 100644
--- a/README.md
+++ b/README.md
@@ -1,68 +1,108 @@
 # sluice
 
-OpenAI-compatible AI Gateway (LiteLLM) on Azure Container Apps. Routes `/v1/responses` and `/v1/embeddings` to Azure OpenAI.
+OpenAI-compatible AI gateway for the phoenixvc platform — routes all org AI traffic through a single, observable, rate-limited data plane backed by Azure OpenAI.
 
-## Prerequisites
+## What it is
 
-- [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) (`az login`)
-- [Terraform](https://www.terraform.io/downloads) >= 1.14.0
-- Bash or PowerShell (for scripts)
+Direct Azure OpenAI calls scatter across projects with no shared observability, no rate governance, and duplicated secret management. **Sluice** fixes this: a thin LiteLLM proxy that presents an OpenAI-compatible surface to every consumer, while centralising auth, rate limiting, semantic caching, and telemetry in one place.
 
-## Quick Start
+```text
+your code  →  POST /v1/responses      ─┐
+               POST /v1/embeddings    ─┤  sluice (LiteLLM)  →  Azure OpenAI
+               GET  /metrics          ─┘
+```
 
-### 1. Bootstrap Terraform state (one-time)
+All org AI traffic should route through sluice. That single rule gives you per-project spend attribution, a kill switch, and consistent model governance without changing any consumer code.
 
-Creates the shared resource group, storage account, and container for Terraform state.
+## Components
 
-**Bash:**
+| Component | Stack | Purpose |
+|-----------|-------|---------|
+| **Gateway** | LiteLLM + Docker | OpenAI-compatible proxy on Azure Container Apps |
+| **State Service** | FastAPI + Redis | Per-user model selection and model catalog API |
+| **Dashboard** | Node.js + Chart.js | Real-time request/token metrics and model switching UI |
+| **Infrastructure** | Terraform + Azure | Container Apps, Key Vault, Log Analytics, optional Redis |
 
-```bash
-./scripts/bootstrap.sh <GITHUB_ORG> <GITHUB_REPO> [SCOPE]
-```
+### Gateway
 
-**PowerShell:**
+Exposes two OpenAI-compatible endpoints (LiteLLM also serves `GET /metrics` for Prometheus scraping):
 
-```powershell
-.\scripts\bootstrap.ps1 -GITHUB_ORG <org> -GITHUB_REPO <repo> [-SCOPE <scope>]
-```
+| Endpoint | Routes to |
+|----------|-----------|
+| `POST /v1/responses` | Azure OpenAI Responses API (configurable model per env) |
+| `POST /v1/embeddings` | Azure OpenAI `text-embedding-3-large` |
 
-### 2. Add GitHub secrets
+Authentication uses a shared `master_key` passed as a Bearer token. Rate limiting (RPM, TPM, budget caps) and optional Redis semantic caching are configured in Terraform and enforced by LiteLLM.
 
-Add these secrets to each GitHub **Environment** (dev, staging, prod): **Settings → Environments → &lt;env&gt; → Environment secrets**.
+### State Service
 
-| Secret                  | Description                       | Example                                       |
-| ----------------------- | --------------------------------- | --------------------------------------------- |
-| **Infrastructure**      |                                   |                                               |
-| `TF_BACKEND_RG`         | Terraform state resource group    | `pvc-shared-tfstate-rg-san`                   |
-| `TF_BACKEND_SA`         | Terraform state storage account   | `pvctfstatexxxxxxxx`                          |
-| `TF_BACKEND_CONTAINER`  | Terraform state container         | `tfstate`                                     |
-| `AZURE_CLIENT_ID`       | OIDC app (from bootstrap)         | `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`        |
-| `AZURE_TENANT_ID`       | Azure tenant ID                   | `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`        |
-| `AZURE_SUBSCRIPTION_ID` | Azure subscription ID             | `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`        |
-| **Application**         |                                   |                                               |
-| `AZURE_OPENAI_ENDPOINT` | Azure OpenAI endpoint URL         | `https://mys-shared-ai-san.openai.azure.com/` |
-| `AZURE_OPENAI_API_KEY`  | Azure OpenAI API key              | Your Azure OpenAI key                         |
-| `AIGATEWAY_KEY`         | Gateway auth key (from bootstrap) | Base64 string from bootstrap output           |
+FastAPI service managing which model each user has selected and what models are available.
 
-Bootstrap prints these values. For local runs, copy `infra/.env.local.example` to `infra/.env.local` with the infrastructure values.
+| Endpoint | Description |
+|----------|-------------|
+| `GET /state/catalog` | Available models and their status |
+| `PUT /state/catalog` | Update the model catalog (token-protected) |
+| `GET /state/selection` | Fetch the calling user's selected model |
+| `PUT /state/selection` | Update the calling user's model selection |
+| `GET /state/selections` | Paginated list of all users' selections |
 
-> **Key Vault firewall:** Deployments from GitHub Actions require Key Vault to allow public network access. The Terraform module defaults `key_vault_network_default_action` to `Allow` for CI. If you see `ForbiddenByFirewall`, ensure the `fix/key-vault-network-acls` changes are merged and applied.
+Requires `X-User-Id` header. In production, backed by Azure Cache for Redis; falls back to in-memory for local dev.
 
-### 3. Terraform init
+### Dashboard
 
-**Bash:**
+Real-time monitoring UI:
+- Request counts and token usage charts (polls `/metrics` Prometheus endpoint)
+- Model catalog display and per-user model selection
+- Session-based API key management
+
+### Observability
+
+| Signal | Tool |
+|--------|------|
+| Metrics | Prometheus (`/metrics`), Grafana Cloud (optional) |
+| Tracing | OpenTelemetry → Application Insights |
+| LLM observability | Langfuse (optional) |
+| Structured logs | Log Analytics Workspace |
+
+---
+
+## Quick Start
+
+### 1. Bootstrap Terraform state (one-time)
+
+Creates the shared resource group, storage account, and container for Terraform state; registers an Azure AD app for GitHub Actions OIDC; outputs the values you need for GitHub secrets.
 
 ```bash
-./infra/scripts/terraform-init.sh dev   # or staging, prod
+./scripts/bootstrap.sh <GITHUB_ORG> <GITHUB_REPO> [SCOPE]
+# PowerShell: .\scripts\bootstrap.ps1 -GITHUB_ORG <org> -GITHUB_REPO <repo>
 ```
 
-**PowerShell:**
+### 2. Add GitHub environment secrets
 
-```powershell
-.\infra\scripts\terraform-init.ps1 -Env dev   # or staging, prod
-```
+Add these to each GitHub **Environment** (dev, staging, prod) under **Settings → Environments → &lt;env&gt; → Environment secrets**:
+
+| Secret | Description |
+|--------|-------------|
+| `TF_BACKEND_RG` | Terraform state resource group |
+| `TF_BACKEND_SA` | Terraform state storage account |
+| `TF_BACKEND_CONTAINER` | Terraform state container (`tfstate`) |
+| `AZURE_CLIENT_ID` | OIDC app registration client ID (from bootstrap) |
+| `AZURE_TENANT_ID` | Azure tenant ID |
+| `AZURE_SUBSCRIPTION_ID` | Azure subscription ID |
+| `AZURE_OPENAI_ENDPOINT` | Azure OpenAI endpoint URL |
+| `AZURE_OPENAI_API_KEY` | Azure OpenAI API key |
+| `AIGATEWAY_KEY` | Gateway auth key (from bootstrap output) |
+
+Bootstrap prints these values. For local runs, copy `infra/.env.local.example` to `infra/.env.local` and fill in the infrastructure values.
+
+> **Key Vault firewall:** GitHub Actions deployments require Key Vault to allow public network access. The Terraform module defaults `key_vault_network_default_action` to `Allow` for CI. If you see `ForbiddenByFirewall`, verify that the Key Vault network ACL configuration (with `key_vault_network_default_action` set to `Allow`) has been applied to the environment.
 
-Valid environments: `dev`, `staging`, `prod`.
+### 3. Terraform init
+
+```bash
+./infra/scripts/terraform-init.sh dev   # or staging, prod
+# PowerShell: .\infra\scripts\terraform-init.ps1 -Env dev
+```
 
 ### 4. Plan and apply
 
@@ -72,53 +112,64 @@ terraform plan
 terraform apply
 ```
 
+---
+
 ## Environments
 
-| Env     | Purpose     |
-| ------- | ----------- |
-| dev     | Development |
-| staging | Staging     |
-| prod    | Production  |
+| Env | Purpose | Default model |
+|-----|---------|---------------|
+| `dev` | Development | `gpt-5.3-codex` (preview) |
+| `staging` | Pre-production validation | configurable |
+| `prod` | Production | `gpt-4o` |
+
+Each environment is an independent Azure deployment with its own Container Apps, Key Vault, and (optionally) Redis cache.
 
 ## CI/CD
 
-- CI/CD behavior, environment promotion rules, and smoke-test diagnostics are documented in [docs/CI_CD.md](docs/CI_CD.md).
+GitHub Actions deploys via Azure OIDC — no long-lived credentials in CI. The pipeline runs smoke tests against the gateway and state-service health endpoint after each deploy.
 
-## Formatting (pnpm)
+See [docs/CI_CD.md](docs/CI_CD.md) for promotion rules, smoke-test diagnostics, and environment toggling.
 
-This repo uses [Prettier](https://prettier.io/) via `pnpm` for lightweight formatting checks.
+## Formatting
 
 ```bash
 pnpm install
-pnpm check
-pnpm lint
-pnpm format:check
-pnpm format
+pnpm check          # lint + terraform fmt check
+pnpm format         # apply Prettier
 ```
 
-- `pnpm check` runs repo checks (`lint` + `terraform fmt -check -recursive`)
-- `pnpm lint` currently maps to formatting checks (easy to expand later)
-- `pnpm format` applies Prettier changes
+---
 
 ## Documentation
 
-- [PRD](docs/PRD.md) – Product requirements
-- [Terraform Blueprint](docs/Terraform_Blueprint.md) – Infrastructure design
-- [CI/CD Runbook](docs/CI_CD.md) – workflow behavior, staging toggle, smoke tests
-- [Azure OIDC Setup](docs/AZURE_OIDC_SETUP.md) – GitHub Actions OIDC configuration
-- [Secrets Checklist](docs/SECRETS.md) – Copy/paste setup for GitHub environment secrets
+| Doc | Contents |
+|-----|----------|
+| [PRD](docs/PRD.md) | Product requirements and constraints |
+| [Terraform Blueprint](docs/Terraform_Blueprint.md) | Infrastructure design and naming conventions |
+| [CI/CD Runbook](docs/CI_CD.md) | Workflow behavior, staging toggle, smoke test diagnostics |
+| [Azure OIDC Setup](docs/AZURE_OIDC_SETUP.md) | GitHub Actions OIDC configuration |
+| [Secrets Checklist](docs/SECRETS.md) | Copy/paste setup for GitHub environment secrets |
+| [Architecture ADRs](docs/architecture/) | System context, container architecture, SLM routing pattern, contracts |
 
 ---
 
 ## Ecosystem
 
-sluice is the AI data plane for the phoenixvc platform. All org AI traffic should route through sluice.
+Sluice is the AI data plane for the phoenixvc platform. All org AI traffic routes through it.
 
 | Repo | Role |
-|---|---|
-| [`docket`](https://github.com/phoenixvc/docket) | Consumes sluice usage telemetry (OTEL spans) for LLM spend tracking, FOCUS exports, and cost attribution per project |
+|------|------|
+| [`docket`](https://github.com/phoenixvc/docket) | Consumes sluice OTEL spans for LLM spend tracking, FOCUS exports, and per-project cost attribution |
 | [`cognitive-mesh`](https://github.com/phoenixvc/cognitive-mesh) | Routes all LLM calls through sluice for unified observability and model switching |
-| [`mystira-workspace`](https://github.com/phoenixvc/mystira-workspace) | Primary consumer — story generation, publisher, admin LLM calls all route through sluice |
+| [`mystira-workspace`](https://github.com/phoenixvc/mystira-workspace) | Primary consumer — story generation, publisher, and admin calls all route through sluice |
 | [`phoenix-flow`](https://github.com/phoenixvc/phoenix-flow) | AI-assisted task routing calls route through sluice |
 | [`codeflow-engine`](https://github.com/phoenixvc/codeflow-engine) | AutoPR AI analysis calls route through sluice |
-| [`org-meta`](https://github.com/phoenixvc/org-meta) | Documents sluice as the org-wide AI gateway; sluice configuration lives alongside the registry |
+| [`retort`](https://github.com/phoenixvc/retort) | Retort-scaffolded projects reference sluice as the recommended AI gateway |
+
+---
+
+## Prerequisites
+
+- [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) — `az login`
+- [Terraform](https://www.terraform.io/downloads) >= 1.14.0
+- Node.js + pnpm (formatting only)

From 106be8d6ba6b0b713816d923df56db30a8de0c59 Mon Sep 17 00:00:00 2001
From: Jurie Smit <smit.jurie@gmail.com>
Date: Wed, 1 Apr 2026 00:32:00 +0200
Subject: [PATCH 12/12] fix(agents): correct relative import guidance and add
 code fence language
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix 'absolute imports' → 'relative imports' (the example already used
  relative syntax, the label was wrong)
- Add 'text' language identifier to architecture overview code fence

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 AGENTS.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 3fd1618..5f41681 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -75,7 +75,7 @@ pnpm check
 
 **Imports**
 
-- Use absolute imports within packages: `from .routes import router`
+- Use relative imports within packages: `from .routes import router`
 - Group imports: stdlib → third-party → local
 - Use `import os`, `from typing import Optional`, etc.
 
@@ -183,7 +183,7 @@ const reqHistory = { labels: [], datasets: [...] };
 
 ## Architecture Overview
 
-```
+```text
 docs/architecture/
 ├── systems/          # Individual system documentation
 ├── reference/        # Reference and planning docs