diff --git a/.github/actions/import-container-app/action.yml b/.github/actions/import-container-app/action.yml index cdd6872..5950bca 100644 --- a/.github/actions/import-container-app/action.yml +++ b/.github/actions/import-container-app/action.yml @@ -10,7 +10,7 @@ inputs: description: Project name component of the Container App name (TF_VAR_projname) env: required: true - description: Environment name (dev|uat|prod) + description: Environment name (dev|staging|prod) location_short: required: true description: Short location code (TF_VAR_location_short) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 47b33b7..2081df5 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -15,10 +15,10 @@ - [ ] No environment/config changes required - [ ] Environment/config changes required (describe below) -## UAT Toggle (PRs to `main`) +## Staging Toggle (PRs to `main`) -- Add label `run-uat` to this PR to enable UAT deployment (`deploy-uat`). -- Remove label `run-uat` to skip UAT deployment. +- Add label `run-staging` to this PR to enable staging deployment (`deploy-staging`). +- Remove label `run-staging` to skip staging deployment. 
## Risk / Rollback diff --git a/.github/workflows/deploy-environment.yaml b/.github/workflows/deploy-environment.yaml new file mode 100644 index 0000000..1485e2e --- /dev/null +++ b/.github/workflows/deploy-environment.yaml @@ -0,0 +1,232 @@ +name: Deploy Environment + +on: + workflow_call: + inputs: + env_name: + required: true + type: string + description: Environment name (dev/staging/prod) + tf_state_key: + required: true + type: string + description: Terraform state key (e.g., dev.terraform.tfstate) + codex_model: + required: true + type: string + description: Codex model deployment name + codex_api_version: + required: true + type: string + description: Codex API version + terraform_working_directory: + required: true + type: string + description: Terraform working directory (e.g., infra/env/dev) + smoke_retry_sleep: + required: false + type: string + default: "10" + description: Retry sleep for smoke tests + smoke_models_wait_sleep: + required: false + type: string + default: "15" + description: Wait sleep for model registration + include_aoai_host_check: + required: false + type: boolean + default: false + description: Include AOAI endpoint host validation + secrets: + AZURE_OPENAI_ENDPOINT: + required: true + AZURE_OPENAI_API_KEY: + required: true + AZURE_OPENAI_EMBEDDING_ENDPOINT: + required: true + AZURE_OPENAI_EMBEDDING_API_KEY: + required: true + AIGATEWAY_KEY: + required: true + +env: + TF_VAR_env: ${{ inputs.env_name }} + TF_VAR_projname: "aigateway" + TF_VAR_location: "southafricanorth" + TF_VAR_location_short: "san" + TF_VAR_azure_openai_endpoint: ${{ secrets.AZURE_OPENAI_ENDPOINT }} + TF_VAR_azure_openai_api_key: ${{ secrets.AZURE_OPENAI_API_KEY }} + TF_VAR_azure_openai_embedding_endpoint: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }} + TF_VAR_azure_openai_embedding_api_key: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }} + TF_VAR_gateway_key: ${{ secrets.AIGATEWAY_KEY }} + TF_VAR_codex_model: ${{ inputs.codex_model }} + TF_VAR_codex_api_version: 
${{ inputs.codex_api_version }} + TF_VAR_embedding_deployment: "text-embedding-3-large" + TF_VAR_embeddings_api_version: "2024-02-01" + AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }} + TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }} + TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }} + +jobs: + deploy: + runs-on: ubuntu-latest + environment: ${{ inputs.env_name }} + defaults: + run: + working-directory: ${{ inputs.terraform_working_directory }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Quickcheck required secrets and config + shell: bash + run: | + set -euo pipefail + missing=0 + required=( + AZURE_CLIENT_ID + AZURE_TENANT_ID + AZURE_SUBSCRIPTION_ID + TF_BACKEND_RG + TF_BACKEND_SA + TF_BACKEND_CONTAINER + TF_VAR_azure_openai_endpoint + TF_VAR_azure_openai_api_key + TF_VAR_gateway_key + ) + for v in "${required[@]}"; do + if [ -z "${!v:-}" ]; then + echo "::error::Missing required value: ${v}" + missing=1 + else + echo "${v}=SET" + fi + done + echo "TF_VAR_env=${TF_VAR_env:-unset}" + echo "TF_VAR_embedding_deployment=${TF_VAR_embedding_deployment:-unset}" + echo "TF_VAR_codex_model=${TF_VAR_codex_model:-unset}" + if [ -n "${TF_VAR_azure_openai_endpoint:-}" ]; then + echo "Azure OpenAI endpoint=${TF_VAR_azure_openai_endpoint}" + endpoint_host=$(echo "${TF_VAR_azure_openai_endpoint}" | sed -E 's#^https?://([^/]+)/?.*$#\1#') + echo "Azure OpenAI endpoint host=${endpoint_host}" + if [ "${{ inputs.include_aoai_host_check }}" = "true" ] && [ -n "${EXPECTED_AOAI_ENDPOINT_HOST:-}" ] && [ "${endpoint_host}" != "${EXPECTED_AOAI_ENDPOINT_HOST}" ]; then + echo "::error::Prod AOAI endpoint host mismatch. Expected '${EXPECTED_AOAI_ENDPOINT_HOST}', got '${endpoint_host}'. Check environment secret AZURE_OPENAI_ENDPOINT." 
+ missing=1 + fi + fi + if [ "${missing}" -ne 0 ]; then + exit 1 + fi + + - name: Azure Login + uses: azure/login@v2 + with: + client-id: ${{ env.AZURE_CLIENT_ID }} + tenant-id: ${{ env.AZURE_TENANT_ID }} + subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }} + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: 1.14.6 + + - name: Terraform Init + run: | + terraform init \ + -backend-config="resource_group_name=${TF_BACKEND_RG}" \ + -backend-config="storage_account_name=${TF_BACKEND_SA}" \ + -backend-config="container_name=${TF_BACKEND_CONTAINER}" \ + -backend-config="key=${{ inputs.tf_state_key }}" + + - name: Import existing Container App into Terraform state + uses: ./.github/actions/import-container-app + with: + projname: ${{ env.TF_VAR_projname }} + env: ${{ env.TF_VAR_env }} + location_short: ${{ env.TF_VAR_location_short }} + subscription_id: ${{ env.AZURE_SUBSCRIPTION_ID }} + terraform_working_directory: ${{ inputs.terraform_working_directory }} + + - name: Terraform Plan + run: | + terraform plan -out=tfplan + + - name: Terraform Apply + run: | + terraform apply -auto-approve tfplan + + - name: Get gateway URL + id: gw + run: echo "url=$(terraform output -raw gateway_url)" >> $GITHUB_OUTPUT + + - name: Get dashboard URL + id: db + run: echo "url=$(terraform output -raw dashboard_url 2>/dev/null || true)" >> $GITHUB_OUTPUT + + - name: Runtime diagnostics (Container App config) + shell: bash + run: | + set -euo pipefail + RG_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-rg-${TF_VAR_location_short}" + CA_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-ca-${TF_VAR_location_short}" + echo "Resource Group: ${RG_NAME}" + echo "Container App: ${CA_NAME}" + echo "Gateway URL (terraform output): ${{ steps.gw.outputs.url }}" + echo "Latest revision:" + az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.latestRevisionName" -o tsv + echo "Active revisions (name, active, created):" + az containerapp revision list -g 
"${RG_NAME}" -n "${CA_NAME}" --query "[].{name:name,active:properties.active,created:properties.createdTime}" -o table + echo "Configured env vars for LiteLLM secret refs:" + az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_AZURE_OPENAI_API_KEY' || name=='LITELLM_GATEWAY_KEY']" -o json + echo "Configured secret sources (names + key vault URLs):" + az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.configuration.secrets[].{name:name,keyVaultUrl:keyVaultUrl}" -o table + echo "LITELLM_CONFIG_CONTENT excerpt (first 2000 chars):" + az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_CONFIG_CONTENT'].value | [0]" -o tsv | head -c 2000 || true + echo + + - name: Integration test (Azure OpenAI backend) + shell: bash + env: + AZURE_OPENAI_ENDPOINT: ${{ env.TF_VAR_azure_openai_endpoint }} + AZURE_OPENAI_API_KEY: ${{ env.TF_VAR_azure_openai_api_key }} + AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ env.TF_VAR_azure_openai_embedding_endpoint }} + AZURE_OPENAI_EMBEDDING_API_KEY: ${{ env.TF_VAR_azure_openai_embedding_api_key }} + AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ env.TF_VAR_embedding_deployment }} + AZURE_OPENAI_API_VERSION: ${{ env.TF_VAR_embeddings_api_version }} + AZURE_OPENAI_CHAT_DEPLOYMENT: "gpt-4.1" + AZURE_OPENAI_CHAT_API_VERSION: ${{ env.TF_VAR_codex_api_version }} + AZURE_OPENAI_CODEX_MODEL: ${{ env.TF_VAR_codex_model }} + working-directory: ${{ github.workspace }} + run: python3 scripts/integration_test.py + + - name: Smoke test gateway (embeddings + responses) + uses: ./.github/actions/smoke-test-gateway + with: + gateway_url: ${{ steps.gw.outputs.url }} + gateway_key: ${{ secrets.AIGATEWAY_KEY }} + embedding_model: ${{ env.TF_VAR_embedding_deployment }} + codex_model: ${{ env.TF_VAR_codex_model }} + aoai_endpoint: ${{ env.TF_VAR_azure_openai_endpoint }} + aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }} + 
max_attempts: "3" + retry_sleep: ${{ inputs.smoke_retry_sleep }} + models_wait_attempts: ${{ inputs.env_name == 'prod' && '3' || '1' }} + models_wait_sleep: ${{ inputs.smoke_models_wait_sleep }} + + - name: Smoke test shared state API (dashboard proxy) + if: env.TF_VAR_state_service_container_image != '' + shell: bash + run: | + set -euo pipefail + DASHBOARD_URL="${{ steps.db.outputs.url }}" + TEST_USER="ci-smoke-${TF_VAR_env}" + + curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/catalog" > /tmp/catalog.json + + curl -fsS --connect-timeout 5 --max-time 15 -X PUT "${DASHBOARD_URL}/api/state/selection" \ + -H "Content-Type: application/json" \ + -H "X-User-Id: ${TEST_USER}" \ + -d '{"enabled":true,"selected_model":"'"${TF_VAR_codex_model}"'"}' > /tmp/selection-put.json + + curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/selection" \ + -H "X-User-Id: ${TEST_USER}" > /tmp/selection-get.json + + jq -e '.enabled == true' /tmp/selection-get.json > /dev/null diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 34a6c9f..59ece73 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -33,23 +33,23 @@ env: jobs: plan: - # PR into dev → dev | PR into main + label 'run-uat' → uat | Push to main/workflow_dispatch → prod + # PR into dev → dev | PR into main + label 'run-staging' → staging | Push to main/workflow_dispatch → prod # Skip plan for PRs from forks (no repo secrets; avoids AADSTS700213) - # Runtime UAT toggle: add PR label 'run-uat' to enable UAT on PRs into main. + # Runtime staging toggle: add PR label 'run-staging' to enable staging on PRs into main. 
if: | (github.event_name != 'pull_request' || github.event.pull_request.head.repo.fork == false) && ( (github.event_name == 'push' && github.ref == 'refs/heads/main') || (github.event_name == 'workflow_dispatch') || (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev') || - (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-uat')) + (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-staging')) ) name: Plan ${{ matrix.environment }} runs-on: ubuntu-latest strategy: fail-fast: false matrix: - environment: ${{ (github.event_name == 'workflow_dispatch' && fromJSON('["prod"]')) || (github.event_name == 'push' && github.ref == 'refs/heads/main' && fromJSON('["prod"]')) || (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev' && fromJSON('["dev"]')) || (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-uat') && fromJSON('["uat"]')) || fromJSON('["prod"]') }} + environment: ${{ (github.event_name == 'workflow_dispatch' && fromJSON('["prod"]')) || (github.event_name == 'push' && github.ref == 'refs/heads/main' && fromJSON('["prod"]')) || (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev' && fromJSON('["dev"]')) || (github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-staging') && fromJSON('["staging"]')) || fromJSON('["prod"]') }} environment: ${{ matrix.environment }} defaults: run: @@ -149,539 +149,67 @@ jobs: deploy-dev: name: Deploy dev needs: plan - runs-on: ubuntu-latest if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev' - environment: dev - 
defaults: - run: - working-directory: infra/env/dev - - env: - TF_VAR_env: "dev" - TF_VAR_projname: "aigateway" - TF_VAR_location: "southafricanorth" - TF_VAR_location_short: "san" - TF_VAR_azure_openai_endpoint: ${{ secrets.AZURE_OPENAI_ENDPOINT }} - TF_VAR_azure_openai_api_key: ${{ secrets.AZURE_OPENAI_API_KEY }} - TF_VAR_azure_openai_embedding_endpoint: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }} - TF_VAR_azure_openai_embedding_api_key: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }} - TF_VAR_gateway_key: ${{ secrets.AIGATEWAY_KEY }} - TF_VAR_codex_model: "gpt-5.3-codex" - TF_VAR_codex_api_version: "2025-04-01-preview" - TF_VAR_embedding_deployment: "text-embedding-3-large" - TF_VAR_embeddings_api_version: "2024-02-01" - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Quickcheck required secrets and config - shell: bash - run: | - set -euo pipefail - missing=0 - required=( - AZURE_CLIENT_ID - AZURE_TENANT_ID - AZURE_SUBSCRIPTION_ID - TF_BACKEND_RG - TF_BACKEND_SA - TF_BACKEND_CONTAINER - TF_VAR_azure_openai_endpoint - TF_VAR_azure_openai_api_key - TF_VAR_gateway_key - ) - for v in "${required[@]}"; do - if [ -z "${!v:-}" ]; then - echo "::error::Missing required value: ${v}" - missing=1 - else - echo "${v}=SET" - fi - done - echo "TF_VAR_env=${TF_VAR_env:-unset}" - echo "TF_VAR_embedding_deployment=${TF_VAR_embedding_deployment:-unset}" - echo "TF_VAR_codex_model=${TF_VAR_codex_model:-unset}" - if [ -n "${TF_VAR_azure_openai_endpoint:-}" ]; then - echo "Azure OpenAI endpoint=${TF_VAR_azure_openai_endpoint}" - endpoint_host=$(echo "${TF_VAR_azure_openai_endpoint}" | sed -E 's#^https?://([^/]+)/?.*$#\1#') - echo "Azure OpenAI endpoint host=${endpoint_host}" - fi - if [ "${missing}" -ne 0 ]; then - exit 1 - fi - - - name: Azure Login - uses: azure/login@v2 - with: - client-id: ${{ env.AZURE_CLIENT_ID }} - tenant-id: ${{ env.AZURE_TENANT_ID }} - subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }} - - - name: Setup Terraform - uses: 
hashicorp/setup-terraform@v3 - with: - terraform_version: 1.14.6 - - - name: Terraform Init - run: | - terraform init \ - -backend-config="resource_group_name=${TF_BACKEND_RG}" \ - -backend-config="storage_account_name=${TF_BACKEND_SA}" \ - -backend-config="container_name=${TF_BACKEND_CONTAINER}" \ - -backend-config="key=dev.terraform.tfstate" - - - name: Import existing Container App into Terraform state - uses: ./.github/actions/import-container-app - with: - projname: ${{ env.TF_VAR_projname }} - env: ${{ env.TF_VAR_env }} - location_short: ${{ env.TF_VAR_location_short }} - subscription_id: ${{ env.AZURE_SUBSCRIPTION_ID }} - terraform_working_directory: infra/env/dev - - - name: Terraform Plan - run: | - terraform plan -out=tfplan - - - name: Terraform Apply - run: | - terraform apply -auto-approve tfplan - - - name: Get gateway URL - id: gw - run: echo "url=$(terraform output -raw gateway_url)" >> $GITHUB_OUTPUT - - - name: Get dashboard URL - id: db - run: echo "url=$(terraform output -raw dashboard_url 2>/dev/null || true)" >> $GITHUB_OUTPUT - - - name: Runtime diagnostics (Container App config) - shell: bash - run: | - set -euo pipefail - RG_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-rg-${TF_VAR_location_short}" - CA_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-ca-${TF_VAR_location_short}" - echo "Resource Group: ${RG_NAME}" - echo "Container App: ${CA_NAME}" - echo "Gateway URL (terraform output): ${{ steps.gw.outputs.url }}" - echo "Latest revision:" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.latestRevisionName" -o tsv - echo "Active revisions (name, active, created):" - az containerapp revision list -g "${RG_NAME}" -n "${CA_NAME}" --query "[].{name:name,active:properties.active,created:properties.createdTime}" -o table - echo "Configured env vars for LiteLLM secret refs:" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_AZURE_OPENAI_API_KEY' || 
name=='LITELLM_GATEWAY_KEY']" -o json - echo "Configured secret sources (names + key vault URLs):" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.configuration.secrets[].{name:name,keyVaultUrl:keyVaultUrl}" -o table - echo "LITELLM_CONFIG_CONTENT excerpt (first 2000 chars):" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_CONFIG_CONTENT'].value | [0]" -o tsv | head -c 2000 || true - echo - - - name: Integration test (Azure OpenAI backend) - shell: bash - env: - AZURE_OPENAI_ENDPOINT: ${{ env.TF_VAR_azure_openai_endpoint }} - AZURE_OPENAI_API_KEY: ${{ env.TF_VAR_azure_openai_api_key }} - AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ env.TF_VAR_azure_openai_embedding_endpoint }} - AZURE_OPENAI_EMBEDDING_API_KEY: ${{ env.TF_VAR_azure_openai_embedding_api_key }} - AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ env.TF_VAR_embedding_deployment }} - AZURE_OPENAI_API_VERSION: ${{ env.TF_VAR_embeddings_api_version }} - AZURE_OPENAI_CHAT_DEPLOYMENT: "gpt-4.1" - AZURE_OPENAI_CHAT_API_VERSION: ${{ env.TF_VAR_codex_api_version }} - AZURE_OPENAI_CODEX_MODEL: ${{ env.TF_VAR_codex_model }} - working-directory: ${{ github.workspace }} - run: python3 scripts/integration_test.py - - - name: Smoke test gateway (embeddings + responses) - uses: ./.github/actions/smoke-test-gateway - with: - gateway_url: ${{ steps.gw.outputs.url }} - gateway_key: ${{ secrets.AIGATEWAY_KEY }} - embedding_model: ${{ env.TF_VAR_embedding_deployment }} - codex_model: ${{ env.TF_VAR_codex_model }} - aoai_endpoint: ${{ env.TF_VAR_azure_openai_endpoint }} - aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }} - max_attempts: "3" - retry_sleep: "10" - - - name: Smoke test shared state API (dashboard proxy) - if: env.TF_VAR_state_service_container_image != '' - shell: bash - run: | - set -euo pipefail - DASHBOARD_URL="${{ steps.db.outputs.url }}" - TEST_USER="ci-smoke-${TF_VAR_env}" - - curl -fsS --connect-timeout 5 --max-time 15 
"${DASHBOARD_URL}/api/state/catalog" > /tmp/catalog.json - - curl -fsS --connect-timeout 5 --max-time 15 -X PUT "${DASHBOARD_URL}/api/state/selection" \ - -H "Content-Type: application/json" \ - -H "X-User-Id: ${TEST_USER}" \ - -d '{"enabled":true,"selected_model":"'"${TF_VAR_codex_model}"'"}' > /tmp/selection-put.json - - curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/selection" \ - -H "X-User-Id: ${TEST_USER}" > /tmp/selection-get.json - - jq -e '.enabled == true' /tmp/selection-get.json > /dev/null - - deploy-uat: - name: Deploy uat + uses: ./.github/workflows/deploy-environment.yaml + with: + env_name: dev + tf_state_key: dev.terraform.tfstate + codex_model: gpt-5.3-codex + codex_api_version: 2025-04-01-preview + terraform_working_directory: infra/env/dev + smoke_retry_sleep: "10" + smoke_models_wait_sleep: "15" + include_aoai_host_check: false + secrets: inherit + + deploy-staging: + name: Deploy staging needs: plan - runs-on: ubuntu-latest - if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-uat') - environment: uat - defaults: - run: - working-directory: infra/env/uat - - env: - TF_VAR_env: "uat" - TF_VAR_projname: "aigateway" - TF_VAR_location: "southafricanorth" - TF_VAR_location_short: "san" - TF_VAR_azure_openai_endpoint: ${{ secrets.AZURE_OPENAI_ENDPOINT }} - TF_VAR_azure_openai_api_key: ${{ secrets.AZURE_OPENAI_API_KEY }} - TF_VAR_azure_openai_embedding_endpoint: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }} - TF_VAR_azure_openai_embedding_api_key: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }} - 
TF_VAR_gateway_key: ${{ secrets.AIGATEWAY_KEY }} - TF_VAR_codex_model: "gpt-5.3-codex" - TF_VAR_codex_api_version: "2025-04-01-preview" - TF_VAR_embedding_deployment: "text-embedding-3-large" - TF_VAR_embeddings_api_version: "2024-02-01" - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Quickcheck required secrets and config - shell: bash - run: | - set -euo pipefail - missing=0 - required=( - AZURE_CLIENT_ID - AZURE_TENANT_ID - AZURE_SUBSCRIPTION_ID - TF_BACKEND_RG - TF_BACKEND_SA - TF_BACKEND_CONTAINER - TF_VAR_azure_openai_endpoint - TF_VAR_azure_openai_api_key - TF_VAR_gateway_key - ) - for v in "${required[@]}"; do - if [ -z "${!v:-}" ]; then - echo "::error::Missing required value: ${v}" - missing=1 - else - echo "${v}=SET" - fi - done - echo "TF_VAR_env=${TF_VAR_env:-unset}" - echo "TF_VAR_embedding_deployment=${TF_VAR_embedding_deployment:-unset}" - echo "TF_VAR_codex_model=${TF_VAR_codex_model:-unset}" - if [ -n "${TF_VAR_azure_openai_endpoint:-}" ]; then - echo "Azure OpenAI endpoint=${TF_VAR_azure_openai_endpoint}" - endpoint_host=$(echo "${TF_VAR_azure_openai_endpoint}" | sed -E 's#^https?://([^/]+)/?.*$#\1#') - echo "Azure OpenAI endpoint host=${endpoint_host}" - fi - if [ "${missing}" -ne 0 ]; then - exit 1 - fi - - - name: Azure Login - uses: azure/login@v2 - with: - client-id: ${{ env.AZURE_CLIENT_ID }} - tenant-id: ${{ env.AZURE_TENANT_ID }} - subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }} - - - name: Setup Terraform - uses: hashicorp/setup-terraform@v3 - with: - terraform_version: 1.14.6 - - - name: Terraform Init - run: | - terraform init \ - -backend-config="resource_group_name=${TF_BACKEND_RG}" \ - -backend-config="storage_account_name=${TF_BACKEND_SA}" \ - -backend-config="container_name=${TF_BACKEND_CONTAINER}" \ - -backend-config="key=uat.terraform.tfstate" - - - name: Import existing Container App into Terraform state - uses: ./.github/actions/import-container-app - with: - projname: ${{ env.TF_VAR_projname }} - 
env: ${{ env.TF_VAR_env }} - location_short: ${{ env.TF_VAR_location_short }} - subscription_id: ${{ env.AZURE_SUBSCRIPTION_ID }} - terraform_working_directory: infra/env/uat - - - name: Terraform Plan - run: | - terraform plan -out=tfplan - - - name: Terraform Apply - run: | - terraform apply -auto-approve tfplan - - - name: Get gateway URL - id: gw - run: echo "url=$(terraform output -raw gateway_url)" >> $GITHUB_OUTPUT - - - name: Get dashboard URL - id: db - run: echo "url=$(terraform output -raw dashboard_url 2>/dev/null || true)" >> $GITHUB_OUTPUT - - - name: Runtime diagnostics (Container App config) - shell: bash - run: | - set -euo pipefail - RG_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-rg-${TF_VAR_location_short}" - CA_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-ca-${TF_VAR_location_short}" - echo "Resource Group: ${RG_NAME}" - echo "Container App: ${CA_NAME}" - echo "Gateway URL (terraform output): ${{ steps.gw.outputs.url }}" - echo "Latest revision:" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.latestRevisionName" -o tsv - echo "Active revisions (name, active, created):" - az containerapp revision list -g "${RG_NAME}" -n "${CA_NAME}" --query "[].{name:name,active:properties.active,created:properties.createdTime}" -o table - echo "Configured env vars for LiteLLM secret refs:" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_AZURE_OPENAI_API_KEY' || name=='LITELLM_GATEWAY_KEY']" -o json - echo "Configured secret sources (names + key vault URLs):" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.configuration.secrets[].{name:name,keyVaultUrl:keyVaultUrl}" -o table - echo "LITELLM_CONFIG_CONTENT excerpt (first 2000 chars):" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_CONFIG_CONTENT'].value | [0]" -o tsv | head -c 2000 || true - echo - - - name: Integration test 
(Azure OpenAI backend) - shell: bash - env: - AZURE_OPENAI_ENDPOINT: ${{ env.TF_VAR_azure_openai_endpoint }} - AZURE_OPENAI_API_KEY: ${{ env.TF_VAR_azure_openai_api_key }} - AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ env.TF_VAR_azure_openai_embedding_endpoint }} - AZURE_OPENAI_EMBEDDING_API_KEY: ${{ env.TF_VAR_azure_openai_embedding_api_key }} - AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ env.TF_VAR_embedding_deployment }} - AZURE_OPENAI_API_VERSION: ${{ env.TF_VAR_embeddings_api_version }} - AZURE_OPENAI_CHAT_DEPLOYMENT: "gpt-4.1" - AZURE_OPENAI_CHAT_API_VERSION: ${{ env.TF_VAR_codex_api_version }} - AZURE_OPENAI_CODEX_MODEL: ${{ env.TF_VAR_codex_model }} - working-directory: ${{ github.workspace }} - run: python3 scripts/integration_test.py - - - name: Smoke test gateway (embeddings + responses) - uses: ./.github/actions/smoke-test-gateway - with: - gateway_url: ${{ steps.gw.outputs.url }} - gateway_key: ${{ secrets.AIGATEWAY_KEY }} - embedding_model: ${{ env.TF_VAR_embedding_deployment }} - codex_model: ${{ env.TF_VAR_codex_model }} - aoai_endpoint: ${{ env.TF_VAR_azure_openai_endpoint }} - aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }} - max_attempts: "3" - retry_sleep: "10" - - - name: Smoke test shared state API (dashboard proxy) - if: env.TF_VAR_state_service_container_image != '' - shell: bash - run: | - set -euo pipefail - DASHBOARD_URL="${{ steps.db.outputs.url }}" - TEST_USER="ci-smoke-${TF_VAR_env}" - - curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/catalog" > /tmp/catalog.json - - curl -fsS --connect-timeout 5 --max-time 15 -X PUT "${DASHBOARD_URL}/api/state/selection" \ - -H "Content-Type: application/json" \ - -H "X-User-Id: ${TEST_USER}" \ - -d '{"enabled":true,"selected_model":"'"${TF_VAR_codex_model}"'"}' > /tmp/selection-put.json - - curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/selection" \ - -H "X-User-Id: ${TEST_USER}" > /tmp/selection-get.json - - jq -e '.enabled == true' /tmp/selection-get.json > 
/dev/null + if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-staging') + uses: ./.github/workflows/deploy-environment.yaml + with: + env_name: staging + tf_state_key: staging.terraform.tfstate + codex_model: gpt-5.3-codex + codex_api_version: 2025-04-01-preview + terraform_working_directory: infra/env/staging + smoke_retry_sleep: "10" + smoke_models_wait_sleep: "15" + include_aoai_host_check: false + secrets: inherit deploy-prod: name: Deploy prod needs: plan - runs-on: ubuntu-latest if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main') - environment: prod - defaults: - run: - working-directory: infra/env/prod - - env: - TF_VAR_env: "prod" - TF_VAR_projname: "aigateway" - TF_VAR_location: "southafricanorth" - TF_VAR_location_short: "san" - TF_VAR_azure_openai_endpoint: ${{ secrets.AZURE_OPENAI_ENDPOINT }} - TF_VAR_azure_openai_api_key: ${{ secrets.AZURE_OPENAI_API_KEY }} - TF_VAR_azure_openai_embedding_endpoint: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }} - TF_VAR_azure_openai_embedding_api_key: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }} - TF_VAR_gateway_key: ${{ secrets.AIGATEWAY_KEY }} - TF_VAR_codex_model: "gpt-4o" - TF_VAR_codex_api_version: "2025-01-01-preview" - TF_VAR_embedding_deployment: "text-embedding-3-large" - TF_VAR_embeddings_api_version: "2024-02-01" - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Quickcheck required secrets and config - shell: bash - run: | - set -euo pipefail - missing=0 - required=( - 
AZURE_CLIENT_ID - AZURE_TENANT_ID - AZURE_SUBSCRIPTION_ID - TF_BACKEND_RG - TF_BACKEND_SA - TF_BACKEND_CONTAINER - TF_VAR_azure_openai_endpoint - TF_VAR_azure_openai_api_key - TF_VAR_gateway_key - ) - for v in "${required[@]}"; do - if [ -z "${!v:-}" ]; then - echo "::error::Missing required value: ${v}" - missing=1 - else - echo "${v}=SET" - fi - done - echo "TF_VAR_env=${TF_VAR_env:-unset}" - echo "TF_VAR_embedding_deployment=${TF_VAR_embedding_deployment:-unset}" - echo "TF_VAR_codex_model=${TF_VAR_codex_model:-unset}" - if [ -n "${TF_VAR_azure_openai_endpoint:-}" ]; then - echo "Azure OpenAI endpoint=${TF_VAR_azure_openai_endpoint}" - endpoint_host=$(echo "${TF_VAR_azure_openai_endpoint}" | sed -E 's#^https?://([^/]+)/?.*$#\1#') - echo "Azure OpenAI endpoint host=${endpoint_host}" - if [ -n "${EXPECTED_AOAI_ENDPOINT_HOST:-}" ] && [ "${endpoint_host}" != "${EXPECTED_AOAI_ENDPOINT_HOST}" ]; then - echo "::error::Prod AOAI endpoint host mismatch. Expected '${EXPECTED_AOAI_ENDPOINT_HOST}', got '${endpoint_host}'. Check environment secret AZURE_OPENAI_ENDPOINT." 
- missing=1 - fi - fi - if [ "${missing}" -ne 0 ]; then - exit 1 - fi - - - name: Azure Login - uses: azure/login@v2 - with: - client-id: ${{ env.AZURE_CLIENT_ID }} - tenant-id: ${{ env.AZURE_TENANT_ID }} - subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }} - - - name: Setup Terraform - uses: hashicorp/setup-terraform@v3 - with: - terraform_version: 1.14.6 - - - name: Terraform Init - run: | - terraform init \ - -backend-config="resource_group_name=${TF_BACKEND_RG}" \ - -backend-config="storage_account_name=${TF_BACKEND_SA}" \ - -backend-config="container_name=${TF_BACKEND_CONTAINER}" \ - -backend-config="key=prod.terraform.tfstate" - - - name: Import existing Container App into Terraform state - uses: ./.github/actions/import-container-app - with: - projname: ${{ env.TF_VAR_projname }} - env: ${{ env.TF_VAR_env }} - location_short: ${{ env.TF_VAR_location_short }} - subscription_id: ${{ env.AZURE_SUBSCRIPTION_ID }} - terraform_working_directory: infra/env/prod - - - name: Terraform Plan - run: | - terraform plan -out=tfplan - - - name: Terraform Apply - run: | - terraform apply -auto-approve tfplan - - - name: Get gateway URL - id: gw - run: echo "url=$(terraform output -raw gateway_url)" >> $GITHUB_OUTPUT - - - name: Get dashboard URL - id: db - run: echo "url=$(terraform output -raw dashboard_url 2>/dev/null || true)" >> $GITHUB_OUTPUT - - - name: Runtime diagnostics (Container App config) - shell: bash - run: | - set -euo pipefail - RG_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-rg-${TF_VAR_location_short}" - CA_NAME="pvc-${TF_VAR_env}-${TF_VAR_projname}-ca-${TF_VAR_location_short}" - echo "Resource Group: ${RG_NAME}" - echo "Container App: ${CA_NAME}" - echo "Gateway URL (terraform output): ${{ steps.gw.outputs.url }}" - echo "Latest revision:" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.latestRevisionName" -o tsv - echo "Active revisions (name, active, created):" - az containerapp revision list -g "${RG_NAME}" -n "${CA_NAME}" 
--query "[].{name:name,active:properties.active,created:properties.createdTime}" -o table - echo "Configured env vars for LiteLLM secret refs:" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_AZURE_OPENAI_API_KEY' || name=='LITELLM_GATEWAY_KEY']" -o json - echo "Configured secret sources (names + key vault URLs):" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.configuration.secrets[].{name:name,keyVaultUrl:keyVaultUrl}" -o table - echo "LITELLM_CONFIG_CONTENT excerpt (first 2000 chars):" - az containerapp show -g "${RG_NAME}" -n "${CA_NAME}" --query "properties.template.containers[0].env[?name=='LITELLM_CONFIG_CONTENT'].value | [0]" -o tsv | head -c 2000 || true - echo - - - name: Integration test (Azure OpenAI backend) - shell: bash - env: - AZURE_OPENAI_ENDPOINT: ${{ env.TF_VAR_azure_openai_endpoint }} - AZURE_OPENAI_API_KEY: ${{ env.TF_VAR_azure_openai_api_key }} - AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ env.TF_VAR_azure_openai_embedding_endpoint }} - AZURE_OPENAI_EMBEDDING_API_KEY: ${{ env.TF_VAR_azure_openai_embedding_api_key }} - AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ env.TF_VAR_embedding_deployment }} - AZURE_OPENAI_API_VERSION: ${{ env.TF_VAR_embeddings_api_version }} - AZURE_OPENAI_CHAT_DEPLOYMENT: "gpt-4.1" - AZURE_OPENAI_CHAT_API_VERSION: ${{ env.TF_VAR_codex_api_version }} - AZURE_OPENAI_CODEX_MODEL: ${{ env.TF_VAR_codex_model }} - working-directory: ${{ github.workspace }} - run: python3 scripts/integration_test.py - - - name: Smoke test gateway (embeddings + responses) - uses: ./.github/actions/smoke-test-gateway - with: - gateway_url: ${{ steps.gw.outputs.url }} - gateway_key: ${{ secrets.AIGATEWAY_KEY }} - embedding_model: ${{ env.TF_VAR_embedding_deployment }} - codex_model: ${{ env.TF_VAR_codex_model }} - aoai_endpoint: ${{ env.TF_VAR_azure_openai_endpoint }} - aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }} - max_attempts: "3" - retry_sleep: "15" # 
prod: longer cold-start; allow more time between retries - models_wait_attempts: "3" # prod: wait longer for LiteLLM to register healthy deployments - models_wait_sleep: "30" - - - name: Smoke test shared state API (dashboard proxy) - if: env.TF_VAR_state_service_container_image != '' - shell: bash - run: | - set -euo pipefail - DASHBOARD_URL="${{ steps.db.outputs.url }}" - TEST_USER="ci-smoke-${TF_VAR_env}" - - curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/catalog" > /tmp/catalog.json - - curl -fsS --connect-timeout 5 --max-time 15 -X PUT "${DASHBOARD_URL}/api/state/selection" \ - -H "Content-Type: application/json" \ - -H "X-User-Id: ${TEST_USER}" \ - -d '{"enabled":true,"selected_model":"'"${TF_VAR_codex_model}"'"}' > /tmp/selection-put.json - - curl -fsS --connect-timeout 5 --max-time 15 "${DASHBOARD_URL}/api/state/selection" \ - -H "X-User-Id: ${TEST_USER}" > /tmp/selection-get.json - - jq -e '.enabled == true' /tmp/selection-get.json > /dev/null + uses: ./.github/workflows/deploy-environment.yaml + with: + env_name: prod + tf_state_key: prod.terraform.tfstate + codex_model: gpt-4o + codex_api_version: 2025-01-01-preview + terraform_working_directory: infra/env/prod + smoke_retry_sleep: "15" + smoke_models_wait_sleep: "30" + include_aoai_host_check: true + secrets: + AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }} + AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }} + AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }} + AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }} + + # Legacy inline deployments removed - see deploy-environment.yaml diff --git a/README.md b/README.md index d4dc2ba..dabd2de 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Creates the shared resource group, storage account, and container for Terraform ### 2. 
Add GitHub secrets -Add these secrets to each GitHub **Environment** (dev, uat, prod): **Settings → Environments → <env> → Environment secrets**. +Add these secrets to each GitHub **Environment** (dev, staging, prod): **Settings → Environments → <env> → Environment secrets**. | Secret | Description | Example | | ----------------------- | --------------------------------- | --------------------------------------------- | @@ -53,16 +53,16 @@ Bootstrap prints these values. For local runs, copy `infra/.env.local.example` t **Bash:** ```bash -./infra/scripts/terraform-init.sh dev # or uat, prod +./infra/scripts/terraform-init.sh dev # or staging, prod ``` **PowerShell:** ```powershell -.\infra\scripts\terraform-init.ps1 -Env dev # or uat, prod +.\infra\scripts\terraform-init.ps1 -Env dev # or staging, prod ``` -Valid environments: `dev`, `uat`, `prod`. +Valid environments: `dev`, `staging`, `prod`. ### 4. Plan and apply @@ -74,11 +74,11 @@ terraform apply ## Environments -| Env | Purpose | -| ---- | --------------- | -| dev | Development | -| uat | User acceptance | -| prod | Production | +| Env | Purpose | +| ------- | ----------- | +| dev | Development | +| staging | Staging | +| prod | Production | ## CI/CD @@ -104,6 +104,6 @@ pnpm format - [PRD](docs/PRD.md) – Product requirements - [Terraform Blueprint](docs/Terraform_Blueprint.md) – Infrastructure design -- [CI/CD Runbook](docs/CI_CD.md) – workflow behavior, UAT toggle, smoke tests +- [CI/CD Runbook](docs/CI_CD.md) – workflow behavior, staging toggle, smoke tests - [Azure OIDC Setup](docs/AZURE_OIDC_SETUP.md) – GitHub Actions OIDC configuration - [Secrets Checklist](docs/SECRETS.md) – Copy/paste setup for GitHub environment secrets diff --git a/dashboard/app.js b/dashboard/app.js index 65011e4..63675e5 100644 --- a/dashboard/app.js +++ b/dashboard/app.js @@ -53,7 +53,7 @@ function escHtml(s) { function deriveEnv(url) { if (!url) return null; - const m = url.match(/pvc-(dev|uat|prod)-/); + const m = 
url.match(/pvc-(dev|staging|prod)-/); return m ? m[1] : null; } diff --git a/docs/AZURE_OIDC_SETUP.md b/docs/AZURE_OIDC_SETUP.md index 509156e..0c9518b 100644 --- a/docs/AZURE_OIDC_SETUP.md +++ b/docs/AZURE_OIDC_SETUP.md @@ -12,7 +12,7 @@ If you see: Error: AADSTS700213: No matching federated identity record found for presented assertion subject 'repo:phoenixvc/ai-gateway:environment:dev' ``` -**Cause:** The workflow uses `environment: dev` (and uat/prod), so the OIDC subject is `repo:org/repo:environment:dev`. Azure must have a federated credential with that exact subject. +**Cause:** The workflow uses `environment: dev` (and staging/prod), so the OIDC subject is `repo:org/repo:environment:dev`. Azure must have a federated credential with that exact subject. ### Fix: Add environment federated credentials @@ -32,21 +32,21 @@ az ad app list --display-name pvc-shared-github-actions-oidc --query "[0].appId" 1. Go to **Azure Portal** → **Microsoft Entra ID** → **App registrations** → your app (e.g. `pvc-shared-github-actions-oidc`) 2. **Certificates & secrets** → **Federated credentials** → **Add credential** -3. For each environment (dev, uat, prod), add: +3. 
For each environment (dev, staging, prod), add: - **Federated credential scenario:** GitHub Actions deploying Azure resources - **Organization:** phoenixvc - **Repository:** ai-gateway - **Entity type:** Environment - - **Environment name:** dev (or uat, prod) - - **Name:** github-actions-dev (or uat, prod) + - **Environment name:** dev (or staging, prod) + - **Name:** github-actions-dev (or staging, prod) ### Subject formats -| Workflow config | OIDC subject | -| -------------------- | ----------------------------------------------- | -| `environment: dev` | `repo:phoenixvc/ai-gateway:environment:dev` | -| `environment: uat` | `repo:phoenixvc/ai-gateway:environment:uat` | -| `environment: prod` | `repo:phoenixvc/ai-gateway:environment:prod` | -| Branch only (no env) | `repo:phoenixvc/ai-gateway:ref:refs/heads/main` | +| Workflow config | OIDC subject | +| ---------------------- | ----------------------------------------------- | +| `environment: dev` | `repo:phoenixvc/ai-gateway:environment:dev` | +| `environment: staging` | `repo:phoenixvc/ai-gateway:environment:staging` | +| `environment: prod` | `repo:phoenixvc/ai-gateway:environment:prod` | +| Branch only (no env) | `repo:phoenixvc/ai-gateway:ref:refs/heads/main` | The federated credential **Subject** in Azure must match exactly. diff --git a/docs/CI_CD.md b/docs/CI_CD.md index ce180f0..a5a623a 100644 --- a/docs/CI_CD.md +++ b/docs/CI_CD.md @@ -6,15 +6,15 @@ This document describes the current GitHub Actions deployment behavior for `ai-g - PRs from forks are skipped for deployment-related jobs (no repo secrets). - PRs targeting `dev` run `plan` + `deploy-dev`. -- PRs targeting `main` run UAT only when the PR has label `run-uat`. +- PRs targeting `main` run staging only when the PR has label `run-staging`. - Push to `main` and `workflow_dispatch` run `plan` + `deploy-prod`. 
-## Runtime UAT toggle +## Runtime staging toggle -UAT deployment for PRs to `main` is controlled by PR label: +Staging deployment for PRs to `main` is controlled by PR label: -- Add label `run-uat` to enable `deploy-uat` for that PR. -- Remove label `run-uat` to disable UAT for that PR. +- Add label `run-staging` to enable `deploy-staging` for that PR. +- Remove label `run-staging` to disable staging for that PR. ## Smoke test behavior diff --git a/docs/PRD.md b/docs/PRD.md index 6cc0c70..710823f 100644 --- a/docs/PRD.md +++ b/docs/PRD.md @@ -15,7 +15,7 @@ Roo/Qoder currently struggles with Azure model/operation mismatches. A gateway n 2. Support: - `POST /v1/responses` routed to Azure **Responses** endpoint for configurable model (default: `gpt-5.3-codex`). - `POST /v1/embeddings` routed to Azure embeddings deployment. -3. Enable **multiple environments** (dev/uat/prod) and **multiple downstream projects**. +3. Enable **multiple environments** (dev/staging/prod) and **multiple downstream projects**. 4. Infrastructure managed with **Terraform**. 5. CI/CD via **GitHub Actions** using **Azure OIDC** (no long-lived secrets). 6. “Get it working” first; hardening follows. @@ -29,7 +29,7 @@ Roo/Qoder currently struggles with Azure model/operation mismatches. A gateway n ## 3) Environments - `dev` -- `uat` +- `staging` - `prod` Each env is independently deployable. @@ -150,7 +150,7 @@ Gateway must expose: - `docs/` - Documentation. - `infra/` - `modules/aigateway_aca` - Core Terraform module. - - `env/dev|uat|prod` - Environment-specific configurations. + - `env/dev|staging|prod` - Environment-specific configurations. - `.github/workflows/` - CI/CD pipelines. - `scripts/` - Helper scripts (bootstrap). @@ -161,13 +161,13 @@ Gateway must expose: - **Phase 1: Terraform & CI/CD** - Terraform defines infra. - GitHub Actions deploys using Azure OIDC. - - Dev auto-apply on merge; UAT/Prod gated with environment approvals.
+ - Dev auto-apply on merge; Staging/Prod gated with environment approvals. ## 10) Acceptance criteria 1. Roo/Qoder can use gateway for coding with configured model (default `gpt-5.3-codex`) without `chatCompletion operation does not work`. 2. Codebase indexing completes using embeddings through the gateway. -3. Dev/UAT/Prod are reproducible via Terraform + Actions. +3. Dev/Staging/Prod are reproducible via Terraform + Actions. 4. No secrets committed. ## 11) Risks & mitigations @@ -180,5 +180,5 @@ Gateway must expose: - M0: Repo setup, Bootstrap scripts (OIDC, State Backend). - M1: Dev env deployed; smoke tests pass; Roo works. -- M2: UAT + Prod; environment approvals. +- M2: Staging + Prod; environment approvals. - M3: Hardening (Front Door/WAF, Entra auth). diff --git a/docs/SECRETS.md b/docs/SECRETS.md index 4463421..460097e 100644 --- a/docs/SECRETS.md +++ b/docs/SECRETS.md @@ -2,17 +2,17 @@ Copy this checklist when setting up environments for this repo. -For workflow behavior (dev/uat/prod triggers, PR label `run-uat`, and smoke-test flow), see [CI_CD.md](CI_CD.md). +For workflow behavior (dev/staging/prod triggers, PR label `run-staging`, and smoke-test flow), see [CI_CD.md](CI_CD.md). ## Where to add secrets Add these as **Environment secrets** in GitHub: - **Settings → Environments → dev → Environment secrets** -- **Settings → Environments → uat → Environment secrets** +- **Settings → Environments → staging → Environment secrets** - **Settings → Environments → prod → Environment secrets** -> This workflow is environment-based (`environment: dev|uat|prod`), so each environment should have the full secret set. +> This workflow is environment-based (`environment: dev|staging|prod`), so each environment should have the full secret set.
## Required secrets (all environments) @@ -53,7 +53,7 @@ When `STATE_SERVICE_CONTAINER_IMAGE` is set (state-service enabled), set this se ## Copy/paste template -Use this block as a setup checklist when creating/updating `dev`, `uat`, and `prod`: +Use this block as a setup checklist when creating/updating `dev`, `staging`, and `prod`: ```text AZURE_CLIENT_ID= @@ -82,13 +82,13 @@ STATE_SERVICE_REGISTRY_PASSWORD= # required for priv - [ ] `AIGATEWAY_KEY` matches the key expected by the deployed gateway. - [ ] OIDC federated credentials exist for each environment subject: - `repo:phoenixvc/ai-gateway:environment:dev` - - `repo:phoenixvc/ai-gateway:environment:uat` + - `repo:phoenixvc/ai-gateway:environment:staging` - `repo:phoenixvc/ai-gateway:environment:prod` -## Runtime UAT toggle +## Runtime staging toggle -- UAT deploy on PRs into `main` is controlled by PR label `run-uat`. -- Add label `run-uat` to enable `deploy-uat` for that PR. -- Remove label `run-uat` to skip UAT for that PR. +- Staging deploy on PRs into `main` is controlled by PR label `run-staging`. +- Add label `run-staging` to enable `deploy-staging` for that PR. +- Remove label `run-staging` to skip staging for that PR. For OIDC troubleshooting, see [AZURE_OIDC_SETUP.md](AZURE_OIDC_SETUP.md). 
diff --git a/docs/Terraform_Blueprint.md b/docs/Terraform_Blueprint.md index 54ea563..f1027b0 100644 --- a/docs/Terraform_Blueprint.md +++ b/docs/Terraform_Blueprint.md @@ -3,7 +3,7 @@ This canvas includes a working Terraform scaffold: - `infra/modules/aigateway_aca` -- `infra/env/dev|uat|prod` +- `infra/env/dev|staging|prod` - Shared state configured via `terraform init -backend-config=...` in GitHub Actions > Notes: @@ -27,7 +27,7 @@ infra/ main.tf variables.tf terraform.tfvars - uat/ + staging/ main.tf variables.tf terraform.tfvars @@ -44,7 +44,7 @@ infra/ ```hcl variable "env" { type = string - description = "Environment name (dev|uat|prod)" + description = "Environment name (dev|staging|prod)" } variable "projname" { @@ -343,7 +343,7 @@ output "key_vault_name" { ## 5) Env stacks -### 5.1 `infra/env/dev/variables.tf` (repeat for uat/prod) +### 5.1 `infra/env/dev/variables.tf` (repeat for staging/prod) ```hcl variable "env" { type = string } @@ -441,7 +441,7 @@ tags = { } ``` -Repeat the env folders for `uat` and `prod`, changing only `env` and tags. +Repeat the env folders for `staging` and `prod`, changing only `env` and tags. --- diff --git a/docs/architecture/01-system-context.md b/docs/architecture/01-system-context.md new file mode 100644 index 0000000..9c20e95 --- /dev/null +++ b/docs/architecture/01-system-context.md @@ -0,0 +1,87 @@ +# System Context + +Status: Accepted +Date: 2026-03-15 +Owners: PhoenixVC Architecture Group + +## Context + +The PhoenixVC AI Platform integrates multiple intelligent systems designed to support: + +- AI request routing and governance +- Multi-agent orchestration +- Developer workflow intelligence +- Tool-driven agent execution +- Edge telemetry interpretation + +The platform consists of five major subsystems: + +1. AI Gateway +2. Cognitive Mesh +3. CodeFlow Engine +4. AgentKit Forge +5. 
PhoenixRooivalk + +These systems operate across both cloud infrastructure and edge deployments, and rely on a hybrid SLM + LLM architecture for performance, cost efficiency, and reasoning capability. + +## Decision + +Adopt a layered architecture where: + +- AI Gateway acts as the control-plane entry point +- SLMs perform routing, triage, screening, and compression +- LLMs are used selectively for high-value reasoning +- Edge systems remain locally autonomous when necessary + +## System Context Diagram + +```mermaid +flowchart TB + User[Users / Operators / Developers] + Apps[Client Apps / APIs] + GitHub[GitHub / CI Events] + Sensors[PhoenixRooivalk Sensors] + Providers[Model Providers] + Tools[External Tools / APIs] + + subgraph Platform + AIG[AI Gateway] + CM[Cognitive Mesh] + CFE[CodeFlow Engine] + AKF[AgentKit Forge] + PR[PhoenixRooivalk] + end + + User --> AIG + Apps --> AIG + GitHub --> CFE + Sensors --> PR + + AIG --> CM + AIG --> CFE + AIG --> AKF + AIG --> PR + + CM --> Providers + AKF --> Providers + CFE --> Providers + + CM --> Tools + AKF --> Tools + CFE --> Tools +``` + +## Consequences + +### Advantages + +- centralized governance of AI usage +- consistent routing logic +- scalable orchestration +- edge autonomy + +### Tradeoffs + +- additional architectural complexity +- routing model calibration required +- shared telemetry contracts required diff --git a/docs/architecture/02-container-architecture.md b/docs/architecture/02-container-architecture.md new file mode 100644 index 0000000..2b66950 --- /dev/null +++ b/docs/architecture/02-container-architecture.md @@ -0,0 +1,95 @@ +# Container Architecture + +Status: Accepted +Date: 2026-03-15 + +## Context + +To support scalability and independent evolution of system capabilities, the platform is decomposed into containerized services. + +Each service is responsible for a clearly bounded domain. 
+ +## Container Diagram + +```mermaid +flowchart TB + subgraph Clients + C1[Chat UI] + C2[Internal Apps] + C3[GitHub Webhooks] + C4[Operator Console] + end + + subgraph Gateway + G1[Ingress API] + G2[SLM Classifier] + G3[Policy Scan] + G4[Budget Router] + G5[Semantic Cache] + G6[Escalation Judge] + end + + subgraph Mesh + M1[Specialist Router] + M2[Task Decomposer] + M3[State Manager] + M4[Synthesis Coordinator] + end + + subgraph Forge + F1[Tool Selector] + F2[Argument Extractor] + F3[Execution Loop] + F4[Result Compressor] + end + + subgraph CodeFlow + CF1[PR Classifier] + CF2[Risk Scorer] + CF3[CI Triage] + CF4[Review Engine] + end + + subgraph Models + SLM[SLM Pool] + LLM[LLM Pool] + end + + C1 --> G1 + C2 --> G1 + C4 --> G1 + + G1 --> G2 + G2 --> G3 + G3 --> G4 + G4 --> G5 + G5 --> G6 + + G6 --> M1 + G6 --> F1 + G6 --> CF1 + + M1 --> M2 + M2 --> M3 + M3 --> M4 + + F1 --> F2 + F2 --> F3 + F3 --> F4 + + CF1 --> CF2 + CF2 --> CF3 + CF3 --> CF4 +``` + +## Consequences + +### Benefits + +- service isolation +- independent scaling +- clearer ownership + +### Tradeoffs + +- increased service orchestration complexity diff --git a/docs/architecture/03-deployment-trust-boundaries.md b/docs/architecture/03-deployment-trust-boundaries.md new file mode 100644 index 0000000..18724c7 --- /dev/null +++ b/docs/architecture/03-deployment-trust-boundaries.md @@ -0,0 +1,82 @@ +# Deployment and Trust Boundaries + +Status: Accepted + +## Context + +The system interacts with external users, internal services, model providers, and edge devices. Clear trust boundaries must be established. 
+ +## Trust Boundary Diagram + +```mermaid +flowchart LR + subgraph Public + A[Users] + B[GitHub] + C[External Apps] + end + + subgraph Ingress + D[API Gateway / WAF] + E[AI Gateway] + end + + subgraph ControlPlane + F[Policy Engine] + G[Session Store] + H[Semantic Cache] + I[Observability] + end + + subgraph Execution + J[Cognitive Mesh] + K[AgentKit Forge] + L[CodeFlow Engine] + end + + subgraph Integration + M[Key Vault] + N[Azure APIs] + O[GitHub APIs] + end + + subgraph ExternalModels + P[LLM Providers] + end + + subgraph Edge + Q[PhoenixRooivalk Node] + R[Sensors] + end + + A --> D + B --> D + C --> D + D --> E + + E --> F + E --> G + E --> H + E --> I + + E --> J + E --> K + E --> L + + J --> N + K --> N + L --> O + + E --> M + E --> P + + R --> Q + Q --> E +``` + +## Security Principles + +- **Gateway is the only public AI ingress.** +- **Secrets only accessed through Key Vault.** +- **Tool access occurs through controlled brokers.** +- **Edge nodes operate under constrained trust.** diff --git a/docs/architecture/04-observability-telemetry.md b/docs/architecture/04-observability-telemetry.md new file mode 100644 index 0000000..3afe313 --- /dev/null +++ b/docs/architecture/04-observability-telemetry.md @@ -0,0 +1,94 @@ +# Observability and Telemetry + +Status: Accepted + +## Context + +Cross-system observability is required for: + +- cost visibility +- routing quality measurement +- policy enforcement evidence +- debugging and operational monitoring + +## Telemetry Architecture + +```mermaid +flowchart TB + subgraph Producers + P1[AI Gateway] + P2[Cognitive Mesh] + P3[AgentKit Forge] + P4[CodeFlow Engine] + P5[Rooivalk Edge] + end + + subgraph Signals + S1[Request Logs] + S2[Routing Decisions] + S3[Policy Events] + S4[Tool Calls] + S5[Model Usage] + S6[Edge Events] + end + + subgraph Ingest + I1[OpenTelemetry] + I2[Azure Monitor] + I3[Blob Export] + end + + subgraph Analytics + A1[Azure Data Explorer] + A2[Cost Aggregates] + A3[Quality Metrics] + end + + 
subgraph Visualization + V1[Grafana] + V2[Alerts] + end + + P1 --> S1 + P1 --> S2 + P1 --> S5 + P2 --> S2 + P3 --> S4 + P4 --> S1 + P5 --> S6 + + S1 --> I1 + S2 --> I1 + S4 --> I1 + S5 --> I2 + S6 --> I3 + + I1 --> A1 + I2 --> A1 + I3 --> A1 + + A1 --> V1 + V1 --> V2 +``` + +## Key Metrics + +### Gateway + +- routing decision distribution +- SLM vs LLM usage ratio +- cache hit rate + +### CodeFlow + +- PR classification accuracy +- CI triage distribution + +### AgentKit + +- tool selection success rate + +### Rooivalk + +- alert compression ratio +- edge escalation frequency diff --git a/docs/architecture/05-slm-llm-decision-flow.md b/docs/architecture/05-slm-llm-decision-flow.md new file mode 100644 index 0000000..124e27a --- /dev/null +++ b/docs/architecture/05-slm-llm-decision-flow.md @@ -0,0 +1,60 @@ +# SLM to LLM Decision Flow + +Status: Accepted + +## Context + +Small Language Models are used as the operational cognition layer, while Large Language Models perform high-value reasoning. 
+ +## Decision Flow + +```mermaid +flowchart TD + A[Incoming Request] + B[SLM Preprocess] + C[Intent Classification] + D[Policy Scan] + E[Tool Check] + F[Complexity Estimate] + G[Confidence Score] + + A --> B + B --> C + C --> D + D --> E + E --> F + F --> G + + G --> H{Policy violation?} + H -->|Yes| X[Block / Redact] + H -->|No| I{Simple task?} + + I -->|Yes| Y[Return SLM result] + I -->|No| J{Tool first?} + + J -->|Yes| K[Execute Tool] + K --> L[SLM Compress Result] + L --> M{Enough?} + + M -->|Yes| Y + M -->|No| N[Escalate] + + J -->|No| N + + N --> O[LLM Reasoning] + O --> P[Post-check] + P --> Q[Return Response] +``` + +## Consequences + +### Benefits + +- reduced inference cost +- lower latency +- improved throughput + +### Risks + +- incorrect routing +- model confidence calibration required diff --git a/docs/architecture/06-shared-contracts.md b/docs/architecture/06-shared-contracts.md new file mode 100644 index 0000000..4b19404 --- /dev/null +++ b/docs/architecture/06-shared-contracts.md @@ -0,0 +1,57 @@ +# Shared Contracts + +Status: Accepted + +## Routing Decision + +```json +{ + "intent": "string", + "complexity": "low|medium|high", + "risk_level": "low|medium|high|critical", + "policy_status": "allow|redact|deny|review", + "needs_tool": true, + "recommended_tier": "slm|llm", + "recommended_path": "direct|tool_first|mesh|escalate", + "confidence": 0.0 +} +``` + +## Model Usage Event + +```json +{ + "trace_id": "uuid", + "system": "ai-gateway", + "model_tier": "slm", + "model_name": "model-id", + "token_in": 320, + "token_out": 64, + "latency_ms": 41, + "estimated_cost": 0.0002 +} +``` + +## Tool Execution Event + +```json +{ + "trace_id": "uuid", + "tool_name": "azure_cli", + "action": "query_metrics", + "success": true, + "latency_ms": 820 +} +``` + +## Edge Escalation Packet + +```json +{ + "event_id": "uuid", + "site_id": "string", + "event_label": "rf_anomaly", + "summary": "Drone signature detected near perimeter", + "confidence": 0.78 +} +``` 
diff --git a/docs/architecture/07-repo-ownership-map.md b/docs/architecture/07-repo-ownership-map.md new file mode 100644 index 0000000..341323f --- /dev/null +++ b/docs/architecture/07-repo-ownership-map.md @@ -0,0 +1,28 @@ +# Repository Ownership Map + +Status: Accepted + +## Repository Map + +```mermaid +flowchart LR + R1[pvc-ai-gateway] --> S1[AI Gateway Service] + R2[cognitive-mesh] --> S2[Cognitive Mesh] + R3[codeflow-engine] --> S3[CodeFlow Engine] + R4[agentkit-forge] --> S4[AgentKit Forge] + R5[phoenixrooivalk] --> S5[Rooivalk Edge / Command] + R6[shared-contracts] --> S6[Shared Contracts] + R7[infra] --> S7[Infrastructure / Monitoring] +``` + +## Ownership + +| Repository | Owns | +| -------------------- | ------------------------------------------------------ | +| **AI Gateway** | request routing, policy enforcement, model abstraction | +| **Cognitive Mesh** | orchestration, multi-agent coordination | +| **CodeFlow Engine** | CI/CD intelligence, PR analysis | +| **AgentKit Forge** | tool-driven agents, execution runtime | +| **PhoenixRooivalk** | edge telemetry, operator alerts | +| **Shared Contracts** | telemetry schema, routing decisions, audit envelope | +| **Infrastructure** | Azure deployment, monitoring, networking | diff --git a/docs/architecture/README.md b/docs/architecture/README.md new file mode 100644 index 0000000..fae13c6 --- /dev/null +++ b/docs/architecture/README.md @@ -0,0 +1,214 @@ +# Architecture + +This directory contains system architecture documentation for the AI Gateway and related systems. 
+ +## Overview + +The architecture follows a layered approach combining: + +- **SLMs (Small Language Models)** for cost-effective routing, classification, and tool selection +- **LLMs** for complex reasoning and final synthesis + +### Canonical Principle + +> **Use SLMs to decide, filter, classify, compress, and prepare.** +> **Use LLMs to reason, reconcile, synthesize, and communicate.** + +## Documentation Structure + +``` +docs/architecture/ +├── README.md # This file +├── 01-system-context.md # ADR: System Context +├── 02-container-architecture.md # ADR: Container Architecture +├── 03-deployment-trust-boundaries.md # ADR: Deployment & Trust Boundaries +├── 04-observability-telemetry.md # ADR: Observability & Telemetry +├── 05-slm-llm-decision-flow.md # ADR: SLM→LLM Decision Flow +├── 06-shared-contracts.md # ADR: Shared Contracts +├── 07-repo-ownership-map.md # ADR: Repository Ownership +├── systems/ # Individual system documentation +│ ├── ai-gateway.md +│ ├── cognitive-mesh.md +│ ├── codeflow-engine.md +│ ├── agentkit-forge.md +│ ├── phoenix-rooivalk.md +│ └── mystira.md +└── reference/ # Reference and planning docs + ├── cross-system.md + ├── c4-architecture.md + ├── deployment-observability.md + ├── contracts.md + ├── operations-patterns.md + ├── dashboards.md + ├── slm-implementation-matrix.md + ├── slm-management-plan.md + ├── matrix-gateway.md + ├── matrix-cognitive-mesh.md + ├── matrix-codeflow.md + ├── matrix-agentkit.md + ├── matrix-rooivalk.md + ├── matrix-mystira.md + └── strategic/ # Strategic guidance + ├── README.md + ├── 01-why-slms-matter.md + ├── 02-gateway-slm-use-cases.md + ├── 03-cognitive-mesh-use-cases.md + ├── 04-codeflow-use-cases.md + ├── 05-agentkit-use-cases.md + ├── 06-rooivalk-use-cases.md + ├── 07-deployment-model.md + └── 08-implementation-order.md +``` + +docs/architecture/ +├── README.md # This file +├── systems/ # Individual system documentation +│ ├── ai-gateway.md +│ ├── cognitive-mesh.md +│ ├── codeflow-engine.md +│ ├── 
agentkit-forge.md +│ ├── phoenix-rooivalk.md +│ └── mystira.md +└── reference/ # Reference and planning docs +├── cross-system.md +├── c4-architecture.md +├── deployment-observability.md +├── contracts.md +├── operations-patterns.md +├── dashboards.md +├── slm-implementation-matrix.md +├── slm-management-plan.md +├── matrix-gateway.md +├── matrix-cognitive-mesh.md +├── matrix-codeflow.md +├── matrix-agentkit.md +├── matrix-rooivalk.md +├── matrix-mystira.md +└── strategic/ # Strategic guidance +├── README.md +├── 01-why-slms-matter.md +├── 02-gateway-slm-use-cases.md +├── 03-cognitive-mesh-use-cases.md +├── 04-codeflow-use-cases.md +├── 05-agentkit-use-cases.md +├── 06-rooivalk-use-cases.md +├── 07-deployment-model.md +└── 08-implementation-order.md + +``` + +docs/architecture/ +├── README.md # This file +├── systems/ # Individual system documentation +│ ├── ai-gateway.md +│ ├── cognitive-mesh.md +│ ├── codeflow-engine.md +│ ├── agentkit-forge.md +│ ├── phoenix-rooivalk.md +│ └── mystira.md +└── reference/ # Reference and planning docs +├── cross-system.md +├── slm-implementation-matrix.md +├── slm-management-plan.md +├── matrix-gateway.md +├── matrix-cognitive-mesh.md +├── matrix-codeflow.md +├── matrix-agentkit.md +├── matrix-rooivalk.md +├── matrix-mystira.md +└── strategic/ # Strategic guidance +├── README.md +├── 01-why-slms-matter.md +├── 02-gateway-slm-use-cases.md +├── 03-cognitive-mesh-use-cases.md +├── 04-codeflow-use-cases.md +├── 05-agentkit-use-cases.md +├── 06-rooivalk-use-cases.md +├── 07-deployment-model.md +└── 08-implementation-order.md + +``` + +### Systems + +- [systems/ai-gateway.md](systems/ai-gateway.md) - AI Gateway: SLM as admission control & routing +- [systems/cognitive-mesh.md](systems/cognitive-mesh.md) - Agent orchestration: routing, decomposition +- [systems/codeflow-engine.md](systems/codeflow-engine.md) - CI/CD intelligence: PR triage, log analysis +- [systems/agentkit-forge.md](systems/agentkit-forge.md) - Agent building: tool
selection, context compression +- [systems/phoenix-rooivalk.md](systems/phoenix-rooivalk.md) - Edge AI: SLM for reports only (NOT control) +- [systems/mystira.md](systems/mystira.md) - Story generation: SLM as moderation, age-fit, continuity layer + +### Reference + +- [reference/cross-system.md](reference/cross-system.md) - How all systems integrate +- [reference/c4-architecture.md](reference/c4-architecture.md) - C4-style diagrams (context, containers, sequences) +- [reference/deployment-observability.md](reference/deployment-observability.md) - Deployment, trust boundaries, observability +- [reference/contracts.md](reference/contracts.md) - Shared JSON schemas for telemetry and routing +- [reference/operations-patterns.md](reference/operations-patterns.md) - SLM→LLM decision flows, ownership, implementation +- [reference/dashboards.md](reference/dashboards.md) - Recommended Grafana/ADX dashboards +- [reference/slm-implementation-matrix.md](reference/slm-implementation-matrix.md) - Overview with threshold summary +- [reference/slm-management-plan.md](reference/slm-management-plan.md) - Cross-project SLM management + +### Strategic Guidance + +- [reference/strategic/README.md](reference/strategic/README.md) - Strategic SLM guidance index +- [reference/strategic/01-why-slms-matter.md](reference/strategic/01-why-slms-matter.md) - Executive summary +- [reference/strategic/02-gateway-slm-use-cases.md](reference/strategic/02-gateway-slm-use-cases.md) - AI Gateway use cases +- [reference/strategic/03-cognitive-mesh-use-cases.md](reference/strategic/03-cognitive-mesh-use-cases.md) - Cognitive Mesh use cases +- [reference/strategic/04-codeflow-use-cases.md](reference/strategic/04-codeflow-use-cases.md) - CodeFlow Engine use cases +- [reference/strategic/05-agentkit-use-cases.md](reference/strategic/05-agentkit-use-cases.md) - AgentKit Forge use cases +- [reference/strategic/06-rooivalk-use-cases.md](reference/strategic/06-rooivalk-use-cases.md) - PhoenixRooivalk use cases 
+- [reference/strategic/07-deployment-model.md](reference/strategic/07-deployment-model.md) - Deployment model +- [reference/strategic/08-implementation-order.md](reference/strategic/08-implementation-order.md) - Implementation order + +## Quick Reference + +| System | SLM Role | Key Document | +| --------------- | ----------------------------------------- | ---------------------------------------------------------- | +| AI Gateway | routing, policy checks, cost prediction | [systems/ai-gateway.md](systems/ai-gateway.md) | +| Cognitive Mesh | agent routing, task decomposition | [systems/cognitive-mesh.md](systems/cognitive-mesh.md) | +| PhoenixRooivalk | **operator summaries only** | [systems/phoenix-rooivalk.md](systems/phoenix-rooivalk.md) | +| CodeFlow Engine | CI intelligence, log analysis | [systems/codeflow-engine.md](systems/codeflow-engine.md) | +| AgentKit Forge | tool selection, context compression | [systems/agentkit-forge.md](systems/agentkit-forge.md) | +| Mystira | story classification, moderation, age-fit | [systems/mystira.md](systems/mystira.md) | + +## Implementation Order + +1. **AI Gateway SLM router** — Highest immediate cost-leverage +2. **CodeFlow Engine CI/PR classifier** — Fastest operational value +3. **Cognitive Mesh decomposer/router** — Strong leverage once taxonomy stabilizes +4. **AgentKit Forge tool selector** — Useful once tool inventory is mature +5. **PhoenixRooivalk operator interpreter** — Valuable, keep isolated from critical control +6. 
**Mystira story control layer** — For child-safe story generation with SLM-based moderation + +## Tiered Model Strategy + +| Tier | Use For | Examples | +| ------ | --------------------- | --------------------------------------------- | +| Tier 0 | deterministic/non-LLM | regex, schemas, policies | +| Tier 1 | SLM | classification, decomposition, tool selection | +| Tier 2 | LLM | synthesis, complex reasoning | + +## Diagram Tools + +This documentation uses **Mermaid** for inline diagrams (rendered in VS Code, GitHub, etc.). + +For high-quality published diagrams, consider: + +- **Figma MCP** - AI-powered Figma integration via VS Code extension +- **Mermaid Live Editor** - Online Mermaid diagram editing +- **Draw.io** - Traditional diagram editor + +### Using Figma MCP for Architecture Diagrams + +The [MCP Figma VS Code extension](https://github.com/sethdford/mcp-figma) enables AI-assisted diagram creation: + +1. Install the extension in VS Code +2. Configure MCP server for your AI assistant +3. Use AI to generate and edit architecture diagrams in Figma + +This is useful for creating polished, branded diagrams for presentations and documentation. + +``` + +``` diff --git a/docs/architecture/reference/c4-architecture.md b/docs/architecture/reference/c4-architecture.md new file mode 100644 index 0000000..0ce3c98 --- /dev/null +++ b/docs/architecture/reference/c4-architecture.md @@ -0,0 +1,332 @@ +# C4-Style Architecture + +This section provides C4-style diagrams showing system context, containers, and key sequences. + +## 1. System Context + +This shows the major external actors and the five core systems. 
+ +```mermaid +flowchart TB + User[Users / Operators / Developers] + Apps[Client Apps / Internal Portals / APIs] + GitHub[GitHub / CI Events / PRs / Issues] + Sensors[PhoenixRooivalk Sensors / RF / EO / Radar / Telemetry] + Providers[Model Providers / Hosted Models] + Tools[Azure / Terraform / Kusto / GitHub APIs / Internal Tools] + + subgraph Platform["PhoenixVC AI Platform"] + AIG[AI Gateway] + CM[Cognitive Mesh] + CFE[CodeFlow Engine] + AKF[AgentKit Forge] + PR[PhoenixRooivalk Edge + Command Layer] + end + + User --> AIG + Apps --> AIG + GitHub --> CFE + Sensors --> PR + + AIG --> CM + AIG --> CFE + AIG --> AKF + AIG --> PR + + CM --> Providers + CFE --> Providers + AKF --> Providers + AIG --> Providers + + CM --> Tools + CFE --> Tools + AKF --> Tools + PR --> AIG +``` + +### External Actors + +| Actor | Role | +| ------------------------------ | ------------------------------------------ | +| Users / Operators / Developers | Initiate requests, reviews, investigations | +| Apps / APIs | Consume AI control plane programmatically | +| GitHub | Triggers software delivery workflows | +| Sensors | Produce edge telemetry | +| Model Providers | Serve LLM/SLM inference | +| Tools | Execution surfaces, enterprise integration | + +### System Roles + +| System | Role | +| --------------- | -------------------------------------------- | +| AI Gateway | Front door, routing, policy, budget, caching | +| Cognitive Mesh | Multi-agent coordination and synthesis | +| CodeFlow Engine | SDLC/CI intelligence | +| AgentKit Forge | Tool-driven agent execution | +| PhoenixRooivalk | Edge detection interpretation | + +--- + +## 2. 
Container Diagram + +```mermaid +flowchart TB + subgraph Clients["Clients / Event Sources"] + C1[Web UI / Chat UI] + C2[Internal Apps / APIs] + C3[GitHub Webhooks] + C4[Operator Console] + end + + subgraph Gateway["AI Gateway"] + G1[Ingress API] + G2[SLM Classifier] + G3[Policy Scan] + G4[Budget Router] + G5[Semantic Cache] + G6[Escalation Judge] + end + + subgraph Mesh["Cognitive Mesh"] + M1[Specialist Router] + M2[Task Decomposer] + M3[State Manager] + M4[Synthesis Coordinator] + end + + subgraph Forge["AgentKit Forge"] + F1[Tool Selector] + F2[Argument Extractor] + F3[Execution Loop] + F4[Result Compressor] + end + + subgraph CodeFlow["CodeFlow Engine"] + CF1[PR / Diff Classifier] + CF2[Risk Scorer] + CF3[CI Failure Triage] + CF4[Review / Action Engine] + end + + subgraph Shared["Shared Platform Services"] + S1[Policy Engine] + S2[Observability] + S3[State Store] + S4[Vector Store] + S5[Tool Broker] + end + + subgraph Models["Model Tier"] + ML1[SLM Pool] + ML2[LLM Pool] + end + + subgraph Edge["PhoenixRooivalk Edge"] + E1[Detection Pipeline] + E2[Edge SLM Event Labeler] + E3[Edge SLM Summarizer] + E4[Edge Escalation Filter] + end + + C1 --> G1 + C2 --> G1 + C3 --> CF1 + C4 --> G1 + + G1 --> G2 + G2 --> G3 + G3 --> G4 + G4 --> G5 + G5 --> G6 + + G6 --> M1 + G6 --> F1 + G6 --> CF1 + G6 --> ML2 + + M1 --> M2 + M2 --> M3 + M3 --> M4 + + F1 --> F2 + F2 --> F3 + F3 --> F4 + + CF1 --> CF2 + CF2 --> CF3 + CF3 --> CF4 + + G3 --> S1 + G6 --> S2 + M3 --> S3 + G5 --> S3 + G5 --> S4 + F3 --> S5 + CF4 --> S5 + + E1 --> E2 + E2 --> E3 + E3 --> E4 + E4 --> G1 +``` + +### Container Responsibilities + +#### AI Gateway + +| Container | Responsibility | +| ---------------- | -------------------------------- | +| Ingress API | Entry point | +| SLM Classifier | Intent/complexity classification | +| Policy Scan | Safety/compliance gate | +| Budget Router | Tier selection | +| Semantic Cache | Avoid redundant inference | +| Escalation Judge | Small-vs-large decision | + +#### Cognitive 
Mesh + +| Container | Responsibility | +| --------------------- | ---------------- | +| Specialist Router | Picks agent(s) | +| Task Decomposer | Splits work | +| State Manager | Compressed state | +| Synthesis Coordinator | Merge + escalate | + +#### AgentKit Forge + +| Container | Responsibility | +| ------------------ | ------------------ | +| Tool Selector | Chooses tool | +| Argument Extractor | Structured inputs | +| Execution Loop | Run/retry/fallback | +| Result Compressor | Distills output | + +#### CodeFlow Engine + +| Container | Responsibility | +| -------------------- | ------------------- | +| PR/Diff Classifier | File classification | +| Risk Scorer | Risk assessment | +| CI Failure Triage | Failure bucketing | +| Review/Action Engine | Routing/actions | + +#### PhoenixRooivalk Edge + +| Container | Responsibility | +| ---------------------- | ------------------ | +| Detection Pipeline | Signal processing | +| Edge Event Labeler | Labels events | +| Edge Summarizer | Operator summaries | +| Edge Escalation Filter | Cloud escalation | + +--- + +## 3. CodeFlow Sequence + +```mermaid +sequenceDiagram + participant GH as GitHub + participant CF as CodeFlow + participant SLM as SLM Tier + participant TO as CI / Tool Broker + participant GW as AI Gateway + participant LLM as LLM Tier + + GH->>CF: PR opened / updated + CF->>SLM: classify files + intent + SLM-->>CF: infra-change, high risk + + CF->>TO: trigger CI / contract checks + TO-->>CF: logs, results + + CF->>SLM: triage failures + SLM-->>CF: breaking change detected + + CF->>GW: request remediation + GW->>LLM: analyze + explain + LLM-->>GW: remediation steps + GW-->>CF: response + + CF-->>GH: PR comment with findings +``` + +### SLM Handles + +- File classification +- Risk scoring +- Log bucketing +- Cause identification + +### LLM Handles + +- Remediation proposals +- Tradeoff explanation +- Evidence synthesis + +--- + +## 4. 
PhoenixRooivalk Sequence + +```mermaid +sequenceDiagram + participant Sensors + participant DP as Detection Pipeline + participant ESLM as Edge SLM + participant OC as Operator Console + participant GW as AI Gateway + participant CM as Cognitive Mesh + participant LLM as Cloud LLM + + Sensors->>DP: raw detections + DP->>ESLM: normalized event + ESLM-->>DP: label + summary + confidence + + DP->>OC: local alert + + alt Below threshold + DP->>OC: local record + else Above threshold + DP->>GW: compressed bundle + GW->>CM: route to workflow + CM->>LLM: deep analysis + LLM-->>CM: interpretation + CM-->>GW: response + GW-->>OC: escalated advisory + end +``` + +### Design Intent + +- Label events +- Summarize meaning +- Suppress noise +- Conserve bandwidth +- Escalate only when justified + +--- + +## 5. C4 Narrative + +### System Context + +The platform provides a unified AI control plane for developer workflows, agent orchestration, and edge intelligence. + +### Container View + +| Layer | Description | +| --------------- | ---------------------------------------- | +| Control-plane | Classification, policy, routing, caching | +| Execution | Orchestration, tools, CI, edge | +| Shared services | Policy, retrieval, memory, telemetry | +| Model | SLM and LLM workloads | +| Edge | Local interpretation + escalation | + +### Dynamic Patterns + +| Pattern | System | Description | +| -------------- | --------------- | -------------------- | +| Gateway triage | AI Gateway | Selective escalation | +| Repo triage | CodeFlow | Remediation | +| Multi-agent | Cognitive Mesh | State compression | +| Tool loops | AgentKit Forge | Result distillation | +| Edge-first | PhoenixRooivalk | Threshold escalation | diff --git a/docs/architecture/reference/contracts.md b/docs/architecture/reference/contracts.md new file mode 100644 index 0000000..7379030 --- /dev/null +++ b/docs/architecture/reference/contracts.md @@ -0,0 +1,117 @@ +# Shared Contracts + +Standardized JSON schemas used across all 
systems for consistent telemetry, routing, and event handling. + +--- + +## RoutingDecision + +Emitted for every routing decision in the gateway. + +```json +{ + "intent": "string", + "complexity": "low|medium|high", + "risk_level": "low|medium|high|critical", + "policy_status": "allow|redact|deny|review", + "needs_tool": true, + "recommended_tier": "slm|llm", + "recommended_path": "direct|tool_first|mesh|escalate", + "confidence": 0.0 +} +``` + +| Field | Type | Description | +| ---------------- | ------- | --------------------------------------------- | +| intent | string | Classified intent (e.g., "ci_failure_triage") | +| complexity | enum | Estimated task complexity | +| risk_level | enum | Risk assessment | +| policy_status | enum | Policy engine result | +| needs_tool | boolean | Whether tool invocation is required | +| recommended_tier | enum | SLM or LLM recommendation | +| recommended_path | enum | Execution path recommendation | +| confidence | float | 0.0-1.0 confidence score | + +--- + +## ModelUsageEvent + +Emitted for every model invocation for cost tracking and quality analysis. + +```json +{ + "trace_id": "uuid", + "system": "ai-gateway", + "model_tier": "slm", + "model_name": "phi-4-mini", + "token_in": 320, + "token_out": 64, + "latency_ms": 41, + "estimated_cost": 0.0002 +} +``` + +| Field | Type | Description | +| -------------- | ------ | ---------------------------- | +| trace_id | uuid | Distributed trace identifier | +| system | string | Originating system | +| model_tier | enum | slm or llm | +| model_name | string | Specific model used | +| token_in | int | Input tokens | +| token_out | int | Output tokens | +| latency_ms | int | Response time | +| estimated_cost | float | Estimated cost in USD | + +--- + +## ToolExecutionEvent + +Emitted for every tool invocation through the Tool Broker. 
+ +```json +{ + "trace_id": "uuid", + "tool_name": "azure_cli", + "action": "monitor_query", + "success": true, + "latency_ms": 820, + "retry_count": 1 +} +``` + +| Field | Type | Description | +| ----------- | ------- | ---------------------------- | +| trace_id | uuid | Distributed trace identifier | +| tool_name | string | Tool identifier | +| action | string | Action performed | +| success | boolean | Execution outcome | +| latency_ms | int | Execution time | +| retry_count | int | Number of retries | + +--- + +## EdgeEscalationPacket + +Compressed escalation from PhoenixRooivalk edge nodes. + +```json +{ + "event_id": "uuid", + "site_id": "string", + "event_label": "rf_anomaly", + "summary": "Consumer quadcopter signature near perimeter", + "confidence": 0.77, + "telemetry_refs": ["blob://..."], + "requires_cloud_analysis": true +} +``` + +| Field | Type | Description | +| ----------------------- | ------- | --------------------------------- | +| event_id | uuid | Unique event identifier | +| site_id | string | Edge site identifier | +| event_label | string | Classified event type | +| summary | string | Compressed human-readable summary | +| confidence | float | 0.0-1.0 confidence score | +| telemetry_refs | array | Blob references for raw telemetry | +| requires_cloud_analysis | boolean | Needs LLM-level analysis | diff --git a/docs/architecture/reference/cross-system.md b/docs/architecture/reference/cross-system.md new file mode 100644 index 0000000..a02ba19 --- /dev/null +++ b/docs/architecture/reference/cross-system.md @@ -0,0 +1,503 @@ +# Cross-System Architecture + +This document describes the unified production architecture that separates: + +- Control plane vs execution plane +- SLM tier vs LLM tier +- Cloud vs edge +- Policy, observability, cache, and cost controls + +## Unified Production Architecture + +```mermaid +flowchart TB + subgraph Clients["Ingress Sources"] + U1[Users] + U2[Developers / PR Events] + U3[Apps / APIs] + U4[Operators / Mission 
Console] + U5[Sensors / Telemetry] + end + + subgraph Cloud["Cloud Control Plane"] + GW[AI Gateway] + + subgraph SLMCP["SLM Control Tier"] + S1[Intent + Complexity Classifier] + S2[Policy / PII / Secret / Injection Scan] + S3[Cost + Latency Router] + S4[Semantic Cache Admission / Reuse] + S5[Context Compressor] + S6[Escalation Judge] + end + + subgraph Orchestration["Orchestration Services"] + CM[Cognitive Mesh] + AF[AgentKit Forge] + CF[CodeFlow Engine] + end + + subgraph SharedServices["Shared Platform Services"] + POL[Policy Engine] + OBS[Observability / Telemetry / Audit] + BUD[Budget + Rate Controls] + MEM[State Store / Memory / Session Context] + VC[Vector Store / Retrieval] + TOOLS[Tools / APIs / CLI / GitHub / Azure / Kusto / Terraform] + end + + subgraph LLMZone["Deep Reasoning Tier"] + L1[Reasoning LLM] + L2[Code / Analysis LLM] + L3[Research / Synthesis LLM] + end + + subgraph Providers["Provider Layer"] + P1[OpenAI / Azure OpenAI] + P2[Other Model Providers] + P3[Local Hosted Models] + end + end + + subgraph Edge["PhoenixRooivalk Edge Plane"] + RP[Signal / Detection Pipeline] + ER1[Edge SLM: Event Labeler] + ER2[Edge SLM: Threat Summarizer] + ER3[Edge SLM: Alert Composer] + ER4[Edge SLM: Escalation Filter] + OC[Operator Console] + end + + U1 --> GW + U2 --> GW + U3 --> GW + U4 --> GW + U5 --> RP + + GW --> S1 + S1 --> S2 + S2 --> S3 + S3 --> S4 + S4 --> S5 + S5 --> S6 + + S2 --> POL + S3 --> BUD + S4 --> MEM + S5 --> VC + S6 --> OBS + + S6 --> CM + S6 --> AF + S6 --> CF + S6 --> L1 + S6 --> L2 + S6 --> L3 + + CM --> MEM + CM --> TOOLS + CM --> L1 + + AF --> MEM + AF --> TOOLS + AF --> L2 + + CF --> MEM + CF --> TOOLS + CF --> L2 + + L1 --> P1 + L2 --> P1 + L3 --> P2 + L2 --> P3 + + RP --> ER1 + ER1 --> ER2 + ER2 --> ER3 + ER3 --> OC + ER2 --> ER4 + ER4 --> GW +``` + +## System Responsibilities + +### AI Gateway + +The front door that owns: + +- Request intake +- Classification +- Safety checks +- Budget-aware routing +- Cache decisions +- Escalation 
decisions + +### Cognitive Mesh + +The orchestration brain for multi-agent work: + +- Specialist routing +- Decomposition +- Shared state coordination + +### AgentKit Forge + +The tool execution runtime: + +- Tool selection +- Parameter extraction +- Execution loops + +### CodeFlow Engine + +The CI/CD intelligence plane: + +- PR/diff triage +- CI failure bucketing +- Contract breakage interpretation + +### PhoenixRooivalk + +The edge interpretation plane: + +- Event labeling +- Operator alert generation +- Low-bandwidth summaries + +--- + +## Control Plane vs Execution Plane + +```mermaid +flowchart LR + subgraph CP["Control Plane"] + A[AI Gateway] + B[SLM Routing] + C[Policy Engine] + D[Budget Controls] + E[Observability] + F[State / Memory] + end + + subgraph EP["Execution Plane"] + G[Cognitive Mesh] + H[AgentKit Forge] + I[CodeFlow Engine] + J[LLM Providers] + K[Tools / APIs] + L[PhoenixRooivalk Edge] + end + + A --> B + B --> G + B --> H + B --> I + B --> J + G --> K + H --> K + I --> K + L --> A + C --> A + D --> A + E --> A + F --> G + F --> H + F --> I +``` + +--- + +## SLM Tier vs LLM Tier + +```mermaid +flowchart TD + IN[Request / Event / Telemetry] --> SLM[SLM Tier] + + subgraph SLMOps["SLM Responsibilities"] + S1[Classify] + S2[Screen] + S3[Route] + S4[Compress] + S5[Validate] + S6[Triage] + end + + SLM --> S1 + SLM --> S2 + SLM --> S3 + SLM --> S4 + SLM --> S5 + SLM --> S6 + + S3 --> D{Escalate?} + D -->|No| OUT1[Fast / Cheap Response] + D -->|Yes| LLM[LLM Tier] + + subgraph LLMOps["LLM Responsibilities"] + L1[Deep reasoning] + L2[Complex synthesis] + L3[Ambiguous tradeoffs] + L4[Novel plan generation] + end + + LLM --> L1 + LLM --> L2 + LLM --> L3 + LLM --> L4 + LLM --> OUT2[High-value response] +``` + +--- + +## Practical Request Path (AI Gateway) + +```mermaid +sequenceDiagram + participant C as Client + participant G as AI Gateway + participant S as SLM Layer + participant T as Tools + participant M as Mesh + participant L as LLM + participant O as 
Observability + + C->>G: Request + G->>S: classify + scan + estimate complexity + S-->>G: route decision + confidence + G->>O: log request metadata + + alt Simple + G-->>C: direct low-cost response + else Tool-first + G->>M: dispatch task + M->>T: execute tools + T-->>M: tool results + M->>S: compress results + S-->>M: compact state + M-->>C: response + else Complex + G->>L: escalate with compact context + L-->>G: deep reasoning output + G-->>C: final response + end +``` + +--- + +## CodeFlow Engine CI Path + +```mermaid +flowchart TD + PR[PR / Push / Issue Event] --> C1[SLM Diff Classifier] + C1 --> C2[SLM Risk Scorer] + C2 --> C3[SLM Test Impact Predictor] + + C3 --> D{Path} + D -->|low risk| F1[Fast checks] + D -->|high risk| F2[Full CI / security / contract tests] + D -->|uncertain| F3[LLM or human review gate] + + F1 --> L[CI Logs] + F2 --> L + F3 --> L + + L --> T1[SLM Failure Triage] + T1 --> T2[SLM Comment Draft / Routing] + T2 --> T3[Action: retry / assign / block / suggest fix] +``` + +--- + +## AgentKit Forge Tool Loop + +```mermaid +flowchart LR + A[Task] --> B[SLM Tool Selector] + B --> C[Select Tool + Args] + + C --> D1[GitHub] + C --> D2[Azure] + C --> D3[Terraform] + C --> D4[Kusto] + C --> D5[Docs / Files] + + D1 --> E[SLM Result Compressor] + D2 --> E + D3 --> E + D4 --> E + D5 --> E + + E --> F{Enough?} + F -->|yes| G[Return answer] + F -->|no| H[Escalate to LLM / Mesh] +``` + +--- + +## PhoenixRooivalk Edge Path + +```mermaid +sequenceDiagram + participant S as Sensors + participant P as Detection Pipeline + participant E as Edge SLM + participant O as Operator Console + participant C as Cloud Gateway + + S->>P: RF / EO / radar / telemetry + P->>E: normalized event packet + E-->>P: label + summary + confidence + P->>O: operator alert + + alt threshold exceeded + P->>C: send compressed evidence bundle + else local-only event + P->>O: keep local record + end +``` + +--- + +## Layer Responsibilities + +| Layer | Primary | SLM Role | LLM Role | +| 
------------- | ------------------------------ | ----------------------- | -------------------- | +| Edge | PhoenixRooivalk | Reports only | None | +| Gateway | AI Gateway | Routing, security, cost | Complex reasoning | +| Orchestration | Cognitive Mesh, AgentKit Forge | Routing, tools | Synthesis | +| Intelligence | CodeFlow Engine | Triage | None | +| Synthesis | LLM Layer | None | Reasoning, synthesis | + +--- + +## Ownership Boundaries + +### AI Gateway owns + +- Ingress control +- Policy enforcement +- Routing +- Cost governance +- Model/provider abstraction +- Shared telemetry + +### Cognitive Mesh owns + +- Multi-agent coordination +- Task decomposition +- State fusion +- Escalation into deep synthesis + +### AgentKit Forge owns + +- Tool loops +- Action execution +- Extraction +- Retry/fallback behavior + +### CodeFlow Engine owns + +- Software delivery intelligence +- Repo event interpretation +- CI analysis +- Developer feedback automation + +### PhoenixRooivalk owns + +- Edge summarization +- Local alerting +- Compressed event escalation + +--- + +## Implementation Phases + +### Phase 1 — Gateway-first + +Build SLM control plane: intent classifier, policy scanner, budget router, cache gate, escalation judge + +### Phase 2 — CodeFlow Engine + +Add SLMs: diff classifier, PR risk scorer, CI failure bucketer + +### Phase 3 — AgentKit Forge + +Optimize tool loops: tool selector, arg extractor, result compressor + +### Phase 4 — Cognitive Mesh + +Add: specialist router, decomposer, state manager + +### Phase 5 — PhoenixRooivalk + +Deploy edge SLMs: event label, alert text, escalation filter + +--- + +## Shared Telemetry Schema + +```json +{ + "trace_id": "uuid", + "system": "ai-gateway|cognitive-mesh|codeflow-engine|agentkit-forge|phoenixrooivalk", + "stage": "classify|route|tool_call|llm_escalation|edge_alert", + "model_tier": "slm|llm", + "model_name": "example-model", + "decision": "allow|block|tool_first|escalate|local_only", + "confidence": 0.92, + 
"latency_ms": 83, + "token_in": 540, + "token_out": 96, + "estimated_cost": 0.0014, + "policy_flags": ["pii:none", "secret:none"], + "outcome": "success" +} +``` + +--- + +## Production Rules + +### Escalate to LLM when: + +- Confidence below threshold +- Ambiguity above threshold +- Multiple specialists disagree +- Tool results conflict +- Output is user-facing and high-stakes +- Architecture/tradeoff reasoning required + +### Stay in SLM path when: + +- Task is classification +- Task is screening +- Task is extraction +- Task is summarization +- Task is repetitive CI triage +- Task is edge-local operator support + +--- + +## C4-Style Architecture + +For detailed C4-style diagrams including: + +- System Context diagram +- Container diagram +- CodeFlow sequence +- PhoenixRooivalk edge-to-cloud sequence + +See [c4-architecture.md](c4-architecture.md) + +--- + +## Bottom Line + +The most practical target architecture: + +- **AI Gateway** as the centralized SLM control plane +- **Cognitive Mesh / AgentKit Forge / CodeFlow Engine** as execution systems +- **PhoenixRooivalk** as edge plane with local SLM autonomy +- **LLMs** reserved for synthesis, ambiguity, and hard reasoning + +> Gateway governs. SLMs triage and steer. Specialist systems execute. LLMs arbitrate the hard cases. Edge stays local unless escalation is justified. diff --git a/docs/architecture/reference/dashboards.md b/docs/architecture/reference/dashboards.md new file mode 100644 index 0000000..9e70f36 --- /dev/null +++ b/docs/architecture/reference/dashboards.md @@ -0,0 +1,17 @@ +# Recommended Dashboards + +Grafana/ADX dashboard recommendations for operational visibility. 
+ +--- + +## Dashboard Pack + +Split Grafana/ADX dashboards into these boards: + +| Dashboard | Metrics | +| -------------------- | ------------------------------------------------------------------------------------- | +| **Executive / Cost** | Total requests, SLM vs LLM ratio, cost by route, cost per outcome, escalation rate | +| **Reliability** | Error rate, tool failure rate, retry hotspots, provider latency, queue backlog | +| **Governance** | Policy blocks, redaction counts, provider data-boundary usage, audit completeness | +| **CodeFlow** | PR risk distribution, CI triage buckets, contract-break suspects, feedback usefulness | +| **Rooivalk** | Detections vs alerts, local vs escalated, site alert volume, edge latency | diff --git a/docs/architecture/reference/deployment-observability.md b/docs/architecture/reference/deployment-observability.md new file mode 100644 index 0000000..f237e21 --- /dev/null +++ b/docs/architecture/reference/deployment-observability.md @@ -0,0 +1,383 @@ +# Deployment, Trust Boundaries & Observability + +This set extends the C4 view into operational architecture including deployment, security boundaries, and telemetry. + +--- + +## 1. Deployment Diagram + +This is the practical cloud/edge deployment shape for your stack. 
+ +```mermaid +flowchart TB + subgraph Internet["Public / External"] + U1[Users / Browsers / Chat Clients] + U2[GitHub Webhooks] + U3[External APIs / Apps] + MP[Model Providers] + end + + subgraph Azure["Azure Subscription"] + DNS[Azure DNS / Front Door / App Gateway] + KV[Key Vault] + LAW[Log Analytics] + ADX[Azure Data Explorer / Kusto] + BLOB[Blob Storage] + REDIS[Redis / Cache] + DB[Postgres / Cosmos / State DB] + AISEARCH[Vector Store / AI Search] + GRAF[Grafana] + BUS[Service Bus / Queue] + MON[Azure Monitor / App Insights] + + subgraph Runtime["Runtime Plane"] + GW[AI Gateway] + CM[Cognitive Mesh] + AKF[AgentKit Forge] + CFE[CodeFlow Engine] + TB[Tool Broker] + OPA[Policy Engine] + end + + subgraph Workers["Background / Event Workers"] + W1[PR / CI Worker] + W2[Agent Task Worker] + W3[Telemetry Ingest Worker] + W4[Cost / Audit Aggregator] + end + + subgraph Models["Hosted Model Zone"] + SLM[SLM Serving Pool] + LLM[LLM Adapter / Provider Proxy] + end + end + + subgraph Edge["PhoenixRooivalk Edge Sites"] + SENS[RF / EO / Radar / Telemetry Sensors] + EDGEPIPE[Detection Pipeline] + E1[Edge SLM Event Labeler] + E2[Edge SLM Summarizer] + E3[Edge Escalation Filter] + OPC[Operator Console] + SYNC[Secure Sync Agent] + end + + U1 --> DNS + U2 --> DNS + U3 --> DNS + DNS --> GW + + GW --> REDIS + GW --> DB + GW --> AISEARCH + GW --> KV + GW --> OPA + GW --> TB + GW --> SLM + GW --> LLM + GW --> MON + + CM --> DB + CM --> BUS + CM --> AISEARCH + CM --> TB + CM --> SLM + CM --> LLM + CM --> MON + + AKF --> DB + AKF --> BUS + AKF --> TB + AKF --> SLM + AKF --> LLM + AKF --> MON + + CFE --> DB + CFE --> BUS + CFE --> TB + CFE --> SLM + CFE --> LLM + CFE --> MON + + W1 --> CFE + W2 --> AKF + W3 --> GW + W4 --> ADX + + MON --> LAW + LAW --> ADX + BLOB --> ADX + ADX --> GRAF + + MP --> LLM + + SENS --> EDGEPIPE + EDGEPIPE --> E1 + E1 --> E2 + E2 --> E3 + E2 --> OPC + E3 --> SYNC + SYNC --> GW +``` + +### Practical Reading of Deployment + +| Zone | Components | Purpose | +| 
-------------------- | --------------------------------------------------------- | ----------------------- | +| **Front door** | Azure DNS / Front Door / App Gateway | Ingress and routing | +| **Shared backing** | Key Vault, Redis, Postgres/Cosmos, AI Search, Service Bus | State, caching, secrets | +| **Runtime services** | AI Gateway, Cognitive Mesh, AgentKit Forge, CodeFlow | Core execution | +| **Workers** | PR/CI, Agent Task, Telemetry, Cost Aggregators | Background processing | +| **Model zone** | SLM Pool, LLM Adapter | AI inference | +| **Edge** | Detection Pipeline, Edge SLMs, Operator Console | Local operation | + +--- + +## 2. Trust Boundary Diagram + +This is the security-relevant segmentation. + +```mermaid +flowchart LR + subgraph TB1["Boundary 1: Public / Untrusted"] + A[Users / Browsers] + B[GitHub Webhooks] + C[External Apps] + D[Internet Traffic] + end + + subgraph TB2["Boundary 2: Controlled Ingress"] + E[Front Door / API Gateway / WAF] + F[AI Gateway] + end + + subgraph TB3["Boundary 3: Internal Control Plane"] + G[Policy Engine] + H[Budget / Rate Controls] + I[Session / State Store] + J[Semantic Cache] + K[Observability / Audit] + end + + subgraph TB4["Boundary 4: Internal Execution Plane"] + L[Cognitive Mesh] + M[AgentKit Forge] + N[CodeFlow Engine] + O[Tool Broker] + end + + subgraph TB5["Boundary 5: Sensitive Integration Zone"] + P[Key Vault] + Q[Azure APIs] + R[GitHub APIs] + S[Kusto / Terraform / Internal Tools] + end + + subgraph TB6["Boundary 6: External Model Providers"] + T[LLM Providers] + U[Hosted / External SLM Providers] + end + + subgraph TB7["Boundary 7: Edge / Field Environment"] + V[PhoenixRooivalk Edge Node] + W[Sensors] + X[Operator Console] + end + + A --> E + B --> E + C --> E + D --> E + E --> F + + F --> G + F --> H + F --> I + F --> J + F --> K + + F --> L + F --> M + F --> N + L --> O + M --> O + N --> O + + O --> Q + O --> R + O --> S + F --> P + L --> P + M --> P + N --> P + + F --> T + F --> U + L --> T + M --> T + N 
--> T + + W --> V + V --> X + V --> F +``` + +### Security Interpretation + +| Boundary | Description | +| --------- | ----------------------------------------------------------------------------------------- | +| **1 → 2** | Treat all inbound as hostile until authenticated, rate-limited, schema-validated, logged | +| **2 → 3** | AI Gateway is the only entry into internal AI control plane | +| **3 → 4** | Control-plane services decide policy, routing, cost, escalation | +| **4 → 5** | Sensitive zone: credentials, infra mutation, production APIs, write actions | +| **6** | External providers are semi-trusted - apply output scanning and redaction | +| **7** | Edge nodes are partially disconnected - need signed software, local audit, encrypted sync | + +--- + +## 3. Observability Architecture + +This is the unified telemetry design across all systems. + +```mermaid +flowchart TB + subgraph Producers["Telemetry Producers"] + P1[AI Gateway] + P2[Cognitive Mesh] + P3[AgentKit Forge] + P4[CodeFlow Engine] + P5[PhoenixRooivalk Edge] + P6[Tool Broker] + P7[Policy Engine] + end + + subgraph Signals["Signal Types"] + S1[Request / Response Logs] + S2[Routing Decisions] + S3[Policy Events] + S4[Tool Calls] + S5[Model Usage] + S6[CI / PR Events] + S7[Edge Detection Events] + S8[Cost / Token Metrics] + S9[Audit Trail] + end + + subgraph Ingest["Ingestion"] + I1[OpenTelemetry Collectors] + I2[Azure Monitor / App Insights] + I3[Blob Export] + I4[Log Analytics] + end + + subgraph Analytics["Analytics / Query"] + A1[Azure Data Explorer / Kusto] + A2[Cost Aggregates] + A3[Decision Quality Metrics] + A4[Security / Audit Views] + end + + subgraph Viz["Visualization / Alerting"] + V1[Grafana Dashboards] + V2[Alerts / On-call] + V3[Ops Runbooks] + V4[Executive Cost Views] + end + + P1 --> S1 + P1 --> S2 + P1 --> S5 + P1 --> S8 + P1 --> S9 + + P2 --> S2 + P2 --> S4 + P2 --> S5 + P2 --> S9 + + P3 --> S4 + P3 --> S5 + P3 --> S9 + + P4 --> S6 + P4 --> S2 + P4 --> S5 + P4 --> S9 + + P5 --> S7 
+ P5 --> S2 + P5 --> S9 + + P6 --> S4 + P7 --> S3 + + S1 --> I1 + S2 --> I1 + S3 --> I1 + S4 --> I1 + S5 --> I2 + S6 --> I2 + S7 --> I3 + S8 --> I2 + S9 --> I4 + + I1 --> A1 + I2 --> A1 + I3 --> A1 + I4 --> A1 + + A1 --> A2 + A1 --> A3 + A1 --> A4 + + A2 --> V1 + A3 --> V1 + A4 --> V1 + V1 --> V2 + V1 --> V3 + V1 --> V4 +``` + +### What to Measure + +#### Gateway metrics + +- Requests by route +- SLM vs LLM escalation rate +- Confidence distribution +- Token in/out averages +- Semantic cache hit rate +- Refusal/block counts +- Provider latency/error rate + +#### Cognitive Mesh metrics + +- Route-to-specialist distribution +- Decomposition count per task +- Summary compression ratio +- Multi-agent disagreement rate +- Escalation rate to LLM synthesis + +#### AgentKit Forge metrics + +- Tool selection accuracy +- Retry counts +- Fallback frequency +- Avg tool-loop depth +- Tool output compression ratio + +#### CodeFlow Engine metrics + +- PR classification distribution +- False positive/negative on risk tier +- CI failure bucket frequency +- Contract-break detection precision +- Comment usefulness feedback + +#### PhoenixRooivalk metrics + +- Local-only vs escalated events +- Edge summary latency +- Alert volume per session +- Signal-to-alert compression ratio +- Dropped/deferred syncs diff --git a/docs/architecture/reference/matrix-agentkit.md b/docs/architecture/reference/matrix-agentkit.md new file mode 100644 index 0000000..ef3cd79 --- /dev/null +++ b/docs/architecture/reference/matrix-agentkit.md @@ -0,0 +1,110 @@ +# AgentKit Forge SLM Implementation + +## SLM Endpoints + +| Endpoint | Method | Purpose | +| ----------------------- | ------ | ------------------------------------------------- | +| `/slm/select-tool` | POST | Maps request to GitHub/Azure/Terraform/Kusto/docs | +| `/slm/filter-context` | POST | Selects only relevant memory/state | +| `/slm/estimate-budget` | POST | Predicts steps, token tier, tool-first viability | +| `/slm/check-escalation` | POST 
| Decides whether LLM planning is needed | + +## Service Boundaries + +```mermaid +flowchart TD + A[Agent Runtime] --> B[Task Intake] + B --> C[SLM Tool Selector] + C --> D{Tool / Reason / Direct} + D --> E[Tool Adapter Layer] + D --> F[Direct Response] + D --> G[LLM Planner] + E --> H[State Store] + G --> H + H --> I[SLM Context Filter] + I --> J[Next Action] +``` + +## Example Responses + +**select-tool:** + +```json +{ + "action_mode": "tool", + "tool": "azure_cli", + "operation_family": "cost_management", + "arguments_hint": { "service": "foundry", "time_window": "last_30_days" }, + "confidence": 0.89 +} +``` + +**estimate-budget:** + +```json +{ + "predicted_steps": 4, + "token_cost_tier": "medium", + "tool_first_recommended": true, + "llm_needed": false, + "confidence": 0.81 +} +``` + +## Contract Shapes + +```typescript +interface SelectToolOutput { + action_mode: "tool" | "reason" | "direct"; + tool: "github" | "azure_cli" | "terraform" | "kusto" | "docs_search"; + operation_family: string; + arguments_hint: Record<string, unknown>; + confidence: number; +} + +interface EstimateBudgetOutput { + predicted_steps: number; + token_cost_tier: "low" | "medium" | "high"; + tool_first_recommended: boolean; + llm_needed: boolean; + confidence: number; +} +``` + +## Telemetry Fields + +| Field | Type | Description | +| ------------------- | ------- | ------------------ | +| `agent_run_id` | uuid | Unique run ID | +| `selected_tool` | string | Tool selected | +| `action_mode` | string | tool/reason/direct | +| `budget_tier` | string | Cost tier | +| `predicted_steps` | number | Steps predicted | +| `escalated_to_llm` | boolean | LLM invoked | +| `compression_ratio` | number | Context reduced | + +## Fallback Rules + +| Condition | Action | +| ----------------------------- | --------------------------- | +| No tool confidence >= 0.80 | Don't execute automatically | +| Context filter low | Preserve more context | +| Budget low but ambiguity high | Escalate to planner | +| Tool failure |
Classify before retry | + +## Configurable Thresholds + +```typescript +const DEFAULT_THRESHOLDS = { + tool_selection: { direct_execute: 0.85, require_confirm: 0.7 }, + context_filtering: { aggressive: 0.85, conservative: 0.78 }, + escalation_check: { continue_tools: 0.8, llm_planning: 0.65 }, + budget_estimate: { reliable: 0.75, uncertain: 0.6 }, +}; +``` + +| Threshold | Action | +| --------- | -------------------- | +| >= 0.85 | Direct execution | +| 0.70-0.84 | Require confirmation | +| < 0.70 | Decline / clarify | diff --git a/docs/architecture/reference/matrix-codeflow.md b/docs/architecture/reference/matrix-codeflow.md new file mode 100644 index 0000000..90d2fe7 --- /dev/null +++ b/docs/architecture/reference/matrix-codeflow.md @@ -0,0 +1,110 @@ +# CodeFlow Engine SLM Implementation + +## SLM Endpoints + +| Endpoint | Method | Purpose | +| ---------------------------- | ------ | --------------------------------------------------------------- | +| `/slm/classify-change` | POST | Determines: docs/code/config/infra/security, risk, blast radius | +| `/slm/suggest-pipeline` | POST | Fast path vs full path | +| `/slm/summarize-failure` | POST | Turns CI output into actionable summary | +| `/slm/release-note-fragment` | POST | Generates structured change summary | + +## Service Boundaries + +```mermaid +flowchart TD + A[GitHub Event] --> B[Diff / Metadata Collector] + B --> C[SLM Change Classifier] + C --> D[Pipeline Policy Engine] + D --> E[CI Path Selection] + E --> F[Workflow Execution] + F --> G[SLM Failure Summarizer] + G --> H[PR Comment / Status] +``` + +## Example Responses + +**classify-change:** + +```json +{ + "change_type": "infra", + "risk": "high", + "blast_radius": "shared_environment", + "requires_contract_validation": false, + "requires_security_scan": true, + "recommended_pipeline": "full", + "confidence": 0.91 +} +``` + +**summarize-failure:** + +```json +{ + "failure_type": "test_failure", + "subtype": "integration_environment", + "retryable": 
true, + "summary": "Integration tests failed due to unreachable dependent service.", + "recommended_next_action": "retry once and verify service container health", + "confidence": 0.83 +} +``` + +## Contract Shapes + +```typescript +interface ClassifyChangeOutput { + change_type: "docs" | "code" | "config" | "schema" | "infra" | "security"; + risk: "low" | "medium" | "high" | "critical"; + blast_radius: "local_only" | "shared_environment" | "production"; + requires_security_scan: boolean; + recommended_pipeline: "fast" | "full"; + confidence: number; +} + +interface SummarizeFailureOutput { + failure_type: string; + retryable: boolean; + summary: string; + recommended_next_action: string; + confidence: number; +} +``` + +## Telemetry Fields + +| Field | Type | Description | +| ------------------------------- | ------ | ------------------- | +| `repo` | string | Repository | +| `pr_number` | number | PR number | +| `change_type` | string | Classified type | +| `risk` | string | Risk level | +| `pipeline_selected` | string | Path chosen | +| `slm_classification_latency_ms` | number | Classification time | +| `workflow_duration_ms` | number | Total duration | + +## Fallback Rules + +| Condition | Action | +| ----------------------------------- | ------------------------------------ | +| Never skip mandatory tests from SLM | Hard policy enforcement | +| High-risk + low confidence | Choose stricter pipeline | +| Classifier unavailable | Default conservative path | +| Failure uncertain | No destructive reruns without policy | + +## Configurable Thresholds + +```typescript +const DEFAULT_THRESHOLDS = { + change_classification: { direct_use: 0.88, manual_review: 0.75 }, + pipeline_suggestion: { direct_path: 0.85, force_full_path: 0.7 }, + failure_summary: { direct_use: 0.8, require_human: 0.65 }, +}; +``` + +| Threshold | Action | +| --------- | ----------------- | +| >= 0.88 | Direct use | +| 0.75-0.87 | Verify with rules | +| < 0.75 | Manual review | diff --git 
a/docs/architecture/reference/matrix-cognitive-mesh.md b/docs/architecture/reference/matrix-cognitive-mesh.md new file mode 100644 index 0000000..5d11f33 --- /dev/null +++ b/docs/architecture/reference/matrix-cognitive-mesh.md @@ -0,0 +1,112 @@ +# Cognitive Mesh SLM Implementation + +## SLM Endpoints + +| Endpoint | Method | Purpose | +| ------------------------ | ------ | ------------------------------------------- | +| `/slm/decompose-task` | POST | Break complex request into agent tasks | +| `/slm/route-agent` | POST | Route task to appropriate specialist agent | +| `/slm/compress-context` | POST | Compress long context for agent consumption | +| `/slm/validate-response` | POST | Validate agent response coherence | + +## Service Boundaries + +```mermaid +flowchart TD + A[Mesh Entry] --> B[SLM Router] + B --> C{Single or Multi-Agent?} + C -->|Single| D[Specialist Agent] + C -->|Multi| E[SLM Decomposer] + E --> F[Task Graph] + F --> G[Specialist Agents] + D --> H[Evidence Store] + G --> H + H --> I[SLM Compressor] + I --> J[LLM Synthesizer] +``` + +## Example Responses + +**route-agent:** + +```json +{ + "mode": "multi_agent", + "agents": ["infra_agent", "cost_agent", "security_agent"], + "priority": "normal", + "reason_codes": ["azure", "cost", "security_terms"], + "confidence": 0.87 +} +``` + +**decompose-task:** + +```json +{ + "subtasks": [ + { "id": "t1", "agent": "infra_agent", "goal": "inventory deployed Azure resources" }, + { "id": "t2", "agent": "cost_agent", "goal": "identify cost spikes" }, + { "id": "t3", "agent": "security_agent", "goal": "check for unauthorized usage" } + ], + "confidence": 0.82 +} +``` + +## Contract Shapes + +```typescript +interface RouteAgentOutput { + target_agent: string; + mode: "single_agent" | "parallel_agents" | "sequential"; + escalation_required: boolean; + fallback_agent?: string; + confidence: number; +} + +interface DecomposeTaskOutput { + tasks: { + id: string; + description: string; + agent_type: string; + 
dependencies: string[]; + }[]; + estimated_complexity: "low" | "medium" | "high"; + confidence: number; +} +``` + +## Telemetry Fields + +| Field | Type | Description | +| --------------------- | -------- | ------------------- | +| `mesh_run_id` | uuid | Unique execution ID | +| `route_mode` | string | single/multi agent | +| `selected_agents` | string[] | Agents selected | +| `decomposition_count` | number | Subtasks created | +| `compression_ratio` | number | Tokens reduced | +| `escalated_to_llm` | boolean | LLM used | + +## Fallback Rules + +| Condition | Action | +| -------------------- | ------------------------- | +| Route confidence low | Send to orchestration LLM | +| Decomposition low | Single-agent fallback | +| Compression low | Pass fuller context | +| No agent matches | Default to "research" | + +## Configurable Thresholds + +```typescript +const DEFAULT_THRESHOLDS = { + agent_routing: { direct_route: 0.85, verify_with_rules: 0.7 }, + task_decomposition: { direct_decompose: 0.8, single_agent_fallback: 0.65 }, + context_compression: { aggressive: 0.85, conservative: 0.78 }, +}; +``` + +| Threshold | Action | +| --------- | ----------------- | +| >= 0.85 | Direct routing | +| 0.70-0.84 | Verify with rules | +| < 0.70 | Escalate to LLM | diff --git a/docs/architecture/reference/matrix-gateway.md b/docs/architecture/reference/matrix-gateway.md new file mode 100644 index 0000000..4551887 --- /dev/null +++ b/docs/architecture/reference/matrix-gateway.md @@ -0,0 +1,111 @@ +# AI Gateway SLM Implementation + +## SLM Endpoints + +| Endpoint | Method | Purpose | +| ------------------------ | ------ | ----------------------------------------------------- | +| `/slm/classify-request` | POST | Infer intent, estimate complexity, detect toolability | +| `/slm/policy-screen` | POST | PII/secrets/prompt injection scan, tenant policy fit | +| `/slm/post-tag-response` | POST | Normalize telemetry tags, classify business category | + +## Service Boundaries + +```mermaid 
+flowchart TD + A[Gateway API] --> B[Policy Engine] + B --> C[SLM Router Service] + C --> D[Model Selection Engine] + D --> E[Provider Adapter] + E --> F[LLM / SLM / Tool] + F --> G[Response Validator] + G --> H[Telemetry + Billing] +``` + +## Example Request/Response + +**Request:** + +```json +{ + "tenant_id": "phoenixvc-prod", + "user_input": "Review this PR and tell me if the API contract changed.", + "context": { + "channel": "web", + "has_files": true, + "history_len": 7 + } +} +``` + +**Response:** + +```json +{ + "intent": "code_review", + "complexity": "medium", + "tool_candidate": true, + "recommended_target": "codeflow-engine", + "recommended_model_tier": "small", + "escalation_required": false, + "confidence": 0.93 +} +``` + +## Contract Shapes + +```typescript +interface ClassifyRequestOutput { + request_id: string; + label: "code_review" | "chat" | "analysis" | "tool_invocation" | "embedding"; + confidence: number; + complexity: "low" | "medium" | "high"; + tool_candidate: boolean; + recommended_tier: "slm" | "small" | "large"; + cacheable: boolean; +} + +interface PolicyScreenOutput { + allowed: boolean; + risk_level: "low" | "medium" | "high" | "critical"; + risk_categories: string[]; + action: "allow" | "rewrite" | "block" | "escalate"; + confidence: number; +} +``` + +## Telemetry Fields + +| Field | Type | Description | +| ------------------- | ------- | ------------------- | +| `tenant_id` | string | Tenant identifier | +| `slm_latency_ms` | number | SLM processing time | +| `intent` | string | Classified intent | +| `complexity` | string | Complexity level | +| `risk_level` | string | Risk assessment | +| `tool_candidate` | boolean | Tool recommendation | +| `escalated_to_llm` | boolean | Whether escalated | +| `cost_estimate_usd` | number | Estimated cost | + +## Fallback Rules + +| Condition | Action | +| -------------------------------- | ---------------------- | +| `policy-screen.allowed == false` | Block or redact | +| `confidence < 0.70` 
| Escalate to LLM | +| Tool suggested but no mapping | Send to general LLM | +| Tagging fails | Mark telemetry partial | + +## Configurable Thresholds + +```typescript +const DEFAULT_THRESHOLDS = { + intent_classification: { direct_route: 0.9, verify_with_rules: 0.75 }, + policy: { block_immediately: ["critical_secrets"], escalate_to_review: 0.6 }, +}; +``` + +| Threshold | Action | +| --------- | ----------------- | +| >= 0.90 | Direct routing | +| 0.75-0.89 | Verify with rules | +| < 0.75 | Escalate to LLM | diff --git a/docs/architecture/reference/matrix-mystira.md b/docs/architecture/reference/matrix-mystira.md new file mode 100644 index 0000000..ae81461 --- /dev/null +++ b/docs/architecture/reference/matrix-mystira.md @@ -0,0 +1,137 @@ +# Mystira SLM Implementation + +## SLM Endpoints + +| Endpoint | Method | Purpose | +| ---------------------------- | ------ | ---------------------------------------------------------------- | +| `/slm/classify-session` | POST | Determines: bedtime/educational/adventure/branching/continuation | +| `/slm/check-safety-agefit` | POST | Ensures age appropriateness, tone, blocked content | +| `/slm/check-continuity` | POST | Maintains character consistency, world rules | +| `/slm/shape-image-prompt` | POST | Converts story scene to safe, style-consistent prompt | +| `/slm/compress-story-memory` | POST | Keeps only relevant story state | + +## Service Boundaries + +```mermaid +flowchart TD + A[User / Parent / Educator Input] --> B[Story Session Manager] + B --> C[SLM Session Classifier] + C --> D[Safety + Age Fit] + D --> E{Simple or Creative} + E -->|Simple| F[Template / Guided Story Engine] + E -->|Creative| G[LLM Narrative Engine] + G --> H[SLM Continuity + Reading Level Pass] + F --> H + H --> I[Story Output] + H --> J[Image Prompt Shaper] +``` + +## Example Responses + +**check-safety-agefit:** + +```json +{ + "allowed": true, + "age_band": "8-10", + "tone": "gentle_adventure", + "rewrite_needed": false, + "blocked_categories": 
[], + "confidence": 0.94 +} +``` + +**check-continuity:** + +```json +{ + "consistent": true, + "issues": [], + "retained_story_facts": [ + "main character is Luma", + "forest companion is a silver fox", + "setting is moonlit valley" + ], + "confidence": 0.86 +} +``` + +**shape-image-prompt:** + +```json +{ + "prompt": "A child-safe illustrated moonlit valley scene with Luma and a silver fox, soft wonder, readable composition, no frightening imagery.", + "safety_checked": true, + "style_profile": "mystira_storybook_v1", + "confidence": 0.9 +} +``` + +## Contract Shapes + +```typescript +interface ClassifySessionOutput { + story_type: "bedtime" | "educational" | "adventure" | "branching" | "continuation"; + age_band: string; + is_interactive: boolean; + needs_images: boolean; + curriculum_tags: string[]; + confidence: number; +} + +interface CheckSafetyAgefitOutput { + allowed: boolean; + age_band: string; + tone: string; + rewrite_needed: boolean; + blocked_categories: string[]; + confidence: number; +} + +interface ShapeImagePromptOutput { + prompt: string; + safety_checked: boolean; + style_profile: string; + confidence: number; +} +``` + +## Telemetry Fields + +| Field | Type | Description | +| ----------------------- | ------- | ---------------- | +| `session_id` | uuid | Session ID | +| `story_mode` | string | Classification | +| `age_band` | string | Target age | +| `safety_action` | string | Action taken | +| `rewrite_applied` | boolean | Rewritten | +| `continuity_check_used` | boolean | Validated | +| `image_prompt_shaped` | boolean | Prompt generated | +| `slm_cost` | number | SLM cost | +| `llm_cost` | number | LLM cost | + +## Fallback Rules + +| Condition | Action | +| ------------------ | ------------------------ | +| Safety uncertainty | Safe rewrite or refuse | +| Continuity low | Pass more history to LLM | +| Image shaping low | Conservative template | +| Age-fit uncertain | Default younger-safe | + +## Configurable Thresholds + +```typescript 
+const DEFAULT_THRESHOLDS = { + session_classification: { direct_use: 0.88, require_review: 0.75 }, + safety_agefit: { direct_allow: 0.92, require_rewrite: 0.8, block: 0.8 }, + continuity: { direct_use: 0.82, pass_to_llm: 0.7 }, + image_prompt: { direct_use: 0.88, conservative: 0.75 }, +}; +``` + +| Threshold | Action | +| --------- | ------------- | +| >= 0.92 | Direct allow | +| 0.80-0.91 | Rewrite/adapt | +| < 0.80 | Block content | diff --git a/docs/architecture/reference/matrix-rooivalk.md b/docs/architecture/reference/matrix-rooivalk.md new file mode 100644 index 0000000..29f20da --- /dev/null +++ b/docs/architecture/reference/matrix-rooivalk.md @@ -0,0 +1,120 @@ +# PhoenixRooivalk SLM Implementation + +## SLM Endpoints + +| Endpoint | Method | Purpose | +| ------------------------------- | ------ | ---------------------------------------------------- | +| `/slm/interpret-event` | POST | Turns fused detection into operator-readable summary | +| `/slm/suggest-sop` | POST | Maps event type to likely SOP references | +| `/slm/condense-mission-log` | POST | Produces incident record | +| `/slm/classify-incident-report` | POST | Creates structured post-event label set | + +## Service Boundaries + +```mermaid +flowchart TD + A[RF / Radar / EO / IR / Rules] --> B[Fusion + Threat Scoring] + B --> C[Hard Decision Layer] + B --> D[SLM Interpretation Layer] + D --> E[Operator Console Summary] + D --> F[SOP Suggestions] + D --> G[Mission Narrative] + C --> H[Manual Review / Control Path] +``` + +## CRITICAL: SLM is for Reporting Only + +``` +┌─────────────────────────────────────────────────────────┐ +│ IMPORTANT - SAFETY BOUNDARY │ +├─────────────────────────────────────────────────────────┤ +│ Hard Decision Layer must NOT depend on free-form SLM │ +│ │ +│ SLM output is for OBSERVATION and REPORTING only: │ +│ • Operator summaries │ +│ • SOP suggestions (non-binding) │ +│ • Mission log condensation │ +│ │ +│ SLM must NEVER be used for: │ +│ • Autonomous threat response │ 
+│ • Access control decisions │ +│ • Resource isolation actions │ +│ • Any kinetic or hard control actions │ +└─────────────────────────────────────────────────────────┘ +``` + +## Example Responses + +**interpret-event:** + +```json +{ + "title": "Low-altitude inbound contact", + "facts": ["sector north-east", "altitude 35m", "consumer quadcopter RF profile"], + "inferences": ["possible perimeter reconnaissance"], + "operator_summary": "Inbound low-altitude contact detected from north-east sector.", + "confidence": 0.77 +} +``` + +**suggest-sop:** + +```json +{ + "recommended_sops": ["SOP-12 Verify EO feed", "SOP-21 Raise perimeter alert state"], + "confidence": 0.74 +} +``` + +## Contract Shapes + +```typescript +interface InterpretEventOutput { + title: string; + facts: string[]; + inferences: string[]; + operator_summary: string; + confidence: number; +} + +interface SuggestSopOutput { + recommended_sops: string[]; + confidence: number; +} +``` + +## Telemetry Fields + +| Field | Type | Description | +| ------------------------- | -------- | ------------------ | +| `incident_id` | uuid | Incident ID | +| `sensor_fusion_version` | string | Fusion version | +| `threat_score` | number | Calculated score | +| `slm_interpretation_used` | boolean | SLM invoked | +| `sop_suggestions` | string[] | SOPs suggested | +| `human_acknowledged` | boolean | Human acknowledged | +| `offline_mode` | boolean | Offline mode | + +## Fallback Rules + +| Condition | Action | +| ----------------------------- | ---------------------------- | +| Interpretation low confidence | Show facts only | +| SOP low confidence | "Manual SOP lookup required" | +| Edge model unavailable | Use non-LLM summaries | +| SOP generated | NEVER pass to control path | + +## Configurable Thresholds + +```typescript +const DEFAULT_THRESHOLDS = { + operator_summary: { direct_use: 0.8, facts_only: 0.65 }, + sop_suggestion: { direct_suggest: 0.78, manual_lookup: 0.65 }, +}; +``` + +| Threshold | Action | +| 
--------- | ---------------------------- | +| >= 0.80 | Full summary with inferences | +| 0.65-0.79 | Facts only, no inferences | +| < 0.65 | Human analysis | diff --git a/docs/architecture/reference/operations-patterns.md b/docs/architecture/reference/operations-patterns.md new file mode 100644 index 0000000..01e9b84 --- /dev/null +++ b/docs/architecture/reference/operations-patterns.md @@ -0,0 +1,152 @@ +# Operations Patterns + +Operational patterns including SLM→LLM decision flows, ownership maps, and implementation guidance. + +--- + +## SLM → LLM Decision Flow + +Production handoff logic for routing between SLM and LLM tiers. + +```mermaid +flowchart TD + A[Incoming task / event / request] --> B[SLM preprocess] + + B --> C[Intent classification] + C --> D[Policy / risk scan] + D --> E[Tool-needed check] + E --> F[Complexity estimate] + F --> G[Confidence score] + + G --> H{Blocked by policy?} + H -->|Yes| X[Refuse / redact / quarantine] + H -->|No| I{Simple and high confidence?} + + I -->|Yes| Y[Return SLM path result] + I -->|No| J{Tool first?} + + J -->|Yes| K[Run tool / workflow] + K --> L[SLM compress + validate tool output] + L --> M{Enough to answer?} + M -->|Yes| Y + M -->|No| N[Escalate] + + J -->|No| N[Escalate] + + N --> O[Prepare compact escalation context] + O --> P[LLM reasoning / synthesis] + P --> Q[Post-LLM policy / quality check] + Q --> R[Return final response] +``` + +### Threshold Guidelines + +Use configurable thresholds, not hardcoded logic. 
+ +| Stay in SLM Path | Escalate to LLM | +| ----------------------------------- | -------------------------- | +| High confidence | Confidence below threshold | +| Classification/extraction/screening | Policy ambiguity exists | +| Short, bounded output | Tool outputs conflict | +| Unambiguous tool result | Multi-agent disagreement | +| Low risk | User-facing, high impact | + +### Decision Schema + +```json +{ + "intent": "ci_failure_triage", + "risk_level": "medium", + "needs_tool": true, + "complexity": "medium", + "confidence": 0.81, + "policy_status": "allow", + "recommended_path": "tool_first", + "escalate": false +} +``` + +--- + +## Repo-to-Service Ownership Map + +Maps conceptual stack into likely repo/service boundaries. + +```mermaid +flowchart LR + R1[pvc-ai-gateway repo] --> S1[AI Gateway Service] + R2[cognitive-mesh repo] --> S2[Cognitive Mesh Service] + R3[codeflow-engine repo] --> S3[CodeFlow Engine Service] + R4[agentkit-forge repo] --> S4[AgentKit Forge Service] + R5[phoenixrooivalk-* repos] --> S5[PhoenixRooivalk Edge + Command Services] + R6[shared-platform / contracts / schemas repo] --> S6[Shared Contracts / Telemetry / Policy / SDKs] + R7[infra repo] --> S7[Azure Infra / Monitoring / Deployment Pipelines] +``` + +### Ownership Summary + +| Repo | Owns | +| ---------------------- | ------------------------------------------------------------------------------------------------------- | +| **pvc-ai-gateway** | Ingress API, routing contracts, escalation policy, provider abstraction, semantic cache, audit envelope | +| **cognitive-mesh** | Specialist routing, task decomposition, agent state model, synthesis orchestration, disagreement logic | +| **codeflow-engine** | PR event models, diff classification, CI log triage, contract break workflows, comment generation | +| **agentkit-forge** | Tool registry, tool selection schemas, arg extraction, execution-loop state, retry/fallback logic | +| **phoenixrooivalk-\*** | Edge event schema, local 
alerting, escalation packet format, command-layer integration | +| **shared-platform** | Telemetry envelope, routing decision schema, model usage schema, audit/trace IDs, reusable schemas | +| **infra** | Azure deployment, Grafana/ADX dashboards, Key Vault wiring, service identities, networking | + +--- + +## Implementation Order + +### First + +Define shared contracts: + +- Routing decision schema +- Model usage event +- Tool execution event +- Audit envelope +- Edge escalation packet + +### Second + +Implement telemetry in the gateway: + +- Trace ID propagation +- Decision logs +- Provider usage events +- Cost estimation fields + +### Third + +Bring CodeFlow and AgentKit onto same telemetry envelope. + +### Fourth + +Add Cognitive Mesh orchestration and disagreement telemetry. + +### Fifth + +Add Rooivalk edge packet telemetry and sync audit. + +--- + +## Architectural Recommendation + +For your environment, the strongest production stance is: + +1. **AI Gateway is the only public AI ingress** +2. **All routing decisions emit one shared RoutingDecision contract** +3. **All model calls emit one shared ModelUsageEvent** +4. **All tool invocations flow through a broker or shared event schema** +5. **All edge escalations use compact evidence packets** +6. 
**ADX/Kusto + Grafana becomes the operational truth layer** + +This gives you: + +- Cost visibility +- Quality visibility +- Compliance evidence +- Easier A/B testing of SLM routing +- Cleaner failure diagnosis diff --git a/docs/architecture/reference/slm-implementation-matrix.md b/docs/architecture/reference/slm-implementation-matrix.md new file mode 100644 index 0000000..eb1fc6e --- /dev/null +++ b/docs/architecture/reference/slm-implementation-matrix.md @@ -0,0 +1,260 @@ +# SLM Implementation Matrix + +This document provides a repo-by-repo implementation matrix showing SLM endpoints, contract shapes, telemetry fields, fallback rules, confidence thresholds, and practical service boundaries across all six platforms. + +## Quick Reference + +| Platform | SLM Role | Key Endpoints | +| --------------- | --------------------------------------- | ----------------------------------------- | +| AI Gateway | routing, policy, cost control | /classify-request, /policy-screen | +| Cognitive Mesh | agent routing, decomposition | /route-agent, /decompose-task | +| CodeFlow Engine | PR triage, failure analysis | /classify-change, /summarize-failure | +| AgentKit Forge | tool selection, context shaping | /select-tool, /estimate-budget | +| PhoenixRooivalk | event interpretation, SOP suggestions | /interpret-event, /suggest-sop | +| Mystira | story safety, continuity, image prompts | /check-safety-agefit, /shape-image-prompt | + +## Documentation Structure + +``` +reference/ +├── slm-implementation-matrix.md # This file +├── matrix-gateway.md # AI Gateway details +├── matrix-cognitive-mesh.md # Cognitive Mesh details +├── matrix-codeflow.md # CodeFlow Engine details +├── matrix-agentkit.md # AgentKit Forge details +├── matrix-rooivalk.md # PhoenixRooivalk details +└── matrix-mystira.md # Mystira details +``` + +--- + +## 1. 
Cross-Stack Operating Model + +Use the same control pattern everywhere: + +```mermaid +flowchart LR + A[Input / Event / Request] --> B[Deterministic Guards] + B --> C[SLM Control Layer] + C --> D{Confidence + Policy} + D -->|high confidence| E[Direct Action / Route / Summarize] + D -->|medium confidence| F[Tool Path / Restricted Flow] + D -->|low confidence| G[LLM Escalation] + G --> H[Post-Validation] + E --> I[Telemetry + Audit] + F --> I + H --> I +``` + +--- + +## 2. Canonical SLM Service Interfaces + +These are the reusable interface families standardized across the stack. + +### A. Classification Contract + +```json +{ + "request_id": "uuid", + "label": "code_review", + "confidence": 0.91, + "secondary_labels": ["security_review"], + "reason_codes": ["contains_diff", "contains_code_terms"], + "recommended_action": "route_security_agent" +} +``` + +### B. Routing Contract + +```json +{ + "request_id": "uuid", + "target": "infra_agent", + "mode": "single_agent", + "escalation_required": false, + "tool_candidate": true, + "cost_tier": "low", + "confidence": 0.88 +} +``` + +### C. Compression Contract + +```json +{ + "request_id": "uuid", + "summary": "User wants Azure cost anomaly investigation for Foundry usage.", + "retained_facts": [ + "resource deleted on 2026-03-05", + "billing visible from 2026-03-03", + "suspected partner local usage" + ], + "dropped_categories": ["small talk", "repeated screenshots"], + "confidence": 0.84 +} +``` + +### D. Safety / Moderation Contract + +```json +{ + "request_id": "uuid", + "allowed": true, + "risk_level": "low", + "risk_categories": [], + "action": "allow", + "confidence": 0.96 +} +``` + +### E. 
Summarization / Operator Brief Contract + +```json +{ + "request_id": "uuid", + "title": "Possible perimeter drone approach", + "summary": "Low-altitude approach detected from north-east sector.", + "facts": ["altitude 35m", "entry vector north-east", "rf profile matched consumer quadcopter"], + "inferences": ["possible surveillance behavior"], + "recommended_next_step": "verify EO feed and initiate SOP-12", + "confidence": 0.79 +} +``` + +--- + +## 3. Cross-Platform Confidence Policy + +A unified confidence policy across all platforms: + +| Confidence | Meaning | Action | +| ---------- | ----------------- | ----------------------------------------- | +| 0.90-1.00 | Strong confidence | Direct automated route/action | +| 0.80-0.89 | Acceptable | Automate with validation | +| 0.70-0.79 | Uncertain | Restricted automation or human/LLM assist | +| < 0.70 | Weak | Escalate or safe fallback | + +```mermaid +flowchart TD + A[SLM Result] --> B{Confidence >= 0.90?} + B -->|Yes| C[Direct Action] + B -->|No| D{Confidence >= 0.80?} + D -->|Yes| E[Validate & Proceed] + D -->|No| F{Confidence >= 0.70?} + F -->|Yes| G[Restricted Automation] + F -->|No| H[Escalate / Fallback] + E --> I[Execute] + G --> I + H --> I +``` + +--- + +## 4. 
Cross-Platform Telemetry Schema + +Use a common event envelope across all repos: + +```json +{ + "event_id": "uuid", + "timestamp_utc": "2026-03-15T10:00:00Z", + "platform": "codeflow-engine", + "component": "slm-change-classifier", + "model": "phi-3-mini", + "operation": "classify-change", + "latency_ms": 42, + "input_tokens": 612, + "output_tokens": 87, + "confidence": 0.91, + "action_taken": "full_pipeline", + "escalated": false, + "cost_estimate_usd": 0.0004, + "trace_id": "trace-123" +} +``` + +### Recommended Common Fields + +| Field | Type | Description | +| ------------------- | ------- | ------------------------ | +| `event_id` | uuid | Unique event identifier | +| `trace_id` | uuid | Distributed trace ID | +| `platform` | string | System name | +| `component` | string | Specific component | +| `operation` | string | Operation performed | +| `model` | string | Model used | +| `model_version` | string | Model version | +| `latency_ms` | number | Processing time | +| `input_tokens` | number | Input token count | +| `output_tokens` | number | Output token count | +| `confidence` | number | Model confidence | +| `action_taken` | string | Action taken | +| `escalated` | boolean | Whether escalated to LLM | +| `fallback_reason` | string | Fallback reason | +| `cost_estimate_usd` | number | Estimated cost | +| `tenant_or_project` | string | Tenant identifier | +| `environment` | string | Environment | + +--- + +## 5. Recommended Model-Role Mapping + +This is a practical role map, not a vendor mandate. 
+ +| Role | Recommended Model Profile | +| ----------------------- | ------------------------------------- | +| Classification | Very small, fast instruct model | +| Routing | Small instruct model with strict JSON | +| Safety Prefilter | Small model + deterministic rules | +| Compression | Small/medium model with schema output | +| Failure Summarization | Small instruct model | +| Creative Storytelling | Larger narrative-capable model | +| Deep Synthesis | Larger reasoning model | +| Edge Operator Summaries | Compact on-device model | + +--- + +## 6. Implementation Order + +### Phase 1: Foundation + +- AI Gateway request classifier +- CodeFlow change classifier +- AgentKit tool selector + +### Phase 2: Expansion + +- Cognitive Mesh router + decomposer +- Mystira safety/continuity layer + +### Phase 3: Maturation + +- PhoenixRooivalk operator interpreter +- Shared telemetry normalization +- Confidence calibration dashboards + +--- + +## 7. Cross-System Summary + +### Confidence Threshold Summary + +| System | High (direct) | Medium (verify) | Low (escalate) | +| --------------- | ------------- | --------------- | -------------- | +| AI Gateway | >= 0.90 | 0.75-0.89 | < 0.75 | +| Cognitive Mesh | >= 0.85 | 0.70-0.84 | < 0.70 | +| CodeFlow | >= 0.88 | 0.75-0.87 | < 0.75 | +| AgentKit Forge | >= 0.85 | 0.70-0.84 | < 0.70 | +| PhoenixRooivalk | >= 0.80 | 0.65-0.79 | < 0.65 | +| Mystira | >= 0.92 | 0.80-0.91 | < 0.80 | + +### Standard Fallback Pattern + +``` +1. SLM timeout → Deterministic rules +2. Low confidence → LLM escalation +3. Safety critical → Block immediately +4. Unknown classification → Safe default +5. 
All failures → Log + alert + human review +``` diff --git a/docs/architecture/reference/slm-management-plan.md b/docs/architecture/reference/slm-management-plan.md new file mode 100644 index 0000000..7c116c5 --- /dev/null +++ b/docs/architecture/reference/slm-management-plan.md @@ -0,0 +1,274 @@ +# SLM Management Plan + +This document outlines the key concerns and management strategy for SLM deployment across all projects. + +## Key Concerns Overview + +| Concern | Priority | Projects Affected | +| -------------------- | -------- | ----------------------- | +| Model Selection | High | All | +| Cost Management | High | All | +| Latency Requirements | High | Gateway, Rooivalk | +| Edge Deployment | High | Rooivalk | +| Security & Privacy | High | Gateway, Cognitive Mesh | +| Reliability | Medium | All | +| Observability | Medium | All | +| Versioning | Medium | All | + +## 1. Model Selection + +### Strategy + +Maintain a tiered model portfolio: + +| Tier | Models | Use Cases | Cost | +| ----------- | -------------------- | ------------------------------ | --------------- | +| Ultra-light | Phi-3 Mini, Gemma 2B | Classification, routing | $0.0001/request | +| Light | Phi-3, Llama 3 8B | Tool selection, log analysis | $0.001/request | +| Medium | Llama 3 70B | Complex routing, decomposition | $0.01/request | +| Heavy | GPT-4 class | Reasoning, synthesis | $0.05+/request | + +### Management + +- **Central model registry** with capability matrix +- **A/B testing framework** for model comparisons +- **Performance benchmarks** per use case category + +## 2. Cost Management + +### Strategy + +Implement cost controls at each layer: + +``` +Cost Control Layers +┌─────────────────────────────────────┐ +│ 1. Budget caps per project │ +├─────────────────────────────────────┤ +│ 2. SLM-first routing (80%+ target) │ +├─────────────────────────────────────┤ +│ 3. Confidence-based escalation │ +├─────────────────────────────────────┤ +│ 4. 
Request caching │ +├─────────────────────────────────────┤ +│ 5. Telemetry & alerting │ +└─────────────────────────────────────┘ +``` + +### Targets + +| Metric | Target | +| -------------------- | ------ | +| SLM routing % | >80% | +| Cost per 1K requests | <$5 | +| LLM escalation rate | <20% | +| Cache hit rate | >30% | + +### Alerts + +- Cost spike >20% day-over-day +- LLM escalation >25% +- Budget utilization >80% + +## 3. Latency Requirements + +### Targets by Project + +| Project | Target P99 | Critical Path | +| --------------- | ---------- | --------------------- | +| AI Gateway | <100ms | routing decision | +| PhoenixRooivalk | <50ms | threat classification | +| CodeFlow | <2s | PR classification | +| Cognitive Mesh | <500ms | agent selection | +| AgentKit Forge | <1s | tool selection | + +### Optimization + +- **Model quantization** for edge (int4) +- **Caching** of frequent decisions +- **Batch processing** for non-critical tasks +- **Connection pooling** to inference endpoints + +## 4. Edge Deployment (PhoenixRooivalk) + +### Critical: SLM is NOT Primary + +> **Never use SLM for safety-critical decisions.** + +SLM is only for: + +- Operator-facing summaries +- Report generation +- Post-mission narratives + +Core detection uses: + +- Rules + signal models + fusion engine + +### Strategy + +| Requirement | Solution | +| ------------------ | ------------------------------- | +| Hardware diversity | Support Jetson, CPU, mobile | +| Offline operation | Full local inference capability | +| Model updates | OTA with rollback | +| Security | No external connectivity | + +### Model Optimization + +```python +# Standard edge optimization pipeline +optimizations = [ + quantization(weights="int4"), + pruning(structured=0.3), + distillation(student=phi3_mini), + compilation(target="cuda|cpu") +] +``` + +## 5. 
Security & Privacy + +### Strategy + +| Layer | Controls | +| ---------- | ----------------------------------------- | +| Input | Prompt injection detection, PII filtering | +| Processing | No data leaves boundary | +| Output | Content filtering, audit logging | +| Access | Role-based model access | + +### SLM Security Checks + +```python +async def security_pipeline(request: Request) -> SecurityResult: + # 1. Prompt injection check + injection = await slm_check_injection(request.prompt) + if injection.detected: + return blocked(injection.reason) + + # 2. PII detection + pii = await slm_check_pii(request.prompt) + if pii.found: + return blocked("PII detected") + + # 3. Policy check + policy = await slm_check_policy(request.prompt) + if policy.violation: + return blocked(policy.violation) + + return allowed() +``` + +## 6. Reliability + +### Strategy + +| Concern | Mitigation | +| ------------------- | ------------------------ | +| Model downtime | Fallback models per tier | +| Latency spikes | Timeout + escalation | +| Quality degradation | Continuous evaluation | +| Hallucinations | Confidence thresholds | + +### Fallback Hierarchy + +``` +Request + │ + ▼ Primary SLM + │ + ├─ Success → Return + │ + ├─ Timeout → Fallback SLM + │ + ├─ Low confidence → LLM verification + │ + └─ Failure → Error with telemetry +``` + +## 7. Observability + +### Metrics Collection + +| Metric Type | Collection | +| -------------- | -------------------------- | +| Request volume | Per model, per project | +| Latency | P50, P95, P99 per endpoint | +| Error rate | By error type, model | +| Cost | Per project, per user | +| Quality | Accuracy, escalation rate | + +### Dashboards + +- **Cost Dashboard**: Spend by project, model, day +- **Performance Dashboard**: Latency by tier +- **Quality Dashboard**: Accuracy, false positives + +## 8. 
Versioning + +### Strategy + +| Component | Versioning | Update Frequency | +| -------------- | ---------------- | ------------------ | +| Models | Semantic (1.0.0) | Monthly evaluation | +| Prompts | Git-based | Per task | +| Infrastructure | Terraform | Per deployment | + +### Model Lifecycle + +``` +Discovery → Testing → Staging → Production → Deprecated → Retired + │ │ │ │ │ + ▼ ▼ ▼ ▼ ▼ + Evaluate A/B test Shadow mode Active Fallback +``` + +## Project-Specific Concerns + +### AI Gateway + +- High-volume routing +- Security-first evaluation +- Real-time cost tracking + +### Cognitive Mesh + +- Agent capability mapping +- Task decomposition accuracy +- Multi-agent coordination + +### PhoenixRooivalk + +- **CRITICAL**: SLM NOT for safety decisions +- Edge hardware diversity +- Offline reliability +- Minimal latency + +### CodeFlow Engine + +- PR classification accuracy +- CI log analysis quality +- Auto-merge reliability + +### AgentKit Forge + +- Tool selection accuracy +- Context compression ratio +- LLM call reduction + +## Canonical Principle + +> **Use SLMs to decide, filter, classify, compress, and prepare.** +> **Use LLMs to reason, reconcile, synthesize, and communicate.** + +## Action Items + +1. [ ] Establish model registry with tiered selection +2. [ ] Implement cost tracking per project +3. [ ] Set up latency monitoring dashboards +4. [ ] Create edge deployment pipeline +5. [ ] Build security check pipeline +6. [ ] Define fallback hierarchies +7. [ ] Implement observability stack +8. [ ] Document model lifecycle process +9. 
[ ] **Add explicit safety boundary for PhoenixRooivalk** diff --git a/docs/architecture/reference/strategic/01-why-slms-matter.md b/docs/architecture/reference/strategic/01-why-slms-matter.md new file mode 100644 index 0000000..653e273 --- /dev/null +++ b/docs/architecture/reference/strategic/01-why-slms-matter.md @@ -0,0 +1,86 @@ +# Why SLMs Matter in These Systems + +This document explains the strategic value of Small Language Models (SLMs) across the ecosystem. + +## Executive Summary + +Across all six platforms, SLMs provide: + +| Benefit | Description | +| -------------------------- | ------------------------------------------- | +| **Cost Control** | Large models are invoked only when required | +| **Latency Reduction** | Routing decisions happen in milliseconds | +| **Edge Deployment** | PhoenixRooivalk can run inference locally | +| **Deterministic Behavior** | SLMs are easier to constrain and audit | + +## Summary Table + +| System | SLM Role | +| --------------- | --------------------------------------- | +| AI Gateway | routing, policy checks, cost prediction | +| Cognitive Mesh | agent routing, task decomposition | +| PhoenixRooivalk | edge telemetry analysis | +| CodeFlow Engine | CI intelligence, log analysis | +| AgentKit Forge | tool selection, context compression | +| Mystira | story safety, continuity, age-fit | + +--- + +## Design Principle + +The best use of SLMs is not "replace the big model." 
It is: + +```mermaid +flowchart LR + S[Screen First] --> R[Route Cheap] + R --> E[Escalate Selectively] + E --> C[Compress Context Aggressively] + C --> L[Keep Edge Decisions Local] +``` + +| Principle | Description | +| ------------------------ | -------------------------------------------------------------- | +| **Screen First** | SLMs handle initial classification before expensive operations | +| **Route Cheap** | Direct simple requests to SLMs or small models | +| **Escalate Selectively** | Only invoke LLMs for complex, ambiguous tasks | +| **Compress Context** | SLMs reduce token volume before LLM processing | +| **Keep Edge Local** | PhoenixRooivalk operates without cloud dependency | + +--- + +## Reference Architecture + +```mermaid +flowchart TD + U[Users / Operators / CI Events / Sensor Feeds] + U --> G[AI Gateway] + G --> G1[SLM: intent classification] + G --> G2[SLM: safety / policy scan] + G --> G3[SLM: cost routing] + G --> G4[Cache / provider selection] + G4 --> CM[Cognitive Mesh] + G4 --> CF[CodeFlow Engine] + G4 --> AF[AgentKit Forge] + G4 --> PR[PhoenixRooivalk] + G4 --> MY[Mystira] + CM --> L1[LLM: deep reasoning] + CF --> L2[LLM: remediation] + AF --> L3[LLM: synthesis] + MY --> L4[LLM: narrative] +``` + +--- + +## Strategic Recommendation + +SLMs should be treated as: + +- **Control-plane intelligence**: Routing, classification, decision-making +- **Cheap operational cognition**: Fast, repetitive tasks +- **First-pass classifiers**: Initial triage before expensive operations +- **Context reducers**: Compressing data for efficient processing +- **Edge interpreters**: Local processing without cloud dependency + +**Not** as replacements for the reasoning tier. + +> **SLMs run the flow. 
LLMs solve the hard parts.** diff --git a/docs/architecture/reference/strategic/02-gateway-slm-use-cases.md b/docs/architecture/reference/strategic/02-gateway-slm-use-cases.md new file mode 100644 index 0000000..1d62542 --- /dev/null +++ b/docs/architecture/reference/strategic/02-gateway-slm-use-cases.md @@ -0,0 +1,90 @@ +# AI Gateway — Practical SLM Use Cases + +AI Gateway is the highest-leverage place to put SLMs because every request passes through it. + +## Best-Fit SLM Tasks + +### A. Intent and Complexity Classification + +The SLM predicts: + +- Request type +- Risk level +- Likely tool need +- Token size estimate +- Recommended model tier + +```json +{ + "intent": "repo_analysis", + "complexity": "medium", + "tool_required": true, + "security_risk": "low", + "recommended_tier": "mid" +} +``` + +### B. Safety and Data-Boundary Screening + +Before a request hits an expensive model: + +- Secret leakage scan +- PII detection +- Jailbreak/prompt-injection detection +- Tenant/policy checks +- Outbound data classification + +### C. 
Budget-Aware Routing + +Use the SLM to decide: + +- Direct answer with small model +- Call tool first +- Escalate to reasoning model +- Deny or redact +- Cache hit / semantic cache reuse + +## Practical Gateway Flow + +```mermaid +flowchart LR + A[Client Request] --> B[Gateway Ingress] + B --> C[SLM Classifier] + C --> D[SLM Policy Scan] + D --> E[Budget / Latency Rules] + E --> F{Decision} + F -->|simple| G[Small Model] + F -->|tool-first| H[Tool Execution] + F -->|complex| I[Large Model] + F -->|blocked| J[Policy Refusal] + H --> K[Post-tool SLM summarizer] + K --> I +``` + +## Why It Fits AI Gateway + +| Benefits | Tradeoffs | +| ----------------------------- | -------------------------------------- | +| Major cost reduction | Misrouting risk if classifier is weak | +| Faster median latency | Extra hop in pipeline | +| Consistent policy enforcement | Need calibration, thresholds, fallback | +| Cleaner observability | | + +## Where It Breaks Down + +- Vague prompts +- Multi-domain prompts +- Hidden tool requirements +- Requests where complexity is not obvious + +## Recommended Pattern + +Use the SLM as a **triage layer, not the final authority**. If confidence is low, escalate automatically. + +### Threshold Guide + +| Confidence | Action | +| ---------- | ----------------- | +| >= 0.90 | Direct routing | +| 0.75-0.89 | Verify with rules | +| < 0.75 | Escalate to LLM | diff --git a/docs/architecture/reference/strategic/03-cognitive-mesh-use-cases.md b/docs/architecture/reference/strategic/03-cognitive-mesh-use-cases.md new file mode 100644 index 0000000..4255e87 --- /dev/null +++ b/docs/architecture/reference/strategic/03-cognitive-mesh-use-cases.md @@ -0,0 +1,95 @@ +# Cognitive Mesh — Practical SLM Use Cases + +Cognitive Mesh is where SLMs become orchestration primitives. + +## Best-Fit SLM Tasks + +### A. 
Specialist Routing + +The SLM decides which node gets the task: + +- infra +- code +- security +- research +- finance +- documentation +- architecture + +### B. Task Decomposition + +Before invoking expensive reasoning, the SLM splits tasks into atomic units. + +Example: "Review this repo and propose a deploy plan" becomes: + +1. Detect stack +2. Detect infra +3. Detect secrets/compliance issues +4. Map CI/CD +5. Draft deploy sequence + +### C. State Summarization + +Multi-agent systems accumulate long histories. An SLM maintains: + +- Current objective +- Known constraints +- Prior decisions +- Unresolved blockers +- Tool outputs summary + +### D. Agent Health and Loop Detection + +The SLM can classify: + +- Repeated retries +- Tool thrashing +- No-progress loops +- Conflicting agent outputs + +## Practical Cognitive Mesh Flow + +```mermaid +flowchart TD + U[User] --> R[SLM Router] + R --> A1[Architect Agent] + R --> A2[Coder Agent] + R --> A3[Security Agent] + R --> A4[Infra Agent] + R --> A5[Research Agent] + A1 --> S[SLM State Manager] + A2 --> S + A3 --> S + A4 --> S + A5 --> S + S --> X{Need deep reasoning?} + X -->|No| O[Assemble Response] + X -->|Yes| L[LLM Synthesis] + L --> O +``` + +## Why It Fits Cognitive Mesh + +| Benefits | Tradeoffs | +| --------------------- | ----------------------------- | +| Cheaper orchestration | Decomposition quality matters | +| Faster routing | Errors compound downstream | +| Smaller context | Summaries can lose nuance | +| Better determinism | | + +## Best Operational Pattern + +| Use SLMs For | Use LLMs For | +| ------------------------ | ----------------------- | +| "Who should do this?" | Final synthesis | +| "What is the next step?" | Architecture evaluation | +| "What matters here?" | Novel reasoning | +| "Are we stuck?" 
| | + +## Threshold Guide + +| Confidence | Action | +| ---------- | ----------------- | +| >= 0.85 | Direct routing | +| 0.70-0.84 | Verify with rules | +| < 0.70 | Escalate to LLM | diff --git a/docs/architecture/reference/strategic/04-codeflow-use-cases.md b/docs/architecture/reference/strategic/04-codeflow-use-cases.md new file mode 100644 index 0000000..f1024fe --- /dev/null +++ b/docs/architecture/reference/strategic/04-codeflow-use-cases.md @@ -0,0 +1,87 @@ +# CodeFlow Engine — Practical SLM Use Cases + +CodeFlow Engine is one of the strongest SLM domains because CI/CD workloads are repetitive, structured, and high-volume. + +## Best-Fit SLM Tasks + +### A. PR Classification + +Classify a PR as: + +- docs-only +- low-risk refactor +- dependency update +- infra change +- security-sensitive +- contract-breaking +- test-only +- release-impacting + +### B. Diff Summarization + +Generate short structured summaries from git diff and changed files. + +### C. CI Failure Triage + +Classify failures into: + +- test regression +- flaky test +- dependency resolution +- auth/secret issue +- infra provisioning error +- timeout/resource exhaustion +- lint/type failure + +### D. Review Routing + +Decide which reviewers or agent flows should be triggered. + +### E. Release-Note Extraction + +Extract user-facing change notes without using a full LLM. 
+ +## Practical CodeFlow Pipeline + +```mermaid +flowchart LR + GP[Git Push / PR] --> IN[Ingest] + IN --> S1[SLM: diff classifier] + IN --> S2[SLM: risk scorer] + S1 --> D{Decision} + S2 --> D + D -->|low-risk| Q[Fast CI] + D -->|high-risk| F[Full CI] + D -->|unclear| L[LLM Review] + F --> T[CI Logs] + Q --> T + T --> C[SLM: triage] + C --> R[Action] +``` + +## Why It Fits CodeFlow Engine + +| Benefits | Tradeoffs | +| -------------------------- | --------------------------- | +| Huge cost savings at scale | False negatives possible | +| Strong consistency | Requires designed schemas | +| Better PR throughput | Model drift affects quality | +| Repetitive workload fit | | + +## Strongest SLM Opportunities + +Given emphasis on contract diffs, OpenAPI breakage, schema validation, CI gates: + +- Change intent detection +- Docs generation hints +- Issue bucketing +- Runbook suggestion +- Log compression before escalation + +## Threshold Guide + +| Confidence | Action | +| ---------- | ----------------- | +| >= 0.88 | Direct use | +| 0.75-0.87 | Verify with rules | +| < 0.75 | Manual review | diff --git a/docs/architecture/reference/strategic/05-agentkit-use-cases.md b/docs/architecture/reference/strategic/05-agentkit-use-cases.md new file mode 100644 index 0000000..ebd5d4d --- /dev/null +++ b/docs/architecture/reference/strategic/05-agentkit-use-cases.md @@ -0,0 +1,85 @@ +# AgentKit Forge — Practical SLM Use Cases + +AgentKit Forge is ideal for SLMs because tool-heavy agents don't need a large model for every micro-decision. + +## Best-Fit SLM Tasks + +### A. Tool Selection + +Choose among: + +- GitHub API +- Azure CLI +- Terraform +- Kusto +- File retrieval +- Documentation lookup +- Shell command +- Search + +### B. Parameter Extraction + +Pull structured arguments out of the request before calling the tool. + +### C. Context Compression + +Convert long tool traces into compact operational state. + +### D. 
Step Validation + +Check whether a step result is sufficient before moving to next step. + +### E. Retry / Fallback Logic + +Classify whether an error merits: + +- Retry +- Alternate tool +- Human intervention +- Escalation to larger model + +## Practical AgentKit Flow + +```mermaid +flowchart TD + T[Agent Task] --> P[SLM Planner] + P --> TS[SLM Tool Selector] + TS --> G1[GitHub] + TS --> G2[Azure] + TS --> G3[Terraform] + TS --> G4[Kusto] + G1 --> M[SLM Compressor] + G2 --> M + G3 --> M + G4 --> M + M --> V{Enough?} + V -->|Yes| A[Response] + V -->|No| L[Escalate LLM] + L --> A +``` + +## Why It Fits AgentKit Forge + +| Benefits | Tradeoffs | +| -------------------- | --------------------------- | +| Lower token burn | Brittle if schemas weak | +| Faster tool loops | Poor extraction = bad calls | +| Improved determinism | Compression can lose detail | +| Cleaner contracts | | + +## Design Rule + +| Let SLMs Own | Let LLMs Own | +| ------------ | -------------------- | +| Selection | Synthesis | +| Extraction | Ambiguity resolution | +| Compression | Multi-tool planning | +| Validation | | + +## Threshold Guide + +| Confidence | Action | +| ---------- | -------------------- | +| >= 0.85 | Direct execution | +| 0.70-0.84 | Require confirmation | +| < 0.70 | Decline / clarify | diff --git a/docs/architecture/reference/strategic/06-rooivalk-use-cases.md b/docs/architecture/reference/strategic/06-rooivalk-use-cases.md new file mode 100644 index 0000000..6bdb6dc --- /dev/null +++ b/docs/architecture/reference/strategic/06-rooivalk-use-cases.md @@ -0,0 +1,76 @@ +# PhoenixRooivalk — Practical SLM Use Cases + +PhoenixRooivalk is different because the core advantage is locality, latency, and resilience—not just cost. + +## Best-Fit SLM Tasks + +### A. Edge Event Labeling + +Convert telemetry into categories: + +- loitering +- fast ingress +- signal loss +- RF anomaly +- perimeter breach candidate +- operator attention required + +### B. 
Operator-Facing Summary + +Turn noisy sensor events into concise, human-readable alerts. + +### C. Log-to-Report Conversion + +Mission logs, detections, and post-event evidence can be summarized locally. + +### D. Escalation Gating + +Only send selected events to cloud when: + +- Confidence above threshold +- Event duration exceeds threshold +- Evidence bundle sufficient +- Bandwidth available + +## Practical Edge Flow + +```mermaid +flowchart LR + S[RF / EO / Radar / Telemetry] --> N[Detection Pipeline] + N --> E[Edge SLM] + E --> L1[Event Label] + E --> L2[Threat Summary] + E --> L3[Alert Text] + E --> L4[Escalation] + L4 -->|local| O[Console] + L4 -->|upstream| C[Cloud] +``` + +## Why It Fits PhoenixRooivalk + +| Benefits | Tradeoffs | +| ------------------------ | ------------------------- | +| Low latency | Limited reasoning depth | +| Offline capability | Edge hardware constraints | +| Bandwidth savings | Must handle noisy inputs | +| Privacy / sovereignty | Needs tight prompt design | +| Constrained hardware fit | | + +## CRITICAL: Important Boundary + +Do NOT let SLM become sole authority for kinetic or high-stakes decisions. + +| Use SLM For | NOT For | +| ---------------- | ---------------------------- | +| Interpretation | Critical threat adjudication | +| Summarization | Response triggering | +| Prioritization | Access control | +| Operator support | Resource isolation | + +## Threshold Guide + +| Confidence | Action | +| ---------- | -------------- | +| >= 0.80 | Full summary | +| 0.65-0.79 | Facts only | +| < 0.65 | Human analysis | diff --git a/docs/architecture/reference/strategic/07-deployment-model.md b/docs/architecture/reference/strategic/07-deployment-model.md new file mode 100644 index 0000000..e4e6f8d --- /dev/null +++ b/docs/architecture/reference/strategic/07-deployment-model.md @@ -0,0 +1,75 @@ +# Practical Deployment Model + +This is the recommended stack for the ecosystem. 
+ +## Full Stack Architecture + +```mermaid +flowchart TD + A[Ingress] --> B[AI Gateway SLM] + B --> C1[Fast-path] + B --> C2[Tool-first] + B --> C3[Escalation] + C2 --> D1[AgentKit] + C2 --> D2[CodeFlow] + C2 --> D3[Cognitive Mesh] + D1 --> E1[SLM tool loops] + D2 --> E2[SLM CI triage] + D3 --> E3[SLM orchestration] + E1 --> F[LLM Pool] + E2 --> F + E3 --> F + G[Rooivalk Edge] --> H[Local SLM] + H --> I[Local / Cloud] + F --> J[Observability] +``` + +## Decision Matrix + +| System | Best SLM Jobs | Less Suitable | +| --------------- | -------------------------- | ------------------------------ | +| AI Gateway | routing, screening, cost | Nuanced synthesis | +| Cognitive Mesh | routing, decomposition | Final judgment | +| CodeFlow | PR triage, log analysis | Root cause across dependencies | +| AgentKit | tool selection, extraction | Multi-step planning | +| PhoenixRooivalk | summaries, alerts | Sole threat authority | +| Mystira | safety, continuity | Rich narrative | + +## Practical Gateway Flow + +```mermaid +flowchart LR + A[Request] --> B[Classifier] + B --> C[Policy Scan] + C --> D[Budget Rules] + D --> E{Decision} + E -->|simple| F[Small Model] + E -->|tool| G[Tools] + E -->|complex| H[LLM] + E -->|blocked| I[Refusal] + G --> J[Post-tool Summarizer] + J --> H +``` + +## End-to-End Example + +A developer opens a PR that changes Terraform, GitHub Actions, and an OpenAPI spec: + +```mermaid +sequenceDiagram + Dev->>GH: Open PR + GH->>CF: Event + CF->>SLM: Classify + risk + SLM-->>CF: infra-change, high risk + CF->>GH: Full CI + contract checks + GH-->>CF: Results + CF->>SLM: Triage logs + SLM-->>CF: Breaking change detected + CF->>AG: Escalate + AG->>LLM: Reasoning + LLM-->>AG: Advice + AG-->>CF: Response + CF-->>GH: Comment +``` + +SLMs handle repetitive triage; LLMs solve the hard part. 
diff --git a/docs/architecture/reference/strategic/08-implementation-order.md b/docs/architecture/reference/strategic/08-implementation-order.md new file mode 100644 index 0000000..fde2524 --- /dev/null +++ b/docs/architecture/reference/strategic/08-implementation-order.md @@ -0,0 +1,56 @@ +# Recommended Implementation Order + +For the stack, the highest ROI sequence is: + +## Phase 1: Gateway Foundation + +- AI Gateway intent classifier +- AI Gateway policy scan +- Route-to-tier decision +- Semantic cache admission + +**Value**: Highest immediate cost-leverage + +## Phase 2: CI Intelligence + +- CodeFlow Engine PR risk classifier +- CodeFlow Engine CI failure bucketing +- CodeFlow Engine release-note summarizer + +**Value**: Fastest operational value + +## Phase 3: Agent Runtime + +- AgentKit Forge tool selector +- AgentKit Forge parameter extractor +- AgentKit Forge context compressor + +**Value**: Lower token burn, faster tool loops + +## Phase 4: Orchestration + +- Cognitive Mesh specialist router +- Cognitive Mesh decomposition engine +- Cognitive Mesh state manager + +**Value**: Strong leverage once taxonomy stabilizes + +## Phase 5: Edge + +- PhoenixRooivalk edge event summarizer +- PhoenixRooivalk operator alert composer +- PhoenixRooivalk escalation filter + +**Value**: Keep isolated from critical control + +## Summary + +| Phase | System | Priority | +| ----- | --------------- | -------- | +| 1 | AI Gateway | Highest | +| 2 | CodeFlow | High | +| 3 | AgentKit Forge | Medium | +| 4 | Cognitive Mesh | Medium | +| 5 | PhoenixRooivalk | Lower | + +That order gives fastest operational value with lowest implementation risk. 
diff --git a/docs/architecture/reference/strategic/README.md b/docs/architecture/reference/strategic/README.md new file mode 100644 index 0000000..0644088 --- /dev/null +++ b/docs/architecture/reference/strategic/README.md @@ -0,0 +1,28 @@ +# Strategic SLM Guidance + +This folder contains strategic guidance on why SLMs matter and how to deploy them across the ecosystem. + +## Documents + +- [01-why-slms-matter.md](01-why-slms-matter.md) - Executive summary and core principles +- [02-gateway-slm-use-cases.md](02-gateway-slm-use-cases.md) - AI Gateway practical use cases +- [03-cognitive-mesh-use-cases.md](03-cognitive-mesh-use-cases.md) - Cognitive Mesh practical use cases +- [04-codeflow-use-cases.md](04-codeflow-use-cases.md) - CodeFlow Engine practical use cases +- [05-agentkit-use-cases.md](05-agentkit-use-cases.md) - AgentKit Forge practical use cases +- [06-rooivalk-use-cases.md](06-rooivalk-use-cases.md) - PhoenixRooivalk practical use cases +- [07-deployment-model.md](07-deployment-model.md) - Practical deployment model and decision matrix +- [08-implementation-order.md](08-implementation-order.md) - Recommended implementation sequence + +## Quick Navigation + +| Phase | System | Document | +| --------------- | --------------- | ---------------------------------------------------------------- | +| Foundation | AI Gateway | [02-gateway-slm-use-cases.md](02-gateway-slm-use-cases.md) | +| CI Intelligence | CodeFlow Engine | [04-codeflow-use-cases.md](04-codeflow-use-cases.md) | +| Agent Runtime | AgentKit Forge | [05-agentkit-use-cases.md](05-agentkit-use-cases.md) | +| Orchestration | Cognitive Mesh | [03-cognitive-mesh-use-cases.md](03-cognitive-mesh-use-cases.md) | +| Edge | PhoenixRooivalk | [06-rooivalk-use-cases.md](06-rooivalk-use-cases.md) | + +## Core Principle + +> **SLMs run the flow. 
LLMs solve the hard parts.** diff --git a/docs/architecture/systems/agentkit-forge.md b/docs/architecture/systems/agentkit-forge.md new file mode 100644 index 0000000..c84a182 --- /dev/null +++ b/docs/architecture/systems/agentkit-forge.md @@ -0,0 +1,192 @@ +# AgentKit Forge + +AgentKit Forge builds AI agents and orchestration workflows. SLMs help when agents have **many tools** and **large working memory**. + +## Architecture + +``` +Agent Task + │ + ▼ +┌─────────────────────────────────────┐ +│ SLM Execution Governor │ +│ (tool selection, memory, budget) │ +└─────────────────────────────────────┘ + │ + ▼ +Tool Selection + │ + ├─→ GitHub API + ├─→ Azure CLI + ├─→ Terraform + ├─→ Documentation Search + └─→ LLM Synthesis +``` + +## Most Practical SLM Jobs + +### 1. Tool Selector + +Map user or system request to the right tool. + +```json +{ + "tool": "azure_cli", + "command": "az monitor metrics list", + "args": { + "resource": "/subscriptions/.../appinsights/...", + "metric": "requests" + }, + "confidence": 0.92 +} +``` + +### 2. Relevance Filter + +Only send necessary state to expensive models. + +```json +{ + "relevant_context": ["terraform_plan", "error_logs"], + "pruned_context": ["old_successful_deploys", "unrelated_metrics"], + "estimated_tokens": 3500 +} +``` + +### 3. Budget Governor + +Estimate likely token spend and whether tool-first is sufficient. + +```json +{ + "estimated_tokens": 8000, + "can_fit_in_window": true, + "should_use_tool_first": true, + "budget_tier": "medium" +} +``` + +### 4. Execution Classifier + +Distinguish how to handle the request. + +```json +{ + "action": "use_tool", + "tool_name": "github_api", + "escalate_to_llm": false, + "reason": "simple data retrieval" +} +``` + +## Implementation + +### Tool Selection + +```python +async def select_tool(task: str, available_tools: list[Tool]) -> ToolInvocation: + prompt = f"""Select the best tool for this task. 
+ 
+
+Task: {task}
+
+Available tools:
+{format_tools(available_tools)}
+
+Output: tool_name, args, confidence"""
+
+    result = await slm_completion(prompt)
+    return ToolInvocation(
+        tool=result.tool,
+        args=result.args,
+        confidence=result.confidence
+    )
+```
+
+### Context Planning
+
+```python
+async def plan_context(task: str, context_options: list[Context]) -> ContextPlan:
+    prompt = f"""Plan which context to use for this task.
+
+Task: {task}
+
+Available context:
+{format_context(context_options)}
+
+Output: required_context, optional_context, estimated_tokens"""
+
+    return await slm_completion(prompt)
+```
+
+### Budget Governor
+
+```python
+async def govern_budget(task: str) -> BudgetDecision:
+    prompt = f"""Estimate token budget for this task.
+
+Task: {task}
+
+Consider: context size, expected output, complexity"""
+
+    estimate = await slm_completion(prompt)
+
+    return BudgetDecision(
+        estimated_tokens=estimate.tokens,
+        can_fit=estimate.can_fit,
+        should_escalate=estimate.should_escalate
+    )
+```
+
+### Multi-Step Execution
+
+```python
+async def execute_agent_task(task: str) -> AgentResult:
+    # Step 1: Decompose
+    plan = await slm_decompose(task)
+
+    # Step 2: Execute each step with tool selection
+    step_results = []
+    for step in plan.steps:
+        tool = await select_tool(step.description, available_tools)
+        result = await execute_tool(tool)
+
+        # Step 3: Check if escalation needed
+        if result.complexity == "high":
+            result = await llm_complete(step, context)
+
+        step_results.append(result)
+
+    # Aggregate the per-step execution results (not the raw plan steps)
+    return aggregate_results(step_results)
+```
+
+## Tradeoffs
+
+| Pros | Cons |
+| ----------------------------------- | ---------------------------------------------- |
+| Keeps agent execution lean | Weak tool selection harms trust |
+| Lowers token burn dramatically | Compressed memory can omit critical edge cases |
+| Improves tool invocation discipline | Too much reliance can make agents look shallow |
+
+## Key Concerns
+
+| Concern | Strategy |
+| ------------- | 
-------------------------------------- | +| Tool accuracy | Validate tool exists before invocation | +| Context bloat | SLM filters context before LLM | +| Cost | Route 70%+ through SLM tool selection | +| Reliability | Fallback to LLM on low confidence | + +## Tool Categories + +| Category | SLM Handles | LLM Handles | +| ------------ | ------------------ | ------------------ | +| CLI commands | selection + args | complex pipelines | +| API calls | endpoint selection | response parsing | +| File ops | path determination | content generation | +| Queries | query construction | result synthesis | + +## Implementation Checklist + +- [ ] Implement tool selection with confidence scores +- [ ] Add relevance filtering for context +- [ ] Implement budget governor with token estimation +- [ ] Add execution classification (direct/tool/LLM) +- [ ] Set up fallback to LLM on low confidence diff --git a/docs/architecture/systems/ai-gateway.md b/docs/architecture/systems/ai-gateway.md new file mode 100644 index 0000000..5a288f0 --- /dev/null +++ b/docs/architecture/systems/ai-gateway.md @@ -0,0 +1,146 @@ +# AI Gateway + +AI Gateway sits between applications and multiple AI providers. The SLM acts as the **admission control and routing brain** — the fast, cheap, deterministic control layer before expensive model invocation. + +## Architecture + +``` +Client Request + │ + ▼ +┌─────────────────────────────────────┐ +│ SLM Control Layer │ +│ (intent, complexity, risk, tools) │ +└─────────────────────────────────────┘ + │ + ▼ +Routing Decision + │ + ├─→ Cache (if cacheable) + ├─→ Tool call + ├─→ SLM response + ├─→ Small model + └─→ Large model escalation +``` + +## SLM as Admission Control + +The SLM sits **before** expensive model invocation and sometimes **after** provider response for tagging/telemetry normalization. 
+ +### Best SLM Use Cases + +| Use Case | Description | Output Schema | +| ------------------------ | ------------------------------ | ---------------------------------- | +| Intent Classification | Determine user intent | `{ "intent": "code_review", ... }` | +| Complexity Scoring | Rate request complexity | `{ "complexity": "medium", ... }` | +| Tool Eligibility | Detect if tool call needed | `{ "tool_candidate": true, ... }` | +| Safety Prefiltering | Prompt injection, PII, secrets | `{ "risk": "low", ... }` | +| Cache Key Enrichment | Generate cache keys | `{ "cacheable": false, ... }` | +| Telemetry Categorization | Tag for observability | `{ "category": "analysis", ... }` | +| Tenant Policy Gating | Per-tenant routing rules | `{ "tier": "premium", ... }` | + +### Why This Works + +These tasks are: + +- **Short-context** — SLM handles easily +- **Repetitive** — High cache hit potential +- **Structured** — Schema-bound outputs +- **Latency-sensitive** — SLM is fast + +### Good SLM Output + +```json +{ + "intent": "code_review", + "complexity": "medium", + "tool_candidate": true, + "risk": "low", + "cacheable": false, + "recommended_tier": "large" +} +``` + +## Implementation + +### Routing Logic + +```python +async def gateway_admission(request: Request) -> AdmissionDecision: + # SLM does admission control + classification = await slm_classify(request.prompt) + + # Route based on classification + if classification.cacheable: + cached = await check_cache(classification.cache_key) + if cached: + return CachedResponse(cached) + + if classification.tool_candidate: + return await route_to_tools(classification) + + if classification.complexity == "low": + return await route_to_slm(classification) + + # Escalate to LLM + return await route_to_llm(classification) +``` + +### Policy Check Pipeline + +```python +async def security_scan(prompt: str) -> SecurityResult: + checks = await asyncio.gather( + slm_check_injection(prompt), + slm_check_pii(prompt), + 
slm_check_secrets(prompt) + ) + + if any(check.flagged for check in checks): + return SecurityResult(blocked=True, reason=checks) + + return SecurityResult(allowed=True) +``` + +## Tradeoffs + +| Pros | Cons | +| ------------------------------- | -------------------------------------------------- | +| Major cost reduction | Misrouting risk if classifier is weak | +| Consistent routing | Small models can under-detect subtle safety issues | +| Lower p95 latency | More moving parts in gateway logic | +| Easier telemetry and governance | | + +## Key Concerns + +| Concern | Strategy | +| -------- | -------------------------------------------- | +| Latency | SLM runs inline; must respond in <50ms | +| Accuracy | Cascade: low confidence → LLM verification | +| Cost | Route 80%+ to SLMs; LLM only for escalation | +| Security | SLM policy check before any model invocation | + +## SLM Model Selection + +Recommended models for gateway classification: + +- Phi-3 Mini (3.8B) - fast, accurate +- Llama 3 8B - good general classification +- Gemma 2B - minimal latency + +## Metrics + +Track per routing decision: + +- SLM vs LLM routing ratio +- Average latency by route type +- Escalation rate (SLM → LLM) +- Cost per 1K requests + +## Implementation Checklist + +- [ ] Add SLM policy envelope returning intent, complexity, risk, cacheability, tier +- [ ] Implement cascade pattern for low confidence → LLM +- [ ] Add security prefiltering (injection, PII, secrets) +- [ ] Set up cost tracking per tier +- [ ] Configure latency alerts diff --git a/docs/architecture/systems/codeflow-engine.md b/docs/architecture/systems/codeflow-engine.md new file mode 100644 index 0000000..30f5dfd --- /dev/null +++ b/docs/architecture/systems/codeflow-engine.md @@ -0,0 +1,165 @@ +# CodeFlow Engine + +CodeFlow Engine is a DevOps and CI/CD intelligence system. **This is one of the most natural SLM fits** — CI/CD emits lots of repetitive semi-structured text where SLMs excel. 
+ +## Architecture + +``` +Git Push / PR Event + │ + ▼ +┌─────────────────────────────────────┐ +│ SLM Triage Layer │ +│ (classification, risk, pipeline) │ +└─────────────────────────────────────┘ + │ + ▼ +CI/CD Decision + │ + ├─→ Auto approve + ├─→ Run tests (full/minimal/skip) + ├─→ Security review + └─→ Escalate to LLM +``` + +## Best SLM Use Cases + +| Use Case | Description | Example Output | +| ------------------------ | ------------------------- | ------------------------------------------------------------- | +| PR Classification | Categorize change type | `{ "type": "api_contract", "risk": "high" }` | +| Test Selection | Choose which tests to run | `{ "run_unit": true, "run_integration": false }` | +| Blast Radius | Estimate change impact | `{ "impacted": ["schemas", "api"], "risk": "medium" }` | +| Changelog Category | Generate release notes | `{ "category": "feature", "component": "gateway" }` | +| Build Log Classification | Diagnose failures | `{ "failure": "dependency_error", "fix": "npm install" }` | +| Flaky Test Grouping | Identify test patterns | `{ "flaky_group": "network_timed_out" }` | +| Issue Routing | Route to component owners | `{ "component": "infrastructure", "owner": "platform-team" }` | + +## Example SLM Outputs + +### PR Classification + +```json +{ + "change_type": "api_contract", + "risk": "high", + "requires_full_ci": true, + "security_review": false, + "impacted_domains": ["schemas", "api"], + "suggested_reviewers": ["platform-team"] +} +``` + +### Failure Diagnosis + +```json +{ + "failure_type": "dependency_resolution", + "retryable": false, + "likely_root_cause": "missing package lock update", + "suggested_action": "regenerate lock file and rerun" +} +``` + +## Why This Works + +CI/CD emits lots of repetitive semi-structured text: + +- Similar commit patterns +- Recurring error types +- Predictable change categories + +SLMs do very well at pattern recognition on this data. 
+
+## Implementation
+
+### PR Classification
+
+```python
+async def classify_pr(pr_diff: str, pr_description: str) -> PRClassification:
+    prompt = f"""Classify this PR:
+
+Diff (first 2000 chars): {pr_diff[:2000]}
+Description: {pr_description}
+
+Output JSON with: type, risk_level, tests_required, reviewers_needed, security_review"""
+
+    result = await slm_completion(prompt)
+    return PRClassification.parse_json(result)
+```
+
+### Test Selection
+
+```python
+async def select_tests(change_type: str, impacted_files: list[str]) -> TestPlan:
+    prompt = f"""Select tests for this change:
+
+Type: {change_type}
+Files: {', '.join(impacted_files)}
+
+Output: {{ "run_unit": bool, "run_integration": bool, "run_e2e": bool, "skip_reason": str|null }}"""
+
+    return await slm_completion(prompt)
+```
+
+### Failure Diagnosis
+
+```python
+async def diagnose_failure(build_log: str) -> Diagnosis:
+    prompt = f"""Diagnose this CI failure:
+
+Log (last 5000 chars):
+{build_log[-5000:]}
+
+Output: failure_type, retryable, likely_root_cause, suggested_action"""
+
+    return await slm_completion(prompt)
+```
+
+### Auto-Rules Mapping
+
+```python
+CLASSIFICATION_ACTIONS = {
+    ("docs", "low"): {"auto_merge": True, "ci_skip": True},
+    ("feat", "low"): {"auto_merge": False, "ci_full": True},
+    ("fix", "medium"): {"auto_merge": False, "ci_full": True, "security_review": True},
+    ("refactor", "low"): {"auto_merge": True, "ci_minimal": True},
+    ("api_contract", "high"): {"auto_merge": False, "ci_full": True, "security_review": True},
+}
+```
+
+## Tradeoffs
+
+| Pros | Cons |
+| ----------------------------------- | ------------------------------------------------- |
+| Cheaper automated repo intelligence | Incorrect risk can under-test changes |
+| Better developer feedback speed | Failure summarization may miss subtle root causes |
+| Fewer wasted full-pipeline runs | Rules should never override hard safety gates |
+
+## Key Concerns
+
+| Concern | Strategy |
+| -------- | 
----------------------------------------------- | +| Speed | SLM must complete in <2s | +| Accuracy | Validate against rules; escalate on uncertainty | +| Cost | Batch processing; SLM only for classification | +| Coverage | Handle all common CI scenarios | + +## Classification Types + +| Change Type | SLM Output | CI Action | +| ------------- | ------------ | ------------------ | +| documentation | risk: low | skip tests | +| bugfix | risk: medium | run tests | +| refactor | risk: low | run tests | +| security | risk: high | full review | +| breaking | risk: high | require approval | +| api_contract | risk: high | full CI + security | + +## Implementation Checklist + +- [ ] Add PR classification with structured output +- [ ] Implement test selection hints +- [ ] Add blast radius estimation +- [ ] Implement failure diagnosis with suggested actions +- [ ] Set up changelog category generation +- [ ] Configure auto-merge rules diff --git a/docs/architecture/systems/cognitive-mesh.md b/docs/architecture/systems/cognitive-mesh.md new file mode 100644 index 0000000..d4f2c96 --- /dev/null +++ b/docs/architecture/systems/cognitive-mesh.md @@ -0,0 +1,175 @@ +# Cognitive Mesh + +Cognitive Mesh architectures orchestrate multiple AI agents and tools. The SLM is the **control fabric** that decides which specialist acts, whether decomposition is needed, what context is necessary, and when to escalate. + +## Architecture + +``` +User Query + │ + ▼ +┌─────────────────────────────────────┐ +│ SLM Control Fabric │ +│ (routing, decomposition, compression)│ +└─────────────────────────────────────┘ + │ + ▼ +Routing Decision + │ + ├─→ Code Agent + ├─→ Infra Agent + ├─→ Security Agent + └─→ Research Agent + │ + ▼ + Specialist Work + │ + ▼ + LLM Synthesis (only when needed) +``` + +## Strong SLM Roles in Cognitive Mesh + +### 1. Router + +Pick which specialist or workflow handles the request. 
+ +```json +{ + "agent": "code_agent", + "confidence": 0.94, + "reasoning": "User is asking about refactoring" +} +``` + +### 2. Task Decomposer + +Break one request into bounded subtasks. + +**Example:** + +User: "Analyze this repo and generate a deployment plan." + +SLM decomposition: + +1. repository structure analysis +2. dependency inventory +3. infrastructure detection +4. deployment strategy generation + +Only the final step requires LLM. + +### 3. Context Compressor + +Reduce token load before LLM synthesis. + +```json +{ + "summary": "User wants Azure cost analysis", + "relevant_files": ["infra/main.tf", "infra/outputs.tf"], + "active_task": "generating cost breakdown", + "pruned_messages": 12 +} +``` + +### 4. Failure Classifier + +Classify failures to determine retry strategy: + +```json +{ + "failure_type": "tool_error", + "retryable": true, + "cause": "transient_network", + "action": "retry_with_backoff" +} +``` + +## Practical Pattern + +A good mesh uses: + +1. **SLM first** — routing, decomposition +2. **Tools/specialists second** — execution +3. 
**LLM only for synthesis** — or when ambiguous + +## Implementation + +### Agent Selection + +```python +async def select_agent(user_request: str) -> Agent: + # SLM determines best agent + classification = await slm_classify_intent(user_request) + + agent_map = { + "code": CodeAgent, + "infrastructure": InfraAgent, + "security": SecurityAgent, + "research": ResearchAgent, + } + + return agent_map[classification.agent] +``` + +### Task Decomposition + +```python +async def decompose_task(request: str) -> TaskPlan: + # SLM breaks down into subtasks + decomposition = await slm_decompose(request) + + return TaskPlan( + subtasks=decomposition.steps, + dependencies=decomposition.dependencies, + llm_required_at_step=decomposition.final_step_only + ) +``` + +### Context Compression + +```python +async def compress_context(messages: list[Message]) -> Compressed: + summary = await slm_summarize(messages) + + return Compressed( + summary=summary.state, + relevant=summary.relevant_messages, + token_estimate=summary.tokens + ) +``` + +## Tradeoffs + +| Pros | Cons | +| ------------------------------- | ----------------------------------------------- | +| Very large token savings | Decomposition quality can bottleneck workflow | +| Better determinism | Brittle routing if taxonomy is poor | +| Easier specialist orchestration | Harder debugging if confidence handling is weak | +| Improved auditability | | + +## Key Concerns + +| Concern | Strategy | +| ------------------ | ----------------------------------------- | +| Routing accuracy | Validate against known agent capabilities | +| Task complexity | SLM estimates; LLM confirms if wrong | +| Agent coordination | SLM manages task queue and dependencies | +| Failure detection | SLM monitors logs; LLM only for recovery | + +## Agent Capabilities Matrix + +| Agent | SLM Handles | LLM Required For | +| -------- | ------------------------------ | ------------------- | +| Code | file operations, git commands | complex refactoring | +| 
Infra | terraform plans, status checks | architecture design | +| Security | vulnerability scanning | threat analysis | +| Research | information retrieval | synthesis | + +## Implementation Checklist + +- [ ] Define agent taxonomy with capabilities +- [ ] Implement SLM router with structured output +- [ ] Add task decomposition with bounded subtasks +- [ ] Implement context compression before LLM +- [ ] Add failure classification for retry logic diff --git a/docs/architecture/systems/mystira.md b/docs/architecture/systems/mystira.md new file mode 100644 index 0000000..17079e7 --- /dev/null +++ b/docs/architecture/systems/mystira.md @@ -0,0 +1,584 @@ +# Mystira + +Mystira is an interactive story generation system for children. The SLM serves as a **content-shaping, moderation, personalization, and cost-control layer** around story generation and interactive experience flows. + +## Architecture Overview + +```mermaid +flowchart TB + subgraph User["User Layer"] + U[Child/Parent Input] + P[Parent Controls] + end + + subgraph SLML["SLM Experience Control Layer"] + C[Story Classifier] + A[Age-Tone Controller] + M[Moderation Filter] + W[World Consistency] + end + + subgraph State["State Management"] + S[Story State] + Pr[Child Profile] + Ch[Character Registry] + end + + subgraph Content["Content Pipeline"] + PC[Prompt Constructor] + LLM[Story LLM] + IMG[Illustration Generator] + end + + subgraph Output["Output Processing"] + SA[Safety Audit] + RL[Reading Level Check] + IP[Image Prompt Shaper] + end + + U --> C + P --> Pr + C --> A + A --> M + M --> W + W --> S + S --> Pr + Pr --> PC + PC --> LLM + LLM --> SA + SA --> RL + RL --> IMG + IMG --> IP + PC --> Ch + Ch --> W +``` + +## Detailed Data Flow + +```mermaid +sequenceDiagram + participant U as User Input + participant SLM as SLM Control Layer + participant SS as Story State + participant LLM as Story LLM + participant IMG as Image Service + participant MOD as Moderation + + U->>SLM: Story request + SLM->>SLM: Classify 
request type + SLM->>SLM: Check age appropriateness + SLM->>SLM: Validate parental controls + SLM->>SS: Update session context + SLM->>SS: Compress memory if needed + + alt Simple continuation + SLM->>SLM: Generate simple continuation + SLM->>MOD: Quick safety check + SLM->>U: Return response + else Full story generation + SLM->>LLM: Prepare enriched prompt + LLM->>SLM: Generated story + SLM->>SLM: Validate continuity + SLM->>MOD: Full moderation check + SLM->>SLM: Adapt reading level + SLM->>IMG: Shape image prompts + IMG->>SLM: Generated illustrations + SLM->>U: Return enriched story + end +``` + +## SLM as Experience Orchestrator + +The SLM sits between: + +1. **User input** — Classification, safety pre-check, parental control validation +2. **Story state / profile state** — Memory compression, continuity tracking +3. **Generation pipeline** — Prompt enrichment, context window management +4. **Illustration / asset prompts** — Style consistency, character adherence +5. **Moderation / age-appropriateness checks** — Multi-layer safety filtering + +```mermaid +flowchart LR + subgraph Input["Input Processing"] + I1[User Request] + I2[Parent Settings] + I3[Session History] + end + + subgraph SLM["SLM Decision Points"] + S1[Request Classification] + S2[Age-Tone Mapping] + S3[Safety Filter] + S4[Memory Compression] + end + + subgraph Decision["Routing Decision"] + D1{Complexity?} + D1 -->|Simple| R1[SLM Direct] + D1 -->|Complex| D2{Age Appropriate?} + D2 -->|Yes| R2[LLM Generation] + D2 -->|No| R3[Safe Rewrite] + end + + subgraph Output["Output Processing"] + O1[Continuity Check] + O2[Image Prompt] + O3[Reading Level] + O4[Final Safety] + end + + I1 --> S1 + I2 --> S2 + I3 --> S4 + S1 --> D1 + S2 --> D1 + S3 --> Decision + Decision --> Output +``` + +## Best SLM Use Cases + +### 1. 
Story Request Classification + +Determine request type: + +```json +{ + "story_type": "bedtime|learning|adventure|interactive|customization|continuation|image", + "age_range": "3-5|5-8|8-10|10-12", + "is_interactive": true, + "continuation": true, + "needs_images": true, + "curriculum_tags": ["kindness", "sharing", "animals"], + "estimated_complexity": "low|medium|high" +} +``` + +### 2. Age and Tone Control + +Enforce cheaply: + +```json +{ + "reading_level": "easy|moderate|advanced", + "sentence_length": "short|medium|long", + "emotional_tone": "calm|exciting|gentle|funny", + "safe_themes": true, + "lesson_alignment": ["kindness", "courage"], + "content_rating": "G|PG", + "prohibited_elements": [] +} +``` + +### 3. Moderation and Safe Rewriting + +Catch or rewrite: + +- Frightening content +- Inappropriate content +- Emotionally unsuitable scenes +- Unsafe user prompts +- Age-inappropriate vocabulary + +```json +{ + "flagged": false, + "rewritten": null, + "content_rating": "safe|caution|blocked", + "age_appropriate": true, + "concerns": [], + "rewrite_suggestions": [] +} +``` + +### 4. Session Memory Compression + +Keep only essential state: + +```json +{ + "session_id": "abc123", + "active_characters": ["Luna", "Bear"], + "current_quest": "find_moon", + "tone_constraints": "gentle_adventure", + "age_band": "3-5", + "plot_anchors": ["discovered_moon_stone", "met_starlight_friend"], + "character_states": { + "Luna": { "mood": "curious", "location": "forest_edge" }, + "Bear": { "mood": "helpful", "location": "forest_edge" } + }, + "reader_preferences": { "likes": ["animals", "stars"], "dislikes": ["scary"] } +} +``` + +### 5. Character and World Consistency + +Validate: + +- Names remain consistent +- World rules not violated +- Prior events respected +- Visual prompts align with canon + +```json +{ + "valid": true, + "inconsistencies": [], + "suggested_corrections": [], + "world_rules_violated": [], + "character_continuity_ok": true +} +``` + +### 6. 
Illustration Prompt Shaping + +Convert story scene to constrained image prompts: + +```json +{ + "prompt": "Luna the fox and Bear walking through moonlit forest, children's book style, soft colors, no scary elements", + "style": "children_book", + "style_params": { + "illustration_type": "watercolor", + "color_palette": "warm", + "lighting": "soft_moonlight" + }, + "character_refs": ["luna", "bear"], + "safety_check": "passed", + "age_appropriate": true, + "brand_compliant": true +} +``` + +## Implementation + +### Story Classification + +```python +async def classify_story_request( + user_input: str, + session: Session, + profile: ChildProfile +) -> StoryClassification: + prompt = f"""Classify this story request: + +User input: {user_input} +Session history: {session.summary} +Child age band: {profile.age_band} +Parent settings: {profile.parent_controls} + +Output as JSON with fields: +- story_type: bedtime|learning|adventure|interactive|customization|continuation|image +- age_range: target age range +- is_interactive: boolean +- needs_images: boolean +- curriculum_tags: array of educational tags +- complexity: low|medium|high""" + + return await slm_completion(prompt, schema=StoryClassification) +``` + +### Age and Tone Control + +```python +async def enforce_age_tone( + content: str, + profile: ChildProfile +) -> ControlledContent: + prompt = f"""Adapt content for age group: + +Content: {content[:1000]} +Age band: {profile.age_band} +Profile preferences: {profile.preferences} +Parent tone settings: {profile.parent_tone_settings} + +Output as JSON: +- adapted_content: rewritten content +- reading_level: easy|moderate|advanced +- safety_flag: boolean +- concerns: array of any issues""" + + return await slm_completion(prompt, schema=ControlledContent) +``` + +### Safe Rewriting + +```python +async def safe_rewrite(content: str, age_band: str) -> RewriteResult: + prompt = f"""Rewrite for safety: + +Content: {content[:2000]} +Age band: {age_band} + +If content is 
safe: return unchanged with "safe" status. +If content needs rewriting: return rewritten version with reason. +If content is unsafe: return blocked with specific reason. + +Output as JSON: +- status: safe|rewritten|blocked +- original: original content +- result: content after rewrite (if applicable) +- reason: explanation""" + + return await slm_completion(prompt, schema=RewriteResult) +``` + +### Memory Compression + +```python +async def compress_session(session: Session) -> CompressedSession: + prompt = f"""Compress session memory for story continuity: + +Current session messages: {session.messages[-20:]} +Active characters: {session.characters} +Current plot state: {session.plot_state} + +Output as JSON: +- summary: 2-3 sentence story summary +- active_characters: array of character names with key traits +- current_quest: current story goal or "none" +- plot_anchors: array of key events that must be remembered +- tone_constraints: current tone settings +- age_band: current age target""" + + return await slm_completion(prompt, schema=CompressedSession) +``` + +### Illustration Prompt Shaping + +```python +async def shape_image_prompt( + scene: StoryScene, + characters: list[Character], + brand_guidelines: BrandGuidelines +) -> ImagePrompt: + prompt = f"""Create child-safe, brand-aligned image prompt: + +Scene: {scene.description} +Characters: {format_characters(characters)} +Story tone: {scene.tone} +Brand style: {brand_guidelines.style} + +Output as JSON: +- prompt: complete image generation prompt +- style: illustration style +- style_params: detailed style parameters +- character_refs: references to character assets +- safety_check: passed|needs_review|failed +- age_appropriate: boolean +- brand_compliant: boolean""" + + return await slm_completion(prompt, schema=ImagePrompt) +``` + +## Implementation Matrix + +### SLM Endpoints + +| Function | Endpoint | Model | Latency Target | +| -------------------- | --------------- | ---------- | -------------- | +| 
Story Classification | `/classify` | Phi-3 Mini | <100ms | +| Age-Tone Control | `/age-tone` | Phi-3 Mini | <100ms | +| Safe Rewrite | `/safewrite` | Llama 3 8B | <200ms | +| Memory Compression | `/compress` | Phi-3 Mini | <100ms | +| Consistency Check | `/validate` | Phi-3 Mini | <100ms | +| Image Prompt | `/image-prompt` | Phi-3 Mini | <100ms | + +### Contract Shapes + +```typescript +interface StoryClassification { + story_type: StoryType; + age_range: AgeRange; + is_interactive: boolean; + continuation: boolean; + needs_images: boolean; + curriculum_tags: string[]; + complexity: Complexity; + confidence: number; +} + +interface ControlledContent { + adapted_content: string; + reading_level: ReadingLevel; + safety_flag: boolean; + concerns: string[]; + confidence: number; +} + +interface CompressedSession { + summary: string; + active_characters: CharacterSummary[]; + current_quest: string | null; + plot_anchors: string[]; + tone_constraints: ToneConstraints; + age_band: AgeBand; +} + +interface ImagePrompt { + prompt: string; + style: IllustrationStyle; + style_params: StyleParams; + character_refs: string[]; + safety_check: SafetyStatus; + age_appropriate: boolean; + brand_compliant: boolean; +} +``` + +### Telemetry Fields + +| Field | Type | Description | +| ------------------- | ------- | ------------------------------ | +| `request_id` | string | Unique request identifier | +| `session_id` | string | Story session identifier | +| `timestamp` | ISO8601 | Request timestamp | +| `slm_model` | string | SLM model used | +| `function` | string | Classification function called | +| `latency_ms` | number | SLM processing time | +| `confidence` | number | Model confidence score | +| `routed_to_llm` | boolean | Whether LLM was invoked | +| `age_band` | string | Target age range | +| `story_type` | string | Classified story type | +| `safety_flagged` | boolean | Content was flagged | +| `content_rewritten` | boolean | Content was rewritten | +| `tokens_used` | number 
| Total tokens consumed | +| `cost_usd` | number | Estimated cost | + +### Fallback Rules + +| Condition | Action | +| --------------------------- | ---------------------------------- | +| SLM confidence < 0.7 | Escalate to LLM for classification | +| SLM timeout | Use deterministic rules fallback | +| Moderation flag = "blocked" | Return safe error to user | +| Age band mismatch | Enforce age-appropriate rewrite | +| Consistency check fails | Notify, allow LLM override | +| Image prompt fails safety | Use default safe prompt | + +### Confidence Thresholds Flowchart + +```mermaid +flowchart TD + A[Classification Result] --> B{Confidence >= 0.9?} + B -->|Yes| C[Use SLM Result] + B -->|No| D{Confidence >= 0.7?} + D -->|Yes| E{LLM Verification} + E -->|Agree| C + E -->|Disagree| F[Use LLM Result] + D -->|No| F + F --> G[Log Discrepancy] +``` + +## Tradeoffs + +| Pros | Cons | +| ---------------------------------------- | ----------------------------------------------------- | +| Lowers cost for interactive sessions | SLMs are weaker for rich narrative creativity | +| Improves safety and consistency | Overuse can make stories feel templated | +| Helps maintain story canon | Compression may lose subtle emotional continuity | +| Enables scalable personalization | Moderation can become too restrictive if tuned poorly | +| Reduces unnecessary LLM for simple steps | Image prompts may lack artistic nuance | + +## Correct Role + +| Use SLM For | Use LLM For | +| --------------- | --------------------------- | +| Preparation | Rich storytelling | +| Guardrails | Emotionally nuanced scenes | +| Continuity | Narrative synthesis | +| Personalization | Creative expansions | +| Prompt shaping | Final polished storytelling | + +## Combined Cross-System Architecture + +```mermaid +flowchart TB + U[Users / Apps / Operators / Dev Events] + + U --> G[AI Gateway] + G --> G1[SLM: intent + safety + routing] + G1 --> G2{Path} + + G2 -->|agentic work| M[Cognitive Mesh] + G2 -->|repo / CI 
work| C[CodeFlow Engine] + G2 -->|tooling / automation| A[AgentKit Forge] + G2 -->|creative storytelling| Y[Mystira] + G2 -->|simple response| S[Small / Mid Model] + G2 -->|deep reasoning| L[Large Model] + + M --> M1[SLM: specialist router] + M1 --> M2[Architecture Agent] + M1 --> M3[Infra Agent] + M1 --> M4[Security Agent] + M1 --> M5[Research / Cost Agent] + M2 --> X[Shared State / Evidence] + M3 --> X + M4 --> X + M5 --> X + X --> M6[SLM: context compressor] + M6 --> L + + C --> C1[SLM: PR / CI classifier] + C1 --> C2[Pipeline policy] + C2 --> C3[Fast path / full path / contract checks] + + A --> A1[SLM: tool selector] + A1 --> A2[GitHub] + A1 --> A3[Azure] + A1 --> A4[Terraform] + A1 --> A5[Kusto / Docs] + + Y --> Y1[SLM: age-fit + moderation + continuity] + Y1 --> Y2[Story Model / Creative LLM] + Y2 --> Y3[SLM: consistency + reading level + image prompt shaping] + + R[PhoenixRooivalk Edge] --> R1[Fusion / Threat Scoring] + R1 --> R2[SLM: operator interpretation] + R2 --> R3[Console / Incident Reports] + + L --> Z[Final synthesis / high-complexity outputs] +``` + +## Platform Comparison + +| Platform | Best SLM Role | Should SLM be Primary? 
| Escalate to LLM When | +| --------------- | ----------------------------------------------- | ------------------------- | ------------------------------------------------ | +| AI Gateway | routing, safety, cost control | **yes** | ambiguity, complex reasoning | +| Cognitive Mesh | agent routing, decomposition, compression | **yes** | cross-agent synthesis needed | +| CodeFlow Engine | PR/CI triage, failure summaries | **yes** | root cause requires deep analysis | +| AgentKit Forge | tool selection, memory shaping | **yes** | planning becomes ambiguous or multi-step | +| PhoenixRooivalk | operator summaries, reports | **no** | strategic analysis or long-form reporting | +| **Mystira** | moderation, age-fit, continuity, prompt shaping | **yes** for control layer | rich storytelling, emotionally nuanced narrative | + +## Key Concerns + +| Concern | Strategy | +| ------------------- | ------------------------------------------------------------ | +| Safety | SLM pre-filter + LLM post-filter + deterministic rules | +| Age-appropriateness | Hard rules for age bands + SLM adaptation | +| Story continuity | SLM validates consistency with plot anchors | +| Cost | Route simple steps through SLM; LLM only for rich generation | +| Creativity | Reserve LLM for emotionally nuanced storytelling | +| Parental controls | Deterministic rules + SLM suggestion refinement | +| Brand consistency | SLM enforces brand guidelines in image prompts | + +## Canonical Principle for Mystira + +> **Use SLMs to make stories safe, consistent, and affordable.** +> **Use LLMs to make them magical.** + +## Implementation Checklist + +- [ ] Add story request classification endpoint +- [ ] Implement age and tone control pipeline +- [ ] Add moderation and safe rewriting +- [ ] Implement session memory compression +- [ ] Add character/world consistency validation +- [ ] Implement illustration prompt shaping +- [ ] Set up cost tracking per session type +- [ ] Configure confidence threshold cascades +- [ 
] Add parental controls integration +- [ ] Implement brand guidelines enforcement +- [ ] Add telemetry and observability +- [ ] Set up fallback deterministic rules diff --git a/docs/architecture/systems/phoenix-rooivalk.md b/docs/architecture/systems/phoenix-rooivalk.md new file mode 100644 index 0000000..dedaf96 --- /dev/null +++ b/docs/architecture/systems/phoenix-rooivalk.md @@ -0,0 +1,180 @@ +# PhoenixRooivalk + +PhoenixRooivalk is an edge AI counter-UAS (Unmanned Aerial System) system. **SLM must NOT be the primary kinetic or safety-critical decision-maker** — it sits in interpretation and operator-support layer only. + +## Architecture + +``` +Sensors + │ + ▼ +┌─────────────────────────────────────┐ +│ Rules + Signal Models + Fusion │ +│ (core detection - NOT SLM) │ +└─────────────────────────────────────┘ + │ + ▼ +Threat Detection + │ + ▼ +┌─────────────────────────────────────┐ +│ SLM Interpretation Layer │ +│ (summaries, reports, narratives) │ +└─────────────────────────────────────┘ + │ + ▼ +Operator Console +``` + +## Critical Principle + +> Use **rules + signal models + fusion engine** for core detection. +> Use **SLM only** for human-readable interpretation and workflow assistance. 
+ +**Never use SLM for:** + +- Primary safety-critical actuation +- Final kinetic authorization +- Real-time hard control loops +- Deterministic low-level signal classification (use classical/ML models) + +## Good SLM Use Cases + +| Use Case | Description | Output | +| ---------------------- | --------------------------- | ---------------------------------------- | +| Alert Summaries | Format alerts for operators | "Drone approaching from NW at 35m" | +| Event Clustering | Group similar events | `{ "cluster": "loitering", "count": 3 }` | +| Post-Mission Narrative | Generate mission reports | Full structured report | +| SOP Lookup | Suggest procedures | `{ "sop": "perimeter breach" }` | +| Incident Drafting | Draft incident reports | Human-readable report | +| Telemetry Translation | Convert raw to text | "RF signature consistent with..." | + +## Example SLM Outputs + +### Alert Summary + +```json +{ + "summary": "Drone detected approaching perimeter at 35m altitude", + "classification": "suspicious", + "confidence": 0.74, + "relevant_sensors": ["radar", "rf"], + "operator_action": "monitor" +} +``` + +### Post-Mission Narrative + +``` +Mission Summary: +- Duration: 45 minutes +- Events detected: 3 +- Threats: 1 (non-critical) +- Actions taken: Monitor mode + +Key Event: +14:32 - Drone detected approaching perimeter from NW +Classification: Consumer quadcopter (RF signature match) +Resolution: Left area at 14:38 +``` + +## Implementation + +### Edge Processing Pipeline + +```python +class EdgeProcessor: + def __init__(self): + self.slm = load_local_slm() # Gemma or Phi-3 + + async def process_telemetry(self, raw_stream: bytes) -> ProcessedEvent: + # Core detection is NOT SLM - rules + signal models + detection = self.fusion_engine.process(raw_stream) + + if detection.threat_level > THRESHOLD: + # SLM only for human interpretation + summary = await self.slm.summarize(detection) + + return ProcessedEvent( + detection=detection, + summary=summary, # SLM output + 
timestamp=datetime.utcnow() + ) +``` + +### Alert Formatting + +```python +async def format_alert(detection: Detection) -> OperatorAlert: + prompt = f"""Format this detection for operator: + +Radar: {detection.radar_summary} +RF: {detection.rf_signature} +Flight: {detection.flight_pattern} + +Output: summary, classification, recommended_action""" + + return await slm_completion(prompt) +``` + +### Report Generation + +```python +async def generate_mission_report(events: list[Event]) -> MissionReport: + prompt = f"""Generate post-mission report: + +Events: {format_events(events)} +Duration: {mission.duration} + +Output: structured report with key findings""" + + return await slm_completion(prompt) +``` + +## Tradeoffs + +| Pros | Cons | +| ----------------------------- | ------------------------------------------------------------ | +| Better operator comprehension | Hallucinated interpretations dangerous if presented as facts | +| Faster report generation | Must clearly separate inferred from sensor facts | +| Reduced cognitive load | Offline edge deployment constraints | + +## Key Concerns + +| Concern | Strategy | +| ------------------------- | -------------------------------------------- | +| Safety-critical decisions | Never use SLM for actuation | +| Hallucination | Clearly label SLM output as "interpretation" | +| Edge constraints | Optimize SLM for edge (quantization) | +| Offline operation | Full local inference capability | + +## Hardware Options + +| Device | SLM Capability | Notes | +| ----------- | ----------------- | ----------------------- | +| Jetson Nano | Phi-3 Mini (int4) | ~5ms inference | +| Jetson Orin | Phi-3 Mini (fp16) | Real-time processing | +| Edge CPU | Gemma 2B | Offline fallback | +| Mobile SoC | Phi-3 Mini (int4) | Phone/tablet deployment | + +## Model Optimization + +```python +# Quantize for edge deployment +from optimum.quanto import quantize + +model = quantize( + original_model, + weights=quantization_type.q4, + 
activations=quantization_type.q8 +) +``` + +## Implementation Checklist + +- [ ] Separate SLM from core detection pipeline +- [ ] Implement alert summarization for operators +- [ ] Add post-mission narrative generation +- [ ] Clearly label SLM output vs sensor facts +- [ ] Optimize for edge deployment +- [ ] Test offline operation diff --git a/docs/guides/README.md b/docs/guides/README.md new file mode 100644 index 0000000..a6ea1d0 --- /dev/null +++ b/docs/guides/README.md @@ -0,0 +1,25 @@ +# Guides + +Implementation guides for various topics. + +## SLM Implementation + +- [README](README.md) - Practical SLM implementation patterns, when to use SLM vs LLM + +## Architecture Reference + +See [docs/architecture](../architecture/) for detailed system documentation: + +- AI Gateway — SLM as admission control +- Cognitive Mesh — Agent orchestration +- PhoenixRooivalk — Edge AI (reports only) +- CodeFlow Engine — CI/CD intelligence +- AgentKit Forge — Agent building + +## Coming Soon + +- AI Gateway deployment guide +- Cognitive Mesh setup guide +- Edge deployment guide (PhoenixRooivalk) +- CodeFlow Engine integration +- AgentKit Forge quickstart diff --git a/docs/planning/request_to_token_attribution.md b/docs/planning/request_to_token_attribution.md index 34a282c..d5790c1 100644 --- a/docs/planning/request_to_token_attribution.md +++ b/docs/planning/request_to_token_attribution.md @@ -65,11 +65,11 @@ LiteLLM's OTEL callback automatically emits spans with: ### Phase 2: Correlation ID Propagation -**Status: In Progress** +**Status: ✅ Done** Correlation IDs flow through the system in two ways: -**Method A: Via Request Metadata (Recommended)** +**Method A: Via Request Metadata (Implemented)** Pass correlation IDs in the request body `metadata` field: ```json @@ -122,7 +122,7 @@ Start with downstream aggregation in pvc-costops-analytics - the cheapest and fa ### 1. cognitive-mesh (Upstream Caller) -**Required:** Must pass correlation headers when calling gateway. 
There are two methods: +**Required:** Pass correlation metadata in request body when calling gateway. There are two methods: **Method A: Via Request Metadata (Recommended)** Pass correlation IDs in the request body `metadata` field: @@ -207,18 +207,18 @@ _Note: Method B requires additional LiteLLM configuration or middleware._ ## Acceptance Criteria -| Criterion | Status | Notes | -| -------------------------------------------- | ---------- | ----------------------------------------- | -| 100% of LLM calls emit token telemetry | ✅ Done | Via OTEL callback | -| 100% include workflow + stage | ⚠️ Partial | Requires upstream to pass metadata | -| Support KQL joins by operation_Id/request_id | ✅ Done | OTEL spans include metadata | -| Request-completion rollup totals | 🔜 Future | Requires Phase 3 (downstream aggregation) | +| Criterion | Status | Notes | +| -------------------------------------------- | --------- | ------------------------------------------------------- | +| 100% of LLM calls emit token telemetry | ✅ Done | Via OTEL callback | +| 100% include workflow + stage | 🔜 Ready | Requires cognitive-mesh to pass metadata to gateway | +| Support KQL joins by operation_Id/request_id | 🔜 Ready | Requires pvc-costops-analytics to implement KQL queries | +| Request-completion rollup totals | 🔜 Future | Requires Phase 3 (downstream aggregation) | ## Dependencies -- cognitive-mesh: Must pass correlation headers to gateway +- cognitive-mesh: Pass correlation metadata in request body - pvc-costops-analytics: Must create KQL queries for new event shape -- infra: May need custom LiteLLM container image or OTEL collector +- infra: Application Insights resource + APPLICATIONINSIGHTS_CONNECTION_STRING wiring added; trace export requires custom LiteLLM image (with azure-monitor-opentelemetry) or explicit OTEL_EXPORTER_OTLP_ENDPOINT configuration (currently empty by default) ## Action Items @@ -226,12 +226,13 @@ _Note: Method B requires additional LiteLLM configuration or 
middleware._ 1. ✅ ai-gateway: Add OTEL callback for token telemetry (Phase 1) 2. ✅ ai-gateway: Document correlation ID requirements (Phase 2) +3. ✅ ai-gateway: Add Application Insights connection string wiring (Phase 1b - trace export requires custom image or OTLP collector) ### Pending -3. cognitive-mesh: Pass correlation IDs in request metadata -4. pvc-costops-analytics: Create KQL queries for OTEL span joins -5. pvc-costops-analytics: Implement request rollup aggregation (Phase 3) +4. cognitive-mesh: Pass correlation metadata in request body +5. pvc-costops-analytics: Create KQL queries for OTEL span joins +6. pvc-costops-analytics: Implement request rollup aggregation (Phase 3) --- diff --git a/infra/env/dev/variables.tf b/infra/env/dev/variables.tf index cf18df1..92d7c13 100644 --- a/infra/env/dev/variables.tf +++ b/infra/env/dev/variables.tf @@ -1,8 +1,8 @@ variable "env" { type = string validation { - condition = contains(["dev", "uat", "prod"], var.env) - error_message = "Environment must be one of: dev, uat, prod." + condition = contains(["dev", "staging", "prod"], var.env) + error_message = "Environment must be one of: dev, staging, prod." } } variable "location" { type = string } diff --git a/infra/env/prod/variables.tf b/infra/env/prod/variables.tf index 1a9a003..efcd1be 100644 --- a/infra/env/prod/variables.tf +++ b/infra/env/prod/variables.tf @@ -1,8 +1,8 @@ variable "env" { type = string validation { - condition = contains(["dev", "uat", "prod"], var.env) - error_message = "Environment must be one of: dev, uat, prod." + condition = contains(["dev", "staging", "prod"], var.env) + error_message = "Environment must be one of: dev, staging, prod." 
} } variable "location" { type = string } diff --git a/infra/env/uat/.terraform.lock.hcl b/infra/env/staging/.terraform.lock.hcl similarity index 100% rename from infra/env/uat/.terraform.lock.hcl rename to infra/env/staging/.terraform.lock.hcl diff --git a/infra/env/uat/main.tf b/infra/env/staging/main.tf similarity index 100% rename from infra/env/uat/main.tf rename to infra/env/staging/main.tf diff --git a/infra/env/uat/terraform.tfvars b/infra/env/staging/terraform.tfvars similarity index 89% rename from infra/env/uat/terraform.tfvars rename to infra/env/staging/terraform.tfvars index 566fd2e..2f80c99 100644 --- a/infra/env/uat/terraform.tfvars +++ b/infra/env/staging/terraform.tfvars @@ -1,4 +1,4 @@ -env = "uat" +env = "staging" projname = "aigateway" location = "southafricanorth" location_short = "san" @@ -7,7 +7,7 @@ location_short = "san" # NOTE: The TF_VAR_azure_openai_endpoint environment variable (set via the # GitHub Environment secret AZURE_OPENAI_ENDPOINT) takes precedence over this # value during CI/CD runs. For local development, either set that env var or -# update this file with the correct UAT endpoint. +# update this file with the correct staging endpoint. azure_openai_endpoint = "https://mys-shared-ai-san.cognitiveservices.azure.com" codex_model = "gpt-5.3-codex" @@ -22,7 +22,7 @@ secrets_expiration_date = "2027-03-31T00:00:00Z" tags = { owner = "ai-gateway-team" project = "aigateway" - env = "uat" + env = "staging" } enable_redis_cache = true diff --git a/infra/env/uat/variables.tf b/infra/env/staging/variables.tf similarity index 97% rename from infra/env/uat/variables.tf rename to infra/env/staging/variables.tf index ff4b0e5..2bab6ad 100644 --- a/infra/env/uat/variables.tf +++ b/infra/env/staging/variables.tf @@ -1,8 +1,8 @@ variable "env" { type = string validation { - condition = contains(["dev", "uat", "prod"], var.env) - error_message = "Environment must be one of: dev, uat, prod." 
+ condition = contains(["dev", "staging", "prod"], var.env) + error_message = "Environment must be one of: dev, staging, prod." } } variable "location" { type = string } diff --git a/infra/modules/aigateway_aca/main.tf b/infra/modules/aigateway_aca/main.tf index 412723c..9216a67 100644 --- a/infra/modules/aigateway_aca/main.tf +++ b/infra/modules/aigateway_aca/main.tf @@ -128,6 +128,15 @@ resource "azurerm_log_analytics_workspace" "law" { tags = local.tags } +resource "azurerm_application_insights" "ai" { + name = "${local.prefix}-ai-${var.location_short}" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + application_type = "web" + retention_in_days = var.env == "prod" ? 90 : 30 + tags = local.tags +} + resource "azurerm_container_app_environment" "cae" { name = local.cae_name location = azurerm_resource_group.rg.location @@ -245,6 +254,15 @@ resource "azurerm_key_vault_secret" "langfuse_secret_key" { depends_on = [azurerm_key_vault_access_policy.terraform_client] } +resource "azurerm_key_vault_secret" "appinsights_connection_string" { + name = "appinsights-connection-string" + value = azurerm_application_insights.ai.connection_string + key_vault_id = azurerm_key_vault.kv.id + expiration_date = var.secrets_expiration_date + + depends_on = [azurerm_key_vault_access_policy.terraform_client] +} + resource "azurerm_user_assigned_identity" "ca" { name = "${local.ca_name}-id" resource_group_name = azurerm_resource_group.rg.name @@ -330,6 +348,12 @@ resource "azurerm_container_app" "ca" { } } + secret { + name = "appinsights-connection-string" + key_vault_secret_id = azurerm_key_vault_secret.appinsights_connection_string.versionless_id + identity = azurerm_user_assigned_identity.ca.id + } + template { min_replicas = var.min_replicas max_replicas = var.max_replicas @@ -429,6 +453,13 @@ resource "azurerm_container_app" "ca" { } } + # Azure Application Insights connection string (for azure-monitor-opentelemetry exporter) 
+ # Use with custom LiteLLM image that includes azure-monitor-opentelemetry package + env { + name = "APPLICATIONINSIGHTS_CONNECTION_STRING" + secret_name = "appinsights-connection-string" + } + # LiteLLM commonly listens on 4000; set port as needed } } diff --git a/infra/modules/aigateway_aca/outputs.tf b/infra/modules/aigateway_aca/outputs.tf index f00e81a..a8dfe6b 100644 --- a/infra/modules/aigateway_aca/outputs.tf +++ b/infra/modules/aigateway_aca/outputs.tf @@ -29,3 +29,9 @@ output "container_app_environment_id" { description = "ID of the Container App Environment — used by sibling modules (e.g. dashboard_aca) to deploy into the same environment." value = azurerm_container_app_environment.cae.id } + +output "application_insights_connection_string" { + value = azurerm_application_insights.ai.connection_string + description = "Application Insights connection string for OTEL export." + sensitive = true +} diff --git a/infra/modules/aigateway_aca/variables.tf b/infra/modules/aigateway_aca/variables.tf index 377b408..7fce498 100644 --- a/infra/modules/aigateway_aca/variables.tf +++ b/infra/modules/aigateway_aca/variables.tf @@ -3,10 +3,10 @@ variable "env" { type = string - description = "Environment name (dev|uat|prod)" + description = "Environment name (dev|staging|prod)" validation { - condition = contains(["dev", "uat", "prod"], var.env) - error_message = "Environment must be one of: dev, uat, prod." + condition = contains(["dev", "staging", "prod"], var.env) + error_message = "Environment must be one of: dev, staging, prod." 
} } diff --git a/infra/modules/dashboard_aca/variables.tf b/infra/modules/dashboard_aca/variables.tf index 6aa0eea..7fbb116 100644 --- a/infra/modules/dashboard_aca/variables.tf +++ b/infra/modules/dashboard_aca/variables.tf @@ -1,9 +1,9 @@ variable "env" { type = string - description = "Environment name (dev|uat|prod)" + description = "Environment name (dev|staging|prod)" validation { - condition = contains(["dev", "uat", "prod"], var.env) - error_message = "Environment must be one of: dev, uat, prod." + condition = contains(["dev", "staging", "prod"], var.env) + error_message = "Environment must be one of: dev, staging, prod." } } diff --git a/infra/modules/state_service_aca/variables.tf b/infra/modules/state_service_aca/variables.tf index 51febf4..d11da7d 100644 --- a/infra/modules/state_service_aca/variables.tf +++ b/infra/modules/state_service_aca/variables.tf @@ -1,9 +1,9 @@ variable "env" { type = string - description = "Environment name (dev|uat|prod)" + description = "Environment name (dev|staging|prod)" validation { - condition = contains(["dev", "uat", "prod"], var.env) - error_message = "Environment must be one of: dev, uat, prod." + condition = contains(["dev", "staging", "prod"], var.env) + error_message = "Environment must be one of: dev, staging, prod." } } diff --git a/infra/scripts/terraform-init.ps1 b/infra/scripts/terraform-init.ps1 index 6b3dee5..9ba1588 100644 --- a/infra/scripts/terraform-init.ps1 +++ b/infra/scripts/terraform-init.ps1 @@ -1,7 +1,7 @@ # Load .env.local and run terraform init -upgrade -# Usage: .\infra\scripts\terraform-init.ps1 [dev|uat|prod] +# Usage: .\infra\scripts\terraform-init.ps1 [dev|staging|prod] -param([Parameter(Mandatory=$true)][ValidateSet("dev","uat","prod")][string]$Env) +param([Parameter(Mandatory=$true)][ValidateSet("dev","staging","prod")][string]$Env) $envFile = Join-Path $PSScriptRoot ".." 
".env.local" if (-not (Test-Path $envFile)) { diff --git a/infra/scripts/terraform-init.sh b/infra/scripts/terraform-init.sh index 52f3300..c7f5798 100644 --- a/infra/scripts/terraform-init.sh +++ b/infra/scripts/terraform-init.sh @@ -1,15 +1,15 @@ #!/bin/bash # Load .env.local and run terraform init -upgrade -# Usage: ./infra/scripts/terraform-init.sh [dev|uat|prod] +# Usage: ./infra/scripts/terraform-init.sh [dev|staging|prod] set -e -ENV="${1:?Usage: $0 dev|uat|prod}" +ENV="${1:?Usage: $0 dev|staging|prod}" case "$ENV" in - dev|uat|prod) ;; + dev|staging|prod) ;; *) - echo "Usage: $0 dev|uat|prod" - echo "Error: ENV must be dev, uat, or prod; got: $ENV" + echo "Usage: $0 dev|staging|prod" + echo "Error: ENV must be dev, staging, or prod; got: $ENV" exit 1 ;; esac diff --git a/scripts/add-federated-credentials.sh b/scripts/add-federated-credentials.sh index f7740bc..9969b0c 100644 --- a/scripts/add-federated-credentials.sh +++ b/scripts/add-federated-credentials.sh @@ -3,7 +3,7 @@ set -e # Add Federated Credentials for GitHub Actions Environments # Use this script if you already ran bootstrap and got AADSTS700213 because -# the workflow uses environment: dev/uat/prod but Azure only had branch-based credentials. +# the workflow uses environment: dev/staging/prod but Azure only had branch-based credentials. # # Usage: $0 # Example: $0 abc123-def456 phoenixvc ai-gateway @@ -11,7 +11,7 @@ set -e if [ "$#" -ne 3 ]; then echo "Usage: $0 " echo "" - echo "Adds federated identity credentials for dev, uat, prod environments" + echo "Adds federated identity credentials for dev, staging, prod environments" echo "to an existing Azure AD app registration (fixes AADSTS700213)." echo "" echo "Example: $0 \$(az ad app list --display-name pvc-shared-github-actions-oidc --query [0].appId -o tsv) phoenixvc ai-gateway" @@ -30,8 +30,8 @@ fi command -v jq >/dev/null 2>&1 || { echo "Error: jq is required for safe JSON construction. 
Install jq and retry."; exit 1; } -echo "Ensuring federated credentials for environments (dev, uat, prod) on app $APP_ID..." -for ENV in dev uat prod; do +echo "Ensuring federated credentials for environments (dev, staging, prod) on app $APP_ID..." +for ENV in dev staging prod; do SUBJECT="repo:$GITHUB_ORG/$GITHUB_REPO:environment:$ENV" EXISTING_SUBJECT=$(az ad app federated-credential list --id "$OBJECT_ID" --query "[?name=='github-actions-$ENV'].subject" -o tsv 2>/dev/null | head -n1) if [ -n "$EXISTING_SUBJECT" ] && [ "$EXISTING_SUBJECT" = "$SUBJECT" ]; then diff --git a/scripts/bootstrap.ps1 b/scripts/bootstrap.ps1 index 19a8e44..0583b05 100644 --- a/scripts/bootstrap.ps1 +++ b/scripts/bootstrap.ps1 @@ -120,8 +120,8 @@ $bytes = New-Object byte[] 32 [System.Security.Cryptography.RandomNumberGenerator]::Create().GetBytes($bytes) $AIGATEWAY_KEY = [Convert]::ToBase64String($bytes) -Write-Host "Ensuring Federated Credentials for GitHub Actions (environments: dev, uat, prod)..." -foreach ($EnvName in @("dev","uat","prod")) { +Write-Host "Ensuring Federated Credentials for GitHub Actions (environments: dev, staging, prod)..." 
+foreach ($EnvName in @("dev","staging","prod")) { $SUBJECT = "repo:" + $GITHUB_ORG + "/" + $GITHUB_REPO + ":environment:" + $EnvName $EXISTING_SUBJECT = az ad app federated-credential list --id $OBJECT_ID --query "[?name=='github-actions-$EnvName'].subject" -o tsv 2>$null | Select-Object -First 1 if ($EXISTING_SUBJECT -and ($EXISTING_SUBJECT -eq $SUBJECT)) { diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh index aa093f0..18417dc 100644 --- a/scripts/bootstrap.sh +++ b/scripts/bootstrap.sh @@ -13,7 +13,7 @@ GITHUB_REPO="$2" SCOPE="$3" # --- Configuration --- -# Shared infra: OIDC app and TF state span dev/uat/prod +# Shared infra: OIDC app and TF state span dev/staging/prod LOCATION="southafricanorth" RG_NAME="pvc-shared-tfstate-rg-san" CONTAINER_NAME="tfstate" @@ -119,9 +119,9 @@ OBJECT_ID=$(az ad app show --id "$APP_ID" --query id --output tsv) AIGATEWAY_KEY=$(openssl rand -base64 32 2>/dev/null || head -c 32 /dev/urandom | base64) -echo "Ensuring Federated Credentials for GitHub Actions (environments: dev, uat, prod)..." +echo "Ensuring Federated Credentials for GitHub Actions (environments: dev, staging, prod)..." command -v jq >/dev/null 2>&1 || { echo "Error: jq is required for safe JSON construction. Install jq and retry."; exit 1; } -for ENV in dev uat prod; do +for ENV in dev staging prod; do SUBJECT="repo:$GITHUB_ORG/$GITHUB_REPO:environment:$ENV" EXISTING_SUBJECT=$(az ad app federated-credential list --id "$OBJECT_ID" --query "[?name=='github-actions-$ENV'].subject" -o tsv 2>/dev/null | head -n1) if [ -n "$EXISTING_SUBJECT" ] && [ "$EXISTING_SUBJECT" = "$SUBJECT" ]; then