From f7c2943b994ddc2d119701c9ee0c5625039440c3 Mon Sep 17 00:00:00 2001
From: "James N." <james.nguyen@microsoft.com>
Date: Fri, 13 Feb 2026 08:22:48 -0800
Subject: [PATCH 1/8] feat: per-developer GitHub Environments with OIDC

- Create 6 GitHub environments: production, integration-james,
  integration-nicole, integration-heena, integration-tim, integration-matt
- Move all variables from repo-level to environment-level
- Update orchestrate.yml: *-dev branch → integration-<name> mapping
- Uncomment environment: binding in all 7 reusable workflows
- Fix TF state key: use environment name instead of branch name
- Fix destroy.yml bugs: iteration var and unsanitized state key
- Remove auto-destroy (all environments persist)
- Add OIDC federated credentials for integration-james and production
- Create prod.tfvars for production environment
- Update GITHUB_ACTIONS_SETUP.md with developer onboarding guide
---
 .github/workflows/agent-evaluation.yml   |   1 +
 .github/workflows/destroy.yml            |   9 +-
 .github/workflows/docker-application.yml |   2 +-
 .github/workflows/docker-mcp.yml         |   2 +-
 .github/workflows/infrastructure.yml     |  20 +--
 .github/workflows/orchestrate.yml        |  55 +++++----
 .github/workflows/update-containers.yml  |   2 +-
 infra/GITHUB_ACTIONS_SETUP.md            | 147 +++++++++++++----------
 infra/terraform/prod.tfvars              |  34 ++++++
 9 files changed, 168 insertions(+), 104 deletions(-)
 create mode 100644 infra/terraform/prod.tfvars
diff --git a/.github/workflows/agent-evaluation.yml b/.github/workflows/agent-evaluation.yml
index 952507cbe..725fd44c2 100644
--- a/.github/workflows/agent-evaluation.yml
+++ b/.github/workflows/agent-evaluation.yml
@@ -54,6 +54,7 @@ jobs:
   agent-evaluation:
     name: Agent Quality Evaluation
     runs-on: ubuntu-latest
+    environment: ${{ inputs.environment || 'integration' }}
     permissions:
       contents: read
       id-token: write   # Needed for OIDC → DefaultAzureCredential
diff --git a/.github/workflows/destroy.yml b/.github/workflows/destroy.yml
index a47111ce3..38bfb6a09 100644
--- a/.github/workflows/destroy.yml
+++ b/.github/workflows/destroy.yml
@@ -30,7 +30,7 @@ jobs:
   terraform_destroy:
     name: Terraform Destroy
     runs-on: ubuntu-latest
-    # environment: ${{ inputs.environment || 'dev' }}  # Commented out to use repo-level variables
+    environment: ${{ inputs.environment || 'integration' }}
     permissions:
       id-token: write
       contents: read
@@ -66,13 +66,14 @@ jobs:
             -var subscription_id=${{ vars.AZURE_SUBSCRIPTION_ID }} \
             -var acr_name=${{ vars.ACR_NAME }} \
             -var location=${{ vars.AZ_REGION }} \
-            -var environment=${{ inputs.environment || 'dev' }} \
+            -var environment=${{ inputs.environment || 'integration' }} \
             -var docker_image_mcp=${{ vars.DOCKER_IMAGE_MCP }} \
             -var docker_image_backend=${{ vars.DOCKER_IMAGE_BACKEND }} \
-            -var iteration=${{ inputs.environment || 'dev' }}
+            -var iteration=${{ vars.ITERATION }}
         env:
           TFSTATE_RG: ${{ vars.TFSTATE_RG }}
           TFSTATE_ACCOUNT: ${{ vars.TFSTATE_ACCOUNT }}
           TFSTATE_CONTAINER: ${{ vars.TFSTATE_CONTAINER }}
-          TFSTATE_KEY: "${{ github.event.repository.name }}-${{ github.ref_name }}.tfstate"
+          # Use environment name for state key — must match infrastructure.yml
+          TFSTATE_KEY: "${{ github.event.repository.name }}-${{ inputs.environment || 'integration' }}.tfstate"
 
diff --git a/.github/workflows/docker-application.yml b/.github/workflows/docker-application.yml
index 49089cc44..407406041 100644
--- a/.github/workflows/docker-application.yml
+++ b/.github/workflows/docker-application.yml
@@ -27,7 +27,7 @@ jobs:
   build:
     name: Build & Push Backend Image
     runs-on: ubuntu-latest
-    # environment: ${{ inputs.environment || 'dev' }}  # Commented out to use repo-level variables
+    environment: ${{ inputs.environment || 'integration' }}
     permissions:
       id-token: write
       contents: read
diff --git a/.github/workflows/docker-mcp.yml b/.github/workflows/docker-mcp.yml
index 1d995f362..f111351a9 100644
--- a/.github/workflows/docker-mcp.yml
+++ b/.github/workflows/docker-mcp.yml
@@ -27,7 +27,7 @@ jobs:
   build:
     name: Build & Push MCP Image
     runs-on: ubuntu-latest
-    # environment: ${{ inputs.environment || 'dev' }}  # Commented out to use repo-level variables
+    environment: ${{ inputs.environment || 'integration' }}
     permissions:
       id-token: write
       contents: read
diff --git a/.github/workflows/infrastructure.yml b/.github/workflows/infrastructure.yml
index 25f33238f..3ba7374ff 100644
--- a/.github/workflows/infrastructure.yml
+++ b/.github/workflows/infrastructure.yml
@@ -41,7 +41,7 @@ jobs:
   tf:
     name: Terraform Deployment
     runs-on: ubuntu-latest
-    # environment: removed to use repo-level variables
+    environment: ${{ inputs.environment }}
     if: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.iac-tool || 'tf') == 'tf' }}
     permissions:
       id-token: write
@@ -65,13 +65,13 @@ jobs:
       - name: Terraform Setup
         uses: hashicorp/setup-terraform@v3
 
-      - name: Sanitize branch name for state key
+      - name: Sanitize environment name for state key
         id: sanitize
         run: |
           # Replace / and other invalid chars with - for valid Azure blob name
-          BRANCH="${{ github.head_ref || github.ref_name }}"
-          SAFE_BRANCH=$(echo "$BRANCH" | sed 's/[^a-zA-Z0-9._-]/-/g')
-          echo "branch=$SAFE_BRANCH" >> $GITHUB_OUTPUT
+          ENV="${{ inputs.environment }}"
+          SAFE_ENV=$(echo "$ENV" | sed 's/[^a-zA-Z0-9._-]/-/g')
+          echo "env=$SAFE_ENV" >> $GITHUB_OUTPUT
 
       - name: Terraform Init/Plan/Apply
         id: terraform
@@ -87,14 +87,14 @@ jobs:
                          -backend-config="container_name=${TFSTATE_CONTAINER}" -backend-config="use_oidc=true" -backend-config="use_azuread_auth=true"
           terraform plan -out tfplan \
             -var project_name=${{ github.event.repository.name }} \
-            -var environment=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.environment || (github.base_ref == 'main' && 'prod') || (github.base_ref == 'int-agentic' && 'integration') || 'dev' }} \
+            -var environment=${{ inputs.environment }} \
             -var tenant_id=${{ vars.AZURE_TENANT_ID }} \
             -var subscription_id=${{ vars.AZURE_SUBSCRIPTION_ID }} \
             -var acr_name=${{ vars.ACR_NAME }} \
             -var location=${{ vars.AZ_REGION }} \
             -var docker_image_mcp=${{ vars.DOCKER_IMAGE_MCP }} \
             -var docker_image_backend=${{ vars.DOCKER_IMAGE_BACKEND }} \
-            -var iteration=${{ (github.event_name != 'workflow_dispatch' && github.base_ref != 'main' && github.base_ref != 'int-agentic') && '${GITHUB_SHA:0:7}' || vars.ITERATION }}
+            -var iteration=${{ vars.ITERATION }}
 
           terraform apply -auto-approve tfplan
 
@@ -109,12 +109,12 @@ jobs:
           TFSTATE_RG: ${{ vars.TFSTATE_RG }}
           TFSTATE_ACCOUNT: ${{ vars.TFSTATE_ACCOUNT }}
           TFSTATE_CONTAINER: ${{ vars.TFSTATE_CONTAINER }}
-          # Use sanitized branch name for valid Azure blob name
-          TFSTATE_KEY: "${{ github.event.repository.name }}-${{ steps.sanitize.outputs.branch }}.tfstate"
+          # Use environment name for state key — each env gets its own TF state
+          TFSTATE_KEY: "${{ github.event.repository.name }}-${{ steps.sanitize.outputs.env }}.tfstate"
 
   bicep:
       runs-on: ubuntu-latest
-      # environment: removed to use repo-level variables
+      environment: ${{ inputs.environment }}
       if: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.iac-tool || 'tf') == 'bicep' }}
       permissions:
         id-token: write
diff --git a/.github/workflows/orchestrate.yml b/.github/workflows/orchestrate.yml
index 38c421497..ec09578fd 100644
--- a/.github/workflows/orchestrate.yml
+++ b/.github/workflows/orchestrate.yml
@@ -3,18 +3,22 @@ name: Orchestrate Deployment
 # ─────────────────────────────────────────────────────────────────────
 # Pipeline modes:
 #   PR → main / int-agentic  ➜  tests-only  (validate against existing env)
-#   Push → main              ➜  full deploy  (deploy to prod after merge)
-#   Push → tjs-infra-as-code ➜  full deploy  (dev, with auto-destroy)
+#   Push → main              ➜  full deploy  (deploy to production)
+#   Push → *-dev             ➜  full deploy  (deploy to integration-<name>)
 #   Manual dispatch          ➜  full deploy  (chosen environment)
+#
+# Per-developer environments:
+#   Each developer pushes to their own <name>-dev branch.
+#   The pipeline maps <name>-dev → integration-<name> environment,
+#   which contains that developer's own Azure subscription credentials.
 # ─────────────────────────────────────────────────────────────────────
 
 on:
   workflow_dispatch:
     inputs:
       target_env:
-        type: choice
-        description: Environment to deploy
-        options: [dev, test, prod]
+        type: string
+        description: "Environment to deploy (e.g. integration-james, production)"
         required: true
 
   pull_request:
@@ -25,7 +29,7 @@ on:
   push:
     branches:
       - main
-      - tjs-infra-as-code
+      - '*-dev'
 
 permissions:
   contents: read
@@ -51,19 +55,26 @@ jobs:
           if [ "$EVENT" = "workflow_dispatch" ]; then
             ENV="${{ inputs.target_env }}"
           elif [ "$EVENT" = "pull_request" ]; then
+            # PRs: resolve from the target (base) branch
             case "${{ github.base_ref }}" in
-              main)         ENV="prod" ;;
+              main)         ENV="production" ;;
               int-agentic)  ENV="integration" ;;
-              *)            ENV="dev" ;;
+              *)            ENV="integration" ;;
             esac
           elif [ "$EVENT" = "push" ]; then
-            case "${{ github.ref_name }}" in
-              main)               ENV="prod" ;;
-              tjs-infra-as-code)  ENV="dev" ;;
-              *)                  ENV="dev" ;;
+            BRANCH="${GITHUB_REF_NAME}"  # runner-provided env var, not expression interpolation: a branch name must never become script text
+            case "$BRANCH" in
+              main)
+                ENV="production" ;;
+              *-dev)
+                # Strip the -dev suffix to get the developer name: james-dev → integration-james
+                DEV_NAME="${BRANCH%-dev}"
+                ENV="integration-${DEV_NAME}" ;;
+              *)
+                ENV="integration" ;;
+            esac
           else
-            ENV="dev"
+            ENV="integration"
           fi
 
           # ── Resolve pipeline mode ──
@@ -90,6 +101,7 @@ jobs:
     needs: pipeline-config
     if: needs.pipeline-config.outputs.full_deploy == 'true'
     runs-on: ubuntu-latest
+    environment: ${{ needs.pipeline-config.outputs.environment }}
     steps:
       - name: Azure OIDC Login
         uses: azure/login@v2
@@ -155,6 +167,7 @@ jobs:
     needs: pipeline-config
     if: needs.pipeline-config.outputs.full_deploy == 'false'
     runs-on: ubuntu-latest
+    environment: ${{ needs.pipeline-config.outputs.environment }}
     outputs:
       backend_endpoint: ${{ steps.lookup.outputs.backend_endpoint }}
       mcp_endpoint: ${{ steps.lookup.outputs.mcp_endpoint }}
@@ -242,17 +255,7 @@ jobs:
     secrets: inherit
 
   # ────────────────────────────────────────────────────────────────────
-  # Optional: Destroy infrastructure (dev branches only, after tests pass)
+  # NOTE: Auto-destroy is disabled. All environments (integration-* and
+  # production) persist their infrastructure. To tear down an environment
+  # manually, use: workflow_dispatch → destroy.yml with the target env.
   # ────────────────────────────────────────────────────────────────────
-  destroy-infrastructure:
-    needs: [pipeline-config, integration-tests, agent-evaluation]
-    if: >-
-      always()
-      && needs.pipeline-config.outputs.full_deploy == 'true'
-      && needs.integration-tests.result == 'success'
-      && (needs.agent-evaluation.result == 'success' || needs.agent-evaluation.result == 'skipped' || needs.agent-evaluation.result == 'failure')
-      && (github.ref_name == 'tjs-infra-as-code' || github.ref_name == 'james-dev' || (inputs.target_env && inputs.target_env == 'dev'))
-    uses: ./.github/workflows/destroy.yml
-    with:
-      environment: ${{ needs.pipeline-config.outputs.environment }}
-    secrets: inherit
diff --git a/.github/workflows/update-containers.yml b/.github/workflows/update-containers.yml
index 51460d7ff..9137649c6 100644
--- a/.github/workflows/update-containers.yml
+++ b/.github/workflows/update-containers.yml
@@ -35,7 +35,7 @@ jobs:
   update-containers:
     name: Update Container Apps
     runs-on: ubuntu-latest
-    # environment: ${{ inputs.environment }}  # Commented out to use repo-level variables
+    environment: ${{ inputs.environment }}
     permissions:
       id-token: write
       contents: read
diff --git a/infra/GITHUB_ACTIONS_SETUP.md b/infra/GITHUB_ACTIONS_SETUP.md
index 010b9bf67..a8178319a 100644
--- a/infra/GITHUB_ACTIONS_SETUP.md
+++ b/infra/GITHUB_ACTIONS_SETUP.md
@@ -7,7 +7,8 @@ This guide documents how to configure GitHub Actions for automated infrastructur
 The CI/CD pipeline uses:
 - **OIDC Authentication** - No secrets stored in GitHub, uses federated identity
 - **Remote Terraform State** - Shared state in Azure Storage for team collaboration
-- **Environment-based Deployments** - Separate configs for dev, integration, prod
+- **Per-developer GitHub Environments** - Each developer has their own `integration-<name>` environment backed by their own Azure subscription
+- **Environment-scoped Variables** - All Azure credentials and config are stored per-environment, not at repo level
 
 ## Architecture
 
@@ -17,6 +18,10 @@ The CI/CD pipeline uses:
 ├─────────────────────────────────────────────────────────────────────┤
 │  orchestrate.yml                                                     │
 │    ├── pipeline-config (determine mode + environment)               │
+│    │     ├── main branch       → production environment             │
+│    │     ├── james-dev branch  → integration-james environment      │
+│    │     ├── nicole-dev branch → integration-nicole environment     │
+│    │     └── <name>-dev branch → integration-<name> environment     │
 │    │                                                                 │
 │    ├── [Full Deploy – push/manual]                                   │
 │    │     ├── preflight (enable storage access)                      │
@@ -25,8 +30,7 @@ The CI/CD pipeline uses:
 │    │     ├── docker-mcp.yml (build MCP service image)               │
 │    │     ├── update-containers.yml (refresh running apps)           │
 │    │     ├── integration-tests.yml (smoke tests)                    │
-│    │     ├── agent-evaluation.yml (AI quality evaluation)           │
-│    │     └── destroy.yml (optional cleanup, dev only)               │
+│    │     └── agent-evaluation.yml (AI quality evaluation)           │
 │    │                                                                 │
 │    ├── [Tests Only – pull requests]                                  │
 │    │     └── resolve-endpoints (az containerapp show)               │
@@ -37,10 +41,9 @@ The CI/CD pipeline uses:
                               │ OIDC (no secrets)
                               ▼
 ┌─────────────────────────────────────────────────────────────────────┐
-│                         Azure                                        │
+│                  Azure (per developer subscription)                  │
 ├─────────────────────────────────────────────────────────────────────┤
-│  ├── App Registration (GitHub-Actions-OpenAIWorkshop)               │
-│  │     └── Federated Credentials (main, int-agentic, PRs)           │
+│  ├── App Registration (federated credential for environment)        │
 │  ├── Storage Account (Terraform state)                              │
 │  ├── Container Registry (Docker images)                             │
 │  ├── Container Apps (MCP + Backend)                                 │
@@ -90,54 +93,49 @@ Write-Host "Subscription ID: $SubscriptionId"
 
 ## Step 2: Configure Federated Credentials
 
-Create federated credentials for each branch/environment.
+Create federated credentials for the GitHub environment that maps to this developer.
 
-> **Important:** GitHub org/repos that have a [customized OIDC subject claim template](https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/about-security-hardening-with-openid-connect#customizing-the-subject-claims-for-an-organization-or-repository)
-> use a numeric subject format: `repository_owner_id:<owner_id>:repository_id:<repo_id>:...`.
-> You can find these IDs via `gh api repos/{owner}/{repo} --jq '.owner.id, .id'`.
-> If your org has NOT customized the template, use the default `repo:ORG/REPO:...` format.
+> **Important:** This repo uses a [customized OIDC subject claim template](https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/about-security-hardening-with-openid-connect#customizing-the-subject-claims-for-an-organization-or-repository)
+> with `repository_owner_id` and `repository_id` instead of the default `repo:ORG/REPO:...` format.
+> All CI jobs bind an `environment:` context, so the OIDC subject includes `environment:<env-name>`.
 
 ```powershell
 $AppId = "YOUR_APP_ID"  # From Step 1
 
-# --- Option A: Default subject format ---
-# Main branch (prod)
+# ── Per-developer integration environment ──
+# Replace <name> with your developer name (e.g., james, nicole, tim)
+# The subject must exactly match what GitHub presents in the OIDC token.
 az ad app federated-credential create --id $AppId --parameters '{
-    "name": "github-main",
+    "name": "github-env-integration-<name>",
     "issuer": "https://token.actions.githubusercontent.com",
-    "subject": "repo:YOUR_ORG/YOUR_REPO:ref:refs/heads/main",
+    "subject": "repository_owner_id:6154722:repository_id:605201834:environment:integration-<name>",
     "audiences": ["api://AzureADTokenExchange"]
 }'
 
-# --- Option B: Customized (numeric ID) subject format ---
-# Use this if your org has customized the OIDC subject claim template.
-# Replace OWNER_ID and REPO_ID with actual values from the GitHub API.
-
-# Main branch (prod)
-az ad app federated-credential create --id $AppId --parameters '{
-    "name": "github-main",
-    "issuer": "https://token.actions.githubusercontent.com",
-    "subject": "repository_owner_id:OWNER_ID:repository_id:REPO_ID:ref:refs/heads/main",
-    "audiences": ["api://AzureADTokenExchange"]
-}'
-
-# Integration branch
+# ── Production environment (only needed for the prod subscription owner) ──
 az ad app federated-credential create --id $AppId --parameters '{
-    "name": "github-int-agentic",
+    "name": "github-env-production",
     "issuer": "https://token.actions.githubusercontent.com",
-    "subject": "repository_owner_id:OWNER_ID:repository_id:REPO_ID:ref:refs/heads/int-agentic",
+    "subject": "repository_owner_id:6154722:repository_id:605201834:environment:production",
     "audiences": ["api://AzureADTokenExchange"]
 }'
 
-# Pull Requests
+# ── Pull Requests (for PR validation against existing env) ──
+# Note: a job that binds environment: presents an environment:<env-name> subject, even on a PR,
+# so this pull_request credential only matches PR jobs that use OIDC without an environment.
 az ad app federated-credential create --id $AppId --parameters '{
     "name": "github-pullrequests",
     "issuer": "https://token.actions.githubusercontent.com",
-    "subject": "repository_owner_id:OWNER_ID:repository_id:REPO_ID:pull_request",
+    "subject": "repository_owner_id:6154722:repository_id:605201834:pull_request",
     "audiences": ["api://AzureADTokenExchange"]
 }'
 ```
 
+> **How to find your IDs:**
+> - Owner ID: `gh api repos/microsoft/OpenAIWorkshop --jq '.owner.id'` → `6154722`
+> - Repo ID: `gh api repos/microsoft/OpenAIWorkshop --jq '.id'` → `605201834`
+> - Check current OIDC template: `gh api repos/microsoft/OpenAIWorkshop/actions/oidc/customization/sub`
+
 ## Step 3: Assign Azure Roles
 
 ```powershell
@@ -224,36 +222,44 @@ az role assignment create `
     --scope $STORAGE_ID
 ```
 
-## Step 5: Configure GitHub Repository Variables
+## Step 5: Configure GitHub Environment Variables
+
+All variables are stored at the **environment level** (not repo level). Each developer's
+`integration-<name>` environment contains their own Azure subscription credentials.
 
-Go to **GitHub → Repository → Settings → Secrets and Variables → Actions → Variables**
+Go to **GitHub → Repository → Settings → Environments → `integration-<name>` → Environment variables**
 
-### Required Variables
+### Required Variables (per environment)
 
 | Variable | Description | Example Value |
 |----------|-------------|---------------|
-| `AZURE_CLIENT_ID` | App Registration Client ID | `1d34c51d-9d49-48f3-9e48-6a0f099c5f03` |
-| `AZURE_TENANT_ID` | Azure AD Tenant ID | `0fbe7234-45ea-498b-b7e4-1a8b2d3be4d9` |
-| `AZURE_SUBSCRIPTION_ID` | Azure Subscription ID | `840b5c5c-3f4a-459a-94fc-6bad2a969f9d` |
+| `AZURE_CLIENT_ID` | App Registration Client ID | `1d34c51d-...` |
+| `AZURE_TENANT_ID` | Azure AD Tenant ID | `0fbe7234-...` |
+| `AZURE_SUBSCRIPTION_ID` | Azure Subscription ID | `840b5c5c-...` |
 | `TFSTATE_RG` | Resource group for TF state | `rg-tfstate` |
-| `TFSTATE_ACCOUNT` | Storage account name | `sttfstateoaiworkshop` |
+| `TFSTATE_ACCOUNT` | Storage account name (globally unique) | `sttfstateoaiworkshop` |
 | `TFSTATE_CONTAINER` | Blob container name | `tfstate` |
-| `ACR_NAME` | Azure Container Registry name | `acropenaiworkshop002` |
-| `PROJECT_NAME` | Project identifier | `OpenAIWorkshop` |
+| `ACR_NAME` | Azure Container Registry name | `OpenAIWorkshopdevacr002` |
+| `PROJECT_NAME` | Project identifier | `openaiworkshop` |
 | `ITERATION` | Deployment iteration | `002` |
 | `AZ_REGION` | Azure region | `eastus2` |
-| `AZURE_AI_PROJECT_ENDPOINT` | AI Foundry project endpoint for evaluation | `https://eastus2oai.services.ai.azure.com/api/projects/eastus2` |
-| `AZURE_OPENAI_EVAL_ENDPOINT` | AI Services endpoint for judge models | `https://eastus2oai.services.ai.azure.com/` |
+| `DOCKER_IMAGE_MCP` | MCP Docker image name | `mcp-service` |
+| `DOCKER_IMAGE_BACKEND` | Backend Docker image name | `backend-service` |
+| `REGISTRY_LOGIN_SERVER` | Container registry server | `docker.io` |
+| `AZURE_AI_PROJECT_ENDPOINT` | AI Foundry project endpoint for evaluation | `https://...services.ai.azure.com/api/projects/...` |
+| `AZURE_OPENAI_EVAL_ENDPOINT` | AI Services endpoint for judge models | `https://...services.ai.azure.com/` |
 | `AZURE_OPENAI_EVAL_DEPLOYMENT` | Model deployment for LLM-as-judge | `gpt-5.2` |
 
-### Optional Environment-Specific Variables
+### Current Environments
 
-Create GitHub Environments (`dev`, `integration`, `prod`) for environment-specific overrides:
-
-| Environment | Variable | Value |
-|-------------|----------|-------|
-| `prod` | `AZ_REGION` | `eastus` |
-| `prod` | `ITERATION` | `001` |
+| Environment | Owner | Branch Mapping |
+|-------------|-------|----------------|
+| `production` | James | `main` |
+| `integration-james` | James | `james-dev` |
+| `integration-nicole` | Nicole | `nicole-dev` |
+| `integration-heena` | Heena | `heena-dev` |
+| `integration-tim` | Tim | `tim-dev` |
+| `integration-matt` | Matt | `matt-dev` |
 
 ---
 
@@ -263,10 +269,10 @@ The orchestrator has two modes determined by the trigger:
 
 | Trigger | Mode | What runs | Environment |
 |---------|------|-----------|-------------|
-| **PR → `main`** | Tests only | `resolve-endpoints` → `integration-tests` | `prod` |
+| **PR → `main`** | Tests only | `resolve-endpoints` → `integration-tests` | `production` |
 | **PR → `int-agentic`** | Tests only | `resolve-endpoints` → `integration-tests` | `integration` |
-| **Push to `main`** (after merge) | Full deploy | Preflight → Infra → Build → Update → Tests → Eval | `prod` |
-| **Push to `tjs-infra-as-code`** | Full deploy | Preflight → Infra → Build → Update → Tests → Eval → Destroy | `dev` |
+| **Push to `main`** (after merge) | Full deploy | Preflight → Infra → Build → Update → Tests → Eval | `production` |
+| **Push to `<name>-dev`** | Full deploy | Preflight → Infra → Build → Update → Tests → Eval | `integration-<name>` |
 | **Manual dispatch** | Full deploy | Preflight → Infra → Build → Update → Tests → Eval | Chosen env |
 
 ### Tests-Only Mode (PRs)
@@ -288,23 +294,42 @@ environment.
 
 | Workflow | Trigger | What it does |
 |----------|---------|--------------|
-| `orchestrate.yml` | PRs, push to main/tjs-infra-as-code, manual | Orchestrates full or tests-only pipeline |
+| `orchestrate.yml` | PRs, push to main/*-dev, manual | Orchestrates full or tests-only pipeline |
 | `infrastructure.yml` | Called by orchestrate (full deploy) | Terraform plan/apply |
 | `docker-application.yml` | Called by orchestrate (full deploy) | Build backend container |
 | `docker-mcp.yml` | Called by orchestrate (full deploy) | Build MCP container |
 | `update-containers.yml` | Called by orchestrate (full deploy) | Refresh Container Apps |
-| `destroy.yml` | Called by orchestrate (dev only) | Terraform destroy |
+| `destroy.yml` | Manual dispatch only | Terraform destroy |
 | `agent-evaluation.yml` | Called by orchestrate (full deploy) | AI quality evaluation via Azure AI Foundry |
 | `integration-tests.yml` | Called by orchestrate (both modes) | Run pytest integration tests |
 
 ## Branch to Environment Mapping
 
-| Branch | Environment | Auto-destroy |
-|--------|-------------|--------------|
-| `main` | `prod` | ❌ No |
-| `int-agentic` | `integration` | ❌ No |
-| `tjs-infra-as-code` | `dev` | ✅ Yes |
-| Other branches | `dev` | Depends on config |
+| Branch | Environment | Persistent |
+|--------|-------------|------------|
+| `main` | `production` | ✅ Yes |
+| `james-dev` | `integration-james` | ✅ Yes |
+| `nicole-dev` | `integration-nicole` | ✅ Yes |
+| `heena-dev` | `integration-heena` | ✅ Yes |
+| `tim-dev` | `integration-tim` | ✅ Yes |
+| `matt-dev` | `integration-matt` | ✅ Yes |
+| `<name>-dev` | `integration-<name>` | ✅ Yes |
+
+> All environments persist their infrastructure. To tear down manually, use
+> `workflow_dispatch` → `destroy.yml` with the target environment.
+
+---
+
+## Developer Onboarding
+
+To add a new developer to the pipeline:
+
+1. **Create an Azure App Registration** in the developer's own Azure tenant (Step 1 above)
+2. **Add a federated credential** with subject `repository_owner_id:6154722:repository_id:605201834:environment:integration-<name>` (Step 2 above)
+3. **Assign Azure roles** to the service principal (Steps 3 and 3b above)
+4. **Create TF state storage** in the developer's subscription (Step 4 above)
+5. **Ask a repo admin** to create the `integration-<name>` GitHub Environment and set the 16 environment variables (Step 5 above)
+6. **Developer pushes to `<name>-dev`** branch — the pipeline will pick up the environment automatically
 
 ---
 
diff --git a/infra/terraform/prod.tfvars b/infra/terraform/prod.tfvars
new file mode 100644
index 000000000..8ee8b17d5
--- /dev/null
+++ b/infra/terraform/prod.tfvars
@@ -0,0 +1,34 @@
+# Production environment configuration: Terraform variable values for the "production" deployment
+environment      = "production"
+location         = "eastus2"
+project_name     = "OpenAIWorkshop"
+iteration        = "002"
+tenant_id        = "0fbe7234-45ea-498b-b7e4-1a8b2d3be4d9"  # NOTE(review): full ID committed here, while infra/GITHUB_ACTIONS_SETUP.md truncates it — confirm intended
+subscription_id  = "840b5c5c-3f4a-459a-94fc-6bad2a969f9d"  # NOTE(review): same concern as tenant_id; workflows also pass -var subscription_id — confirm which value applies
+
+# Optional: Set to false if you want to use API keys (not recommended)
+use_cosmos_managed_identity = true
+
+# OpenAI deployment configuration
+create_openai_deployment = true
+openai_deployment_name   = "gpt-5.2-chat"
+openai_model_name        = "gpt-5.2-chat"
+openai_model_version     = "2025-12-11"
+openai_api_version       = "2025-04-01-preview"
+openai_deployment_capacity = 200  # 200k tokens/minute
+
+# OpenAI embedding deployment configuration
+create_openai_embedding_deployment = true
+openai_embedding_deployment_name   = "text-embedding-ada-002"
+openai_embedding_model_name        = "text-embedding-ada-002"
+openai_embedding_model_version     = "2"
+
+# Networking configuration
+enable_networking       = true
+enable_private_endpoint = true
+vnet_address_prefix            = "10.10.0.0/16"
+container_apps_subnet_prefix   = "10.10.0.0/23"
+private_endpoint_subnet_prefix = "10.10.2.0/24"
+
+# MCP Service Security
+mcp_internal_only = true

From 055915b110813295ae1374fe0c9d75ea517962cc Mon Sep 17 00:00:00 2001
From: "James N." <james.nguyen@microsoft.com>
Date: Fri, 13 Feb 2026 08:34:00 -0800
Subject: [PATCH 2/8] fix: strip hyphens from ACR name in Docker workflows

---
 .github/workflows/docker-application.yml | 3 ++-
 .github/workflows/docker-mcp.yml         | 3 ++-
 .github/workflows/update-containers.yml  | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/docker-application.yml b/.github/workflows/docker-application.yml
index 407406041..3f1ab122a 100644
--- a/.github/workflows/docker-application.yml
+++ b/.github/workflows/docker-application.yml
@@ -46,10 +46,11 @@ jobs:
         id: acr
         run: |
           # Construct ACR name matching Terraform pattern: {project}{env}acr{iteration}
+          # ACR names must be alphanumeric — strip hyphens to match Terraform's replace("-", "")
           PROJECT="${{ vars.PROJECT_NAME || 'OpenAIWorkshop' }}"
           ENV="${{ inputs.environment || 'dev' }}"
           ITERATION="${{ vars.ITERATION || '002' }}"
-          ACR_NAME="${PROJECT}${ENV}acr${ITERATION}"
+          ACR_NAME=$(echo "${PROJECT}${ENV}acr${ITERATION}" | tr -d '-')
           echo "name=${ACR_NAME}" >> $GITHUB_OUTPUT
           echo "server=${ACR_NAME}.azurecr.io" >> $GITHUB_OUTPUT
           echo "Using ACR: ${ACR_NAME}"
diff --git a/.github/workflows/docker-mcp.yml b/.github/workflows/docker-mcp.yml
index f111351a9..3242142a8 100644
--- a/.github/workflows/docker-mcp.yml
+++ b/.github/workflows/docker-mcp.yml
@@ -46,10 +46,11 @@ jobs:
         id: acr
         run: |
           # Construct ACR name matching Terraform pattern: {project}{env}acr{iteration}
+          # ACR names must be alphanumeric — strip hyphens to match Terraform's replace("-", "")
           PROJECT="${{ vars.PROJECT_NAME || 'OpenAIWorkshop' }}"
           ENV="${{ inputs.environment || 'dev' }}"
           ITERATION="${{ vars.ITERATION || '002' }}"
-          ACR_NAME="${PROJECT}${ENV}acr${ITERATION}"
+          ACR_NAME=$(echo "${PROJECT}${ENV}acr${ITERATION}" | tr -d '-')
           echo "name=${ACR_NAME}" >> $GITHUB_OUTPUT
           echo "server=${ACR_NAME}.azurecr.io" >> $GITHUB_OUTPUT
           echo "Using ACR: ${ACR_NAME}"
diff --git a/.github/workflows/update-containers.yml b/.github/workflows/update-containers.yml
index 9137649c6..294d3aee7 100644
--- a/.github/workflows/update-containers.yml
+++ b/.github/workflows/update-containers.yml
@@ -64,7 +64,8 @@ jobs:
           echo "backend_app=ca-be-${ITERATION}" >> $GITHUB_OUTPUT
           
           # ACR name follows Terraform pattern: {project}{env}acr{iteration}
-          ACR_NAME="${PROJECT}${ENV}acr${ITERATION}"
+          # ACR names must be alphanumeric — strip hyphens to match Terraform's replace("-", "")
+          ACR_NAME=$(echo "${PROJECT}${ENV}acr${ITERATION}" | tr -d '-')
           echo "acr_name=${ACR_NAME}" >> $GITHUB_OUTPUT
           echo "acr_server=${ACR_NAME}.azurecr.io" >> $GITHUB_OUTPUT
           echo "Using ACR: ${ACR_NAME}"

From 0f10403dd1353584d10db7c3dcdd21510762bdd1 Mon Sep 17 00:00:00 2001
From: "James N." <james.nguyen@microsoft.com>
Date: Fri, 13 Feb 2026 10:15:58 -0800
Subject: [PATCH 3/8] Add auto-import recovery for Terraform 'already exists'
 errors

When a Terraform apply fails midway (e.g., timeout, quota), resources may
exist in Azure but not in TF state. On retry, Terraform fails with 'already
exists'. This change adds a retry loop (max 3 attempts) that:
1. Detects 'already exists' errors in apply output
2. Parses the TF resource address and Azure resource ID
3. Auto-imports orphaned resources into state
4. Retries the apply

Eliminates need for manual deletion via Azure Portal.
---
 .github/workflows/infrastructure.yml | 74 +++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/infrastructure.yml b/.github/workflows/infrastructure.yml
index 3ba7374ff..00a4f579b 100644
--- a/.github/workflows/infrastructure.yml
+++ b/.github/workflows/infrastructure.yml
@@ -82,21 +82,73 @@ jobs:
           export ARM_TENANT_ID="${{ vars.AZURE_TENANT_ID }}"
           export ARM_SUBSCRIPTION_ID="${{ vars.AZURE_SUBSCRIPTION_ID }}"
 
+          # Common -var flags used by plan and import; quoted so a value with spaces stays one argument
+          TF_VARS=(
+            -var "project_name=${{ github.event.repository.name }}"
+            -var "environment=${{ inputs.environment }}"
+            -var "tenant_id=${{ vars.AZURE_TENANT_ID }}"
+            -var "subscription_id=${{ vars.AZURE_SUBSCRIPTION_ID }}"
+            -var "acr_name=${{ vars.ACR_NAME }}"
+            -var "location=${{ vars.AZ_REGION }}"
+            -var "docker_image_mcp=${{ vars.DOCKER_IMAGE_MCP }}"
+            -var "docker_image_backend=${{ vars.DOCKER_IMAGE_BACKEND }}"
+            -var "iteration=${{ vars.ITERATION }}"
+          )
+
           terraform init -backend-config="resource_group_name=${TFSTATE_RG}" \
                          -backend-config="key=${TFSTATE_KEY}" -backend-config="storage_account_name=${TFSTATE_ACCOUNT}" \
                          -backend-config="container_name=${TFSTATE_CONTAINER}" -backend-config="use_oidc=true" -backend-config="use_azuread_auth=true"
-          terraform plan -out tfplan \
-            -var project_name=${{ github.event.repository.name }} \
-            -var environment=${{ inputs.environment }} \
-            -var tenant_id=${{ vars.AZURE_TENANT_ID }} \
-            -var subscription_id=${{ vars.AZURE_SUBSCRIPTION_ID }} \
-            -var acr_name=${{ vars.ACR_NAME }} \
-            -var location=${{ vars.AZ_REGION }} \
-            -var docker_image_mcp=${{ vars.DOCKER_IMAGE_MCP }} \
-            -var docker_image_backend=${{ vars.DOCKER_IMAGE_BACKEND }} \
-            -var iteration=${{ vars.ITERATION }}
 
-          terraform apply -auto-approve tfplan
+          # ── Apply with auto-import on "already exists" errors ──
+          # If a prior run partially created resources but crashed before recording
+          # them in state, Terraform will fail with "already exists". This loop
+          # detects those errors, auto-imports the orphaned resources, and retries.
+          MAX_ATTEMPTS=3
+          for attempt in $(seq 1 $MAX_ATTEMPTS); do
+            echo "🔄 Terraform apply attempt $attempt/$MAX_ATTEMPTS"
+
+            terraform plan -out tfplan "${TF_VARS[@]}"
+
+            # tee always exits 0; PIPESTATUS[0] carries terraform's own exit status.
+            if terraform apply -auto-approve tfplan 2>&1 | tee /tmp/tf_apply.log; [ "${PIPESTATUS[0]}" -eq 0 ]; then
+              echo "✅ Terraform apply succeeded"
+              break
+            fi
+
+            # Check if the failure is due to "already exists" errors
+            if ! grep -q "already exists" /tmp/tf_apply.log; then
+              echo "❌ Terraform failed with a non-import error"
+              cat /tmp/tf_apply.log
+              exit 1
+            fi
+
+            if [ "$attempt" -eq "$MAX_ATTEMPTS" ]; then
+              echo "❌ Terraform failed after $MAX_ATTEMPTS attempts"
+              cat /tmp/tf_apply.log
+              exit 1
+            fi
+
+            echo "⚠️ Detected 'already exists' errors — auto-importing orphaned resources..."
+
+            # Each Terraform diagnostic (opened by "╷", closed by "╵") carries the Azure ID in
+            #   ... A resource with the ID "/.../containerApps/ca-mcp-002" already exists ...
+            # and the resource address on a separate line:   with azurerm_container_app.mcp,
+            # awk pairs the two per diagnostic (either order) and prints "<address><TAB><azure id>".
+            while IFS=$'\t' read -r tf_addr azure_id; do
+              echo "  📥 Importing $tf_addr → $azure_id"
+              # Best-effort import; stdin comes from /dev/null so terraform cannot eat the loop's input.
+              terraform import -input=false "${TF_VARS[@]}" "$tf_addr" "$azure_id" < /dev/null \
+                || echo "::warning::Import of $tf_addr failed; the next apply will report it again"
+            done < <(
+              awk 'function emit() { if (id != "" && addr != "") print addr "\t" id; id = addr = "" }
+                   /╷/ { id = addr = "" }
+                   /already exists/ && match($0, /the ID "[^"]+"/) { id = substr($0, RSTART + 8, RLENGTH - 9) }
+                   addr == "" && match($0, /with [^ ,]+,/) { addr = substr($0, RSTART + 5, RLENGTH - 6) }
+                   /╵/ { emit() }  END { emit() }' /tmp/tf_apply.log
+            )
+
+            echo "🔁 Retrying terraform apply..."
+          done
 
           output=$(terraform output -raw openai_endpoint 2>/dev/null || true)
           echo "MODEL_ENDPOINT=$output" >> $GITHUB_OUTPUT

From 1a93fc32fb28ada132d857de70112110a79de236 Mon Sep 17 00:00:00 2001
From: "James N." <james.nguyen@microsoft.com>
Date: Fri, 13 Feb 2026 10:18:32 -0800
Subject: [PATCH 4/8] Rename workflow to CI/CD Pipeline; fix PR trigger for
 int-agentic

- Rename 'Orchestrate Deployment' -> 'CI/CD Pipeline'
- Remove int-agentic from pull_request trigger: PRs to int-agentic
  were failing because environment 'integration' has no OIDC federated
  credential. PR validation only needed for main (production gate).
- Simplify base_ref case statement
---
 .github/workflows/orchestrate.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/orchestrate.yml b/.github/workflows/orchestrate.yml
index ec09578fd..93716859d 100644
--- a/.github/workflows/orchestrate.yml
+++ b/.github/workflows/orchestrate.yml
@@ -1,4 +1,4 @@
-name: Orchestrate Deployment
+name: CI/CD Pipeline
 
 # ─────────────────────────────────────────────────────────────────────
 # Pipeline modes:
@@ -24,7 +24,6 @@ on:
   pull_request:
     branches:
       - main
-      - int-agentic
 
   push:
     branches:
@@ -58,7 +57,6 @@ jobs:
             # PRs: resolve from the target (base) branch
             case "${{ github.base_ref }}" in
               main)         ENV="production" ;;
-              int-agentic)  ENV="integration" ;;
               *)            ENV="integration" ;;
             esac
           elif [ "$EVENT" = "push" ]; then

From 2aa25a0261ea14b38367a876094ec0478da68821 Mon Sep 17 00:00:00 2001
From: "James N." <james.nguyen@microsoft.com>
Date: Fri, 13 Feb 2026 13:12:06 -0800
Subject: [PATCH 5/8] fix: gracefully skip PR tests when target environment not
 yet deployed

resolve-endpoints now sets deployed=false instead of exit 1 when
Container Apps don't exist. integration-tests job checks this flag
and skips when the environment hasn't been deployed yet (e.g., first
PR to production).
---
 .github/workflows/orchestrate.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/orchestrate.yml b/.github/workflows/orchestrate.yml
index 93716859d..56a34d48b 100644
--- a/.github/workflows/orchestrate.yml
+++ b/.github/workflows/orchestrate.yml
@@ -169,6 +169,7 @@ jobs:
     outputs:
       backend_endpoint: ${{ steps.lookup.outputs.backend_endpoint }}
       mcp_endpoint: ${{ steps.lookup.outputs.mcp_endpoint }}
+      deployed: ${{ steps.lookup.outputs.deployed }}
     steps:
       - name: Azure OIDC Login
         uses: azure/login@v2
@@ -199,11 +200,13 @@ jobs:
 
           if [ -n "$BE_FQDN" ]; then
             echo "backend_endpoint=https://${BE_FQDN}" >> $GITHUB_OUTPUT
+            echo "deployed=true" >> $GITHUB_OUTPUT
             echo "✅ Backend: https://${BE_FQDN}"
           else
-            echo "::error::Backend Container App not found in $RG – is the environment deployed?"
+            echo "::warning::Backend Container App not found in $RG – environment not yet deployed. Skipping PR tests."
             echo "backend_endpoint=" >> $GITHUB_OUTPUT
-            exit 1
+            echo "deployed=false" >> $GITHUB_OUTPUT
+            exit 0
           fi
 
           if [ -n "$MCP_FQDN" ]; then
@@ -224,7 +227,7 @@ jobs:
     if: >-
       always() && (
         needs.update-containers.result == 'success'
-        || needs.resolve-endpoints.result == 'success'
+        || (needs.resolve-endpoints.result == 'success' && needs.resolve-endpoints.outputs.deployed == 'true')
       )
     uses: ./.github/workflows/integration-tests.yml
     with:

From 21e09be4693a9e91bb01e5bf8a10e052f30fe412 Mon Sep 17 00:00:00 2001
From: "James N." <james.nguyen@microsoft.com>
Date: Fri, 13 Feb 2026 13:16:25 -0800
Subject: [PATCH 6/8] feat: auto-promotion pipeline (dev -> int-agentic -> main)

- New workflow: promote-to-main.yml
  Triggered on push to int-agentic, creates/updates a single rolling
  PR to main with latest commit summary. Human review required.

- New job: auto-merge in orchestrate.yml
  After successful full pipeline on *-dev branch, auto-merges the
  open PR from that dev branch into int-agentic (squash merge).

- Updated permissions: contents:write, pull-requests:write

Flow: dev push -> full pipeline -> auto-merge to int-agentic
      -> auto-create PR to main -> human review -> merge -> prod deploy
---
 .github/workflows/orchestrate.yml     | 57 +++++++++++++++-
 .github/workflows/promote-to-main.yml | 94 +++++++++++++++++++++++++++
 2 files changed, 149 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/promote-to-main.yml

diff --git a/.github/workflows/orchestrate.yml b/.github/workflows/orchestrate.yml
index 56a34d48b..19d01b36d 100644
--- a/.github/workflows/orchestrate.yml
+++ b/.github/workflows/orchestrate.yml
@@ -2,11 +2,16 @@ name: CI/CD Pipeline
 
 # ─────────────────────────────────────────────────────────────────────
 # Pipeline modes:
-#   PR → main / int-agentic  ➜  tests-only  (validate against existing env)
+#   PR → main                ➜  tests-only  (validate against existing env)
 #   Push → main              ➜  full deploy  (deploy to production)
 #   Push → *-dev             ➜  full deploy  (deploy to integration-<name>)
 #   Manual dispatch          ➜  full deploy  (chosen environment)
 #
+# Promotion flow:
+#   *-dev push → full pipeline → auto-merge PR to int-agentic
+#   int-agentic push → promote-to-main.yml → creates/updates PR to main
+#   main PR → human review + tests-only → merge → full deploy to production
+#
 # Per-developer environments:
 #   Each developer pushes to their own <name>-dev branch.
 #   The pipeline maps <name>-dev → integration-<name> environment,
@@ -31,7 +36,8 @@ on:
       - '*-dev'
 
 permissions:
-  contents: read
+  contents: write
+  pull-requests: write
   id-token: write
 
 
@@ -255,6 +261,53 @@ jobs:
       eval_limit: 5
     secrets: inherit
 
+  # ────────────────────────────────────────────────────────────────────
+  # Step 7: Auto-merge dev branch PR into int-agentic
+  #   After a successful full deploy from a *-dev branch, squash-merge the open PR
+  #   from that branch into int-agentic (meant to kick off promote-to-main.yml).
+  #   NOTE(review): GITHUB_TOKEN pushes normally don't start workflows — confirm.
+  # ────────────────────────────────────────────────────────────────────
+  auto-merge:
+    needs: [pipeline-config, integration-tests, agent-evaluation]
+    if: >-
+      always()
+      && needs.pipeline-config.outputs.full_deploy == 'true'
+      && needs.integration-tests.result == 'success'
+      && (needs.agent-evaluation.result == 'success' || needs.agent-evaluation.result == 'skipped')
+      && github.event_name == 'push' && endsWith(github.ref_name, '-dev')
+    runs-on: ubuntu-latest
+    steps:
+      - name: Merge dev PR into int-agentic
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Passed through env (not inlined into the script) so the branch name is never parsed as shell
+          BRANCH: ${{ github.ref_name }}
+        run: |
+          echo "🔍 Looking for open PR: ${BRANCH} → int-agentic"
+
+          PR_NUMBER=$(gh pr list \
+            --repo "${{ github.repository }}" \
+            --base int-agentic \
+            --head "$BRANCH" \
+            --state open \
+            --json number \
+            --jq '.[0].number // empty')
+
+          if [ -n "$PR_NUMBER" ]; then
+            echo "✅ Found PR #${PR_NUMBER}"
+            echo "🔀 Merging ${BRANCH} → int-agentic..."
+            gh pr merge "$PR_NUMBER" \
+              --repo "${{ github.repository }}" \
+              --squash \
+              --auto \
+              --subject "chore: merge ${BRANCH} into int-agentic (auto)" \
+              --body "Auto-merged after successful CI/CD pipeline run ${{ github.run_id }}"
+            echo "✅ PR #${PR_NUMBER} merge initiated"
+          else
+            echo "ℹ️ No open PR found from ${BRANCH} → int-agentic"
+            echo "   Create one with: gh pr create --base int-agentic --head ${BRANCH}"
+          fi
+
   # ────────────────────────────────────────────────────────────────────
   # NOTE: Auto-destroy is disabled. All environments (integration-* and
   # production) persist their infrastructure. To tear down an environment
diff --git a/.github/workflows/promote-to-main.yml b/.github/workflows/promote-to-main.yml
new file mode 100644
index 000000000..cb2c93faf
--- /dev/null
+++ b/.github/workflows/promote-to-main.yml
@@ -0,0 +1,94 @@
+name: Promote to Main
+
+# ─────────────────────────────────────────────────────────────────────
+# Triggered when int-agentic receives a merge (push).
+# Creates or updates a single rolling PR from int-agentic → main.
+# The PR accumulates all developer changes and requires human review
+# before merging to production.
+# ─────────────────────────────────────────────────────────────────────
+
+on:
+  push:
+    branches:
+      - int-agentic
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  # Single job: keep one rolling int-agentic → main PR open and refresh its
+  # description on every push to int-agentic.
+  promote:
+    runs-on: ubuntu-latest
+    steps:
+      # fetch-depth: 0 pulls full history so origin/main..HEAD can be listed below.
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      # Drives the gh CLI with the workflow token: edits the open promotion PR
+      # when there is one, otherwise opens it.
+      - name: Create or update PR to main
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Look up the open int-agentic → main PR, if any.
+          # `.[0].number // empty` leaves EXISTING_PR empty when nothing matches.
+          EXISTING_PR=$(gh pr list \
+            --base main \
+            --head int-agentic \
+            --state open \
+            --json number \
+            --jq '.[0].number // empty')
+
+          # Up to 20 non-merge commits reachable from HEAD (int-agentic) but not
+          # from origin/main, i.e. what this promotion would bring to main.
+          RECENT_COMMITS=$(git log origin/main..HEAD --oneline --no-merges -n 20)
+
+          # The body is identical for create and update except for the label in
+          # the footer line.
+          if [ -n "$EXISTING_PR" ]; then
+            STAMP_LABEL="Last updated"
+          else
+            STAMP_LABEL="Created"
+          fi
+
+          # The string's continuation lines sit at the script's left margin so the
+          # Markdown in the PR description is not indented.
+          PR_BODY="## Promotion: int-agentic → main
+
+          This is an auto-maintained PR that promotes changes from \`int-agentic\` to \`main\` (production).
+
+          **Review required** before merging to production.
+
+          ### Recent Changes
+          \`\`\`
+          ${RECENT_COMMITS}
+          \`\`\`
+
+          ### Pipeline Status
+          - Integration tests passed on each developer's environment before merge to int-agentic
+          - Merging this PR will trigger a full production deployment
+
+          ---
+          _${STAMP_LABEL}: $(date -u '+%Y-%m-%d %H:%M UTC') by commit ${{ github.sha }}_"
+
+          # Apply the body to the existing PR, or open a new PR with it.
+          if [ -n "$EXISTING_PR" ]; then
+            echo "✅ PR #${EXISTING_PR} already exists (int-agentic → main)"
+            echo "   Updating PR body with latest commit info..."
+            # Overwrite the description so reviewers see the current commit list.
+            gh pr edit "$EXISTING_PR" --body "$PR_BODY"
+            echo "✅ PR #${EXISTING_PR} body updated"
+          else
+            echo "📝 Creating new PR: int-agentic → main"
+            # No promotion PR is open, so start a new one.
+            gh pr create \
+              --base main \
+              --head int-agentic \
+              --title "Promote: int-agentic → main (production)" \
+              --body "$PR_BODY"
+            echo "✅ New PR created"
+          fi

From 632597869475dade91849cc10bba915271b2a531 Mon Sep 17 00:00:00 2001
From: "James N." <james.nguyen@microsoft.com>
Date: Fri, 13 Feb 2026 14:37:45 -0800
Subject: [PATCH 7/8] ci: skip pipeline on doc-only changes; update workflows
 readme

---
 .github/workflows/orchestrate.yml     |  10 +++
 .github/workflows/promote-to-main.yml |   4 +
 .github/workflows/readme.md           | 101 ++++++++++++++++++--------
 3 files changed, 85 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/orchestrate.yml b/.github/workflows/orchestrate.yml
index 19d01b36d..52632b9b8 100644
--- a/.github/workflows/orchestrate.yml
+++ b/.github/workflows/orchestrate.yml
@@ -29,11 +29,21 @@ on:
   pull_request:
     branches:
       - main
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'
+      - 'LICENSE'
+      - '.github/workflows/readme.md'
 
   push:
     branches:
       - main
       - '*-dev'
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'
+      - 'LICENSE'
+      - '.github/workflows/readme.md'
 
 permissions:
   contents: write
diff --git a/.github/workflows/promote-to-main.yml b/.github/workflows/promote-to-main.yml
index cb2c93faf..5eef368a3 100644
--- a/.github/workflows/promote-to-main.yml
+++ b/.github/workflows/promote-to-main.yml
@@ -11,6 +11,10 @@ on:
   push:
     branches:
       - int-agentic
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'
+      - 'LICENSE'
 
 permissions:
   contents: read
diff --git a/.github/workflows/readme.md b/.github/workflows/readme.md
index 2b79de81d..2f7cb8baf 100644
--- a/.github/workflows/readme.md
+++ b/.github/workflows/readme.md
@@ -1,40 +1,81 @@
-# Workflows
+# CI/CD Pipeline
 
-## infra-plan-apply.yml
+## Flow
 
-### Summary
+```
+*-dev  ──push──▶  CI/CD Pipeline (8 stages)  ──pass──▶  auto-merge PR → int-agentic
+                                                                │
+int-agentic  ◀──────────────────────────────────────────────────┘
+     │
+     └──push──▶  promote-to-main.yml  ──▶  creates/updates PR → main
+                                                                │
+main  ◀──────────  human review + merge  ◀──────────────────────┘
+     │
+     └──push──▶  CI/CD Pipeline (production deploy)
+```
 
-The infra plan and apply pipeline is a pipeline to deploy the infrastructure necessary for the Azure Open AI Workshop ot run. It is currently configured to do a workflow dispatch that expects you to choose whether you want bicep or terraform as well as a target environment. Terraform is currently tested. 
+**Doc-only changes** (`.md`, `docs/`, `LICENSE`) are ignored — no pipeline runs.
 
-### Requirements
+## Workflows
 
-#### Environment Variables in GitHub
+| File | Trigger | Purpose |
+|------|---------|---------|
+| `orchestrate.yml` | push to `*-dev`/`main`, PR to `main` | Main CI/CD: infra → build → deploy → test → eval → auto-merge |
+| `promote-to-main.yml` | push to `int-agentic` | Creates/updates a rolling PR from `int-agentic` → `main` |
+| `infrastructure.yml` | called by orchestrate | Terraform plan + apply with auto-import recovery |
+| `docker-application.yml` | called by orchestrate | Build & push backend container to ACR |
+| `docker-mcp.yml` | called by orchestrate | Build & push MCP container to ACR |
+| `update-containers.yml` | called by orchestrate | Deploy new images to Container Apps |
+| `integration-tests.yml` | called by orchestrate | API tests against live environment |
+| `agent-evaluation.yml` | called by orchestrate | Agent quality eval → Azure AI Foundry |
+| `destroy.yml` | manual dispatch | Terraform destroy for a target environment |
 
-Configure your repo to have necessary variables for your environments. At a minimum, the following are needed:
-- AZ_REGION: azure region you plan to deploy to
-- AZURE_CLIENT_ID: the deployment client. Currently, this is used with an OIDC process so we don't need to set the secrets. Because of the way we are deploying, needs the ability to assign RBAC in Azure as well as creating resources.
-- AZURE_SUBSCRIPTION_ID: the subscription to deploy into.
-- AZURE_TENANT_ID: the tenant the client was created in
-- DOCKER_IMAGE_BACKEND: docker image repo/name:tag from docker hub for backend FastAPI service. Still need to test with ACR. Also need to test with dynamic build from the repo.
-- DOCKER_IMAGE_MCP: docker image repo/name:tag from docker hub for MCP service. Still need to test with ACR. Also need to test with dynamic build from the repo.
+## Pipeline Stages
 
-Required for terraform:
-- TFSTATE_ACCOUNT: We expect an Azure Storage account for the backend. This is the account name.
-- TFSTATE_CONTAINER: the blob container within the storage account where we will hold the state.
-- TFSTATE_RG: resource group holding the storage account.
+| # | Stage | Push | PR |
+|---|-------|------|----|
+| 0 | **pipeline-config** — resolve environment & mode | ✅ | ✅ |
+| 1 | **preflight** — unlock TF state storage | ✅ | — |
+| 2 | **deploy-infrastructure** — Terraform | ✅ | — |
+| 3 | **build containers** (backend + MCP, parallel) | ✅ | — |
+| 4 | **update-containers** — deploy to Container Apps | ✅ | — |
+| — | **resolve-endpoints** — look up existing env | — | ✅ |
+| 5 | **integration-tests** | ✅ | ✅* |
+| 6 | **agent-evaluation** → Foundry | ✅ | — |
+| 7 | **auto-merge** — squash-merge dev PR → int-agentic | ✅† | — |
 
-#### Azure Set Up
+\* Skipped if target environment not yet deployed  
+† Only on `*-dev` branches
 
-- Azure Subscription
-- Resource group with a storage account for terraform
-- Azure Service Principal (app registration) configured with federated credentials:
+## Per-Developer Environments
 
-```
-az ad app federated-credential create --id "$APP_ID" --parameters "$(jq -cn \
---arg org "$ORG" --arg repo "$REPO_NAME" '{
-name: ("github-"+$repo+"-env-dev"),
-issuer: "https://token.actions.githubusercontent.com",
-subject: ("repo:"+$org+"/"+$repo+":environment:dev"),
-audiences: ["api://AzureADTokenExchange"]
-}')"
-```
\ No newline at end of file
+Each developer has their own GitHub Environment (`integration-<name>`) with their own Azure subscription and OIDC credentials. All config is stored as **environment-level variables** (zero repo-level variables).
+
+Branch mapping: `james-dev` → `integration-james`, `main` → `production`
+
+## Required Environment Variables
+
+| Variable | Description |
+|----------|-------------|
+| `AZURE_CLIENT_ID` | App registration client ID (OIDC) |
+| `AZURE_TENANT_ID` | Entra ID tenant |
+| `AZURE_SUBSCRIPTION_ID` | Target subscription |
+| `AZ_REGION` | Azure region |
+| `PROJECT_NAME` | Project name (e.g. `OpenAIWorkshop`) |
+| `ITERATION` | Deployment iteration (e.g. `002`) |
+| `TFSTATE_ACCOUNT` | TF state storage account |
+| `TFSTATE_CONTAINER` | TF state blob container |
+| `TFSTATE_RG` | TF state resource group |
+| `MCP_SERVER_URI` | MCP service URI |
+| `AZURE_OPENAI_CHAT_DEPLOYMENT` | Chat model deployment |
+| `AZURE_OPENAI_EVAL_DEPLOYMENT` | Eval model deployment |
+| `AZURE_AI_PROJECT_ENDPOINT` | AI Foundry project endpoint |
+| `AZURE_OPENAI_API_VERSION` | OpenAI API version |
+
+## Azure Setup
+
+1. Azure subscription with a resource group + storage account for Terraform state
+2. App registration with OIDC federated credentials for each GitHub Environment:
+   ```
+   Subject: repo:microsoft/OpenAIWorkshop:environment:<env-name>
+   ```
\ No newline at end of file

From e61f15087a14e73322472cdc209ec7355856b069 Mon Sep 17 00:00:00 2001
From: "James N." <james.nguyen@microsoft.com>
Date: Fri, 13 Feb 2026 14:41:13 -0800
Subject: [PATCH 8/8] docs: consolidate CI/CD docs, link infra README to
 workflows readme

---
 infra/README.md | 95 ++++++++++++++++---------------------------------
 1 file changed, 30 insertions(+), 65 deletions(-)

diff --git a/infra/README.md b/infra/README.md
index 389cfa0cd..687cf67fe 100644
--- a/infra/README.md
+++ b/infra/README.md
@@ -336,78 +336,43 @@ az containerapp logs show --name ca-be-002 --resource-group rg-OpenAIWorkshop-de
 
 ## Automated CI/CD (GitHub Actions)
 
-For enterprise deployments, we recommend using GitHub Actions with OIDC authentication for secure, automated deployments.
+The project uses a fully automated CI/CD pipeline with **per-developer environments** and **OIDC authentication** (no stored secrets).
 
-### 📖 Complete Setup Guide
+### Pipeline Flow
 
-See **[GITHUB_ACTIONS_SETUP.md](./GITHUB_ACTIONS_SETUP.md)** for detailed instructions on:
+```
+*-dev push → CI/CD Pipeline → auto-merge → int-agentic → PR to main → human review → production deploy
+```
 
-- Creating Azure App Registration with federated credentials
-- Configuring GitHub repository variables and secrets
-- Setting up Terraform remote state in Azure Storage
-- Granting required Azure RBAC roles
+Doc-only changes (`.md`, `docs/`, `LICENSE`) are ignored and do not trigger the pipeline.
 
-### Quick Overview
+### Setup
 
-```mermaid
-flowchart TB
-    subgraph GitHub["GitHub Repository"]
-        Push["Git Push"]
-        Orchestrate["orchestrate.yml"]
-        Infra["infrastructure.yml"]
-        DockerApp["docker-application.yml"]
-        DockerMCP["docker-mcp.yml"]
-        Update["update-containers.yml"]
-        Tests["integration-tests.yml"]
-    end
-    
-    subgraph Azure["Azure"]
-        OIDC["OIDC Federation"]
-        TFState["Terraform State"]
-        ACR["Container Registry"]
-        Resources["Azure Resources"]
-    end
-    
-    Push --> Orchestrate
-    Orchestrate --> OIDC
-    Orchestrate --> Infra
-    Infra --> TFState
-    Infra --> Resources
-    Orchestrate --> DockerApp
-    Orchestrate --> DockerMCP
-    DockerApp --> ACR
-    DockerMCP --> ACR
-    Orchestrate --> Update
-    Update --> Resources
-    Orchestrate --> Tests
-```
+1. **Azure**: App Registration with OIDC federated credentials — see [GITHUB_ACTIONS_SETUP.md](./GITHUB_ACTIONS_SETUP.md)
+2. **GitHub**: Create an Environment (`integration-<name>`) with environment-level variables (no repo-level vars)
+3. **Terraform state**: Storage account in Azure — see [GITHUB_ACTIONS_SETUP.md](./GITHUB_ACTIONS_SETUP.md)
 
-### GitHub Actions Features
+### Required Environment Variables
 
-| Feature | Description |
-|---------|-------------|
-| **OIDC Authentication** | No secrets stored in GitHub - uses federated identity |
-| **Remote State** | Terraform state stored in Azure Storage for team collaboration |
-| **Multi-Environment** | Automatic environment detection based on branch |
-| **Parallel Builds** | Backend and MCP containers build simultaneously |
-| **Integration Tests** | Automated tests run after deployment |
-| **Auto Cleanup** | Optional infrastructure destruction for dev branches |
-
-### Required GitHub Variables
-
-Set these in your repository settings (Settings → Secrets and variables → Actions → Variables):
-
-| Variable | Description | Example |
-|----------|-------------|---------|
-| `AZURE_CLIENT_ID` | App Registration Client ID | `1d34c51d-...` |
-| `AZURE_TENANT_ID` | Azure AD Tenant ID | `0fbe7234-...` |
-| `AZURE_SUBSCRIPTION_ID` | Azure Subscription ID | `840b5c5c-...` |
-| `TFSTATE_RG` | Resource group for Terraform state | `rg-tfstate` |
-| `TFSTATE_ACCOUNT` | Storage account for Terraform state | `sttfstateoaiworkshop` |
-| `TFSTATE_CONTAINER` | Blob container for state files | `tfstate` |
-| `PROJECT_NAME` | Project name for resource naming | `OpenAIWorkshop` |
-| `ITERATION` | Iteration suffix | `002` |
-| `AZ_REGION` | Azure region | `eastus2` |
+| Variable | Example |
+|----------|---------|
+| `AZURE_CLIENT_ID` | `1d34c51d-...` |
+| `AZURE_TENANT_ID` | `0fbe7234-...` |
+| `AZURE_SUBSCRIPTION_ID` | `840b5c5c-...` |
+| `AZ_REGION` | `eastus2` |
+| `PROJECT_NAME` | `OpenAIWorkshop` |
+| `ITERATION` | `002` |
+| `TFSTATE_RG` / `TFSTATE_ACCOUNT` / `TFSTATE_CONTAINER` | TF state storage |
+| `AZURE_AI_PROJECT_ENDPOINT` | AI Foundry endpoint |
+| `AZURE_OPENAI_EVAL_DEPLOYMENT` | Eval model name |
+
+### 📖 Full Pipeline Documentation
+
+See **[../.github/workflows/readme.md](../.github/workflows/readme.md)** for complete details on:
+- Pipeline stages and promotion flow
+- Workflow file reference
+- Per-developer environment architecture
+- Path filtering rules
 
 ---