Skip to content

Post-Deploy Smoke Test #1221

Post-Deploy Smoke Test

Post-Deploy Smoke Test #1221

Workflow file for this run

# ═══════════════════════════════════════════════════════════════════════════════
# Post-Deploy Smoke Test — Verify production health after deployment
# ═══════════════════════════════════════════════════════════════════════════════
# Triggered automatically after a successful Vercel deployment.
# Hits the /api/health endpoint (with retry logic) and validates the response.
# Also checks that critical page routes return a 2xx/3xx status, and measures response times.
#
# Behaviour by environment:
# • Production — failures are BLOCKING (exit 1) and are surfaced via the CI status.
# • Preview — failures are NON-BLOCKING (warning annotation only).
#
# This catches:
# • Broken builds that somehow passed CI
# • Environment variable misconfiguration
# • Supabase connectivity issues post-deploy
# • Missing/broken API routes
#
# For Vercel deployments, deployment_status events fire automatically.
# The workflow also supports manual trigger for ad-hoc health checks.
# ═══════════════════════════════════════════════════════════════════════════════
# Workflow identity, triggers, token permissions and run de-duplication.
name: Post-Deploy Smoke Test

on:
  # Fires for every deployment status change; the job-level `if` keeps only `success`.
  deployment_status:
  # Manual trigger for ad-hoc health checks against an arbitrary URL.
  workflow_dispatch:
    inputs:
      target_url:
        description: "URL to smoke test (e.g. https://your-app.vercel.app)"
        required: true
        type: string

# Read-only token: the workflow only inspects deployment metadata.
permissions:
  contents: read
  deployments: read

concurrency:
  # The group key is built from two parts:
  #   1. the deployment id, or for manual runs the requested URL, so ad-hoc
  #      checks of different targets no longer cancel each other;
  #   2. the deployment state, so a later non-success status event for the same
  #      deployment (whose job is skipped anyway) lands in a different group and
  #      cannot cancel a smoke test that is still running.
  # Duplicate `success` events for one deployment still collapse into one run.
  group: >-
    smoke-test-${{ github.event.deployment.id || github.event.inputs.target_url || 'manual' }}-${{ github.event.deployment_status.state || 'manual' }}
  cancel-in-progress: true
jobs:
  smoke:
    name: Smoke Test
    # Proceed only for a manual dispatch or a deployment that reported `success`.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'deployment_status' &&
      github.event.deployment_status.state == 'success')
    runs-on: ubuntu-latest
    timeout-minutes: 5
    env:
      # Values from the deployment event take precedence; manual runs fall back
      # to the dispatch input (URL) and the literal 'manual' (environment).
      TARGET_URL: ${{ github.event.deployment_status.target_url || github.event.inputs.target_url }}
      DEPLOY_ENV: ${{ github.event.deployment_status.environment || 'manual' }}
    steps:
- name: Wait for deployment to stabilize
  run: |
    # Every environment gets the same 30-second warm-up for Vercel cold starts;
    # production additionally needs it for its unique deployment URL to resolve.
    # Only the log line differs between environments.
    warmup_seconds=30
    case "$DEPLOY_ENV" in
      Production)
        echo "Production deploy — waiting 30s for DNS + cold start"
        ;;
      *)
        echo "Preview deploy ($DEPLOY_ENV) — waiting 30s for cold start"
        ;;
    esac
    sleep "$warmup_seconds"
- name: Verify target URL
  run: |
    # Fail the job when neither the deployment event nor the manual input
    # supplied a URL; otherwise log what is about to be tested.
    if [ -n "$TARGET_URL" ]; then
      echo "Smoke testing: $TARGET_URL (env: $DEPLOY_ENV)"
    else
      echo "::error::No target URL available"
      exit 1
    fi
# ── Health endpoint (deep check with retries) ────────────────────
- name: Health check — /api/health
  run: |
    # Summary header: target, environment and UTC timestamp of this run.
    {
      echo "## Post-Deploy Smoke Test"
      echo ""
      echo "**Target:** \`$TARGET_URL\`"
      echo "**Environment:** $DEPLOY_ENV"
      echo "**Time:** $(date -u +%Y-%m-%dT%H:%M:%SZ)"
      echo ""
    } >> "$GITHUB_STEP_SUMMARY"

    # Failures block the job only for production deployments.
    if [ "$DEPLOY_ENV" = "Production" ]; then
      blocking=true
    else
      blocking=false
    fi

    # Up to 5 attempts, 15s apart (handles Vercel cold starts + DNS).
    MAX_RETRIES=5
    RETRY_DELAY=15
    body_file=/tmp/health_body.txt
    http_code=""
    response=""
    reached=false
    for attempt in $(seq 1 "$MAX_RETRIES"); do
      echo "Health check attempt $attempt/$MAX_RETRIES..."
      # Body goes to a file and the status code to stdout; there is no -f flag,
      # so error responses keep their body. `|| true` swallows curl's non-zero
      # exit on connection-level errors.
      http_code=$(curl -s -o "$body_file" -w "%{http_code}" \
        --max-time 20 "$TARGET_URL/api/health" 2>/dev/null) || true
      response=$(cat "$body_file" 2>/dev/null || echo "")
      case "$http_code" in
        "" | 000)
          # Connection-level failure (DNS, timeout, refused) — retry.
          if [ "$attempt" -lt "$MAX_RETRIES" ]; then
            echo "Attempt $attempt failed (no HTTP response), retrying in ${RETRY_DELAY}s..."
            sleep "$RETRY_DELAY"
          fi
          ;;
        *)
          # Any HTTP status code means the endpoint is reachable.
          reached=true
          break
          ;;
      esac
    done

    # Table header; the route rows of the following step land in the same table.
    {
      echo "| Route | Status |"
      echo "|-------|--------|"
    } >> "$GITHUB_STEP_SUMMARY"

    # ── No HTTP response at all (connection failure) ──
    if [ "$reached" != "true" ]; then
      echo "| /api/health | ❌ Connection failed ($MAX_RETRIES attempts) |" >> "$GITHUB_STEP_SUMMARY"
      if [ "$blocking" = "true" ]; then
        echo "::error::Health endpoint unreachable at $TARGET_URL/api/health after $MAX_RETRIES attempts"
        exit 1
      fi
      echo "::warning::Health endpoint unreachable on preview deploy — non-blocking"
      echo "> ⚠️ Preview deploy health check failed — this is non-blocking." >> "$GITHUB_STEP_SUMMARY"
      exit 0
    fi

    # ── Vercel Deployment Protection (401/403) ──
    case "$http_code" in
      401 | 403)
        echo "| /api/health | ⏭️ HTTP $http_code — Vercel deployment protection (skipped) |" >> "$GITHUB_STEP_SUMMARY"
        echo "::notice::Health check skipped — Vercel deployment protection returned HTTP $http_code on deployment-specific URL"
        exit 0
        ;;
    esac

    # ── Got a response — read the "status" field of the JSON body ──
    # A body that is not JSON (or not a JSON object) yields "parse_error";
    # a JSON object without a "status" key yields "unknown".
    status=$(echo "$response" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))" 2>/dev/null || echo "parse_error")
    case "$status" in
      healthy | degraded)
        echo "| /api/health | ✅ $status (HTTP $http_code) |" >> "$GITHUB_STEP_SUMMARY"
        ;;
      *)
        echo "| /api/health | ❌ $status (HTTP $http_code) |" >> "$GITHUB_STEP_SUMMARY"
        if [ "$blocking" = "true" ]; then
          echo "::error::Health check returned status: $status (HTTP $http_code)"
          echo "Response: $response"
          exit 1
        fi
        echo "::warning::Health check returned status: $status on preview deploy — non-blocking"
        ;;
    esac
# ── Critical page routes (any 2xx/3xx status passes) ────────────
- name: Check critical routes
  run: |
    # Probes each route once and appends a row to the summary table.
    # Production: any failing route fails the step. Other environments: warning only.
    failed=0
    routes=("/" "/app" "/app/scan")
    for route in "${routes[@]}"; do
      # `|| true` is required: on DNS/connect/timeout errors curl exits non-zero,
      # and because GitHub runs `run:` scripts with `bash -e` the failed
      # assignment would abort the whole step — before the route is reported
      # and before a preview failure can be downgraded to a warning.
      status_code=$(curl -so /dev/null -w "%{http_code}" --max-time 20 "$TARGET_URL$route" 2>/dev/null) || true
      # Normalise "no usable HTTP status" to 000 so the numeric tests below are safe.
      case "$status_code" in
        "" | *[!0-9]*) status_code="000" ;;
      esac
      # Vercel deployment protection — skip gracefully
      if [ "$status_code" = "401" ] || [ "$status_code" = "403" ]; then
        echo "| $route | ⏭️ $status_code (Vercel protection) |" >> "$GITHUB_STEP_SUMMARY"
        continue
      fi
      # Redirects count as healthy; 000 (no response), 4xx and 5xx do not.
      if [ "$status_code" -ge 200 ] && [ "$status_code" -lt 400 ]; then
        echo "| $route | ✅ $status_code |" >> "$GITHUB_STEP_SUMMARY"
      else
        echo "| $route | ❌ $status_code |" >> "$GITHUB_STEP_SUMMARY"
        if [ "$DEPLOY_ENV" = "Production" ]; then
          echo "::error::Route $route returned HTTP $status_code"
        else
          echo "::warning::Route $route returned HTTP $status_code on preview deploy"
        fi
        failed=1
      fi
    done
    # Decide the step result only after every route has been reported.
    if [ "$failed" -ne 0 ]; then
      if [ "$DEPLOY_ENV" = "Production" ]; then
        exit 1
      else
        echo "::warning::Some routes failed on preview deploy — non-blocking"
      fi
    fi
# ── Response time baseline ───────────────────────────────────────
- name: Measure response times
  if: ${{ !cancelled() }}
  run: |
    # Informational only: records total request time per route and warns above 3s.
    # Because of `!cancelled()` this step also runs after earlier failures,
    # including a missing target URL — there is nothing to measure in that case.
    if [ -z "$TARGET_URL" ]; then
      echo "::notice::Response times skipped — no target URL available"
      exit 0
    fi
    echo "" >> "$GITHUB_STEP_SUMMARY"
    echo "### Response Times" >> "$GITHUB_STEP_SUMMARY"
    echo "" >> "$GITHUB_STEP_SUMMARY"
    echo "| Route | Time (ms) |" >> "$GITHUB_STEP_SUMMARY"
    echo "|-------|-----------|" >> "$GITHUB_STEP_SUMMARY"
    routes=("/" "/api/health")
    for route in "${routes[@]}"; do
      # Capture status code and elapsed seconds together so that a request that
      # never got an HTTP response is not reported as a (misleading) timing.
      # `|| true` keeps a curl connection error from aborting the step.
      result=$(curl -so /dev/null -w "%{http_code} %{time_total}" --max-time 15 "$TARGET_URL$route" 2>/dev/null) || true
      http_code=${result%% *}
      seconds=${result#* }
      if [ -z "$result" ] || [ "$http_code" = "000" ]; then
        echo "| $route | n/a (no response) |" >> "$GITHUB_STEP_SUMMARY"
        echo "::warning::No HTTP response from $route while measuring response time"
        continue
      fi
      # curl reports seconds as a decimal; convert to whole milliseconds.
      time_ms=$(awk -v t="$seconds" 'BEGIN { printf "%.0f", t * 1000 }')
      echo "| $route | ${time_ms}ms |" >> "$GITHUB_STEP_SUMMARY"
      # Warn if response time > 3 seconds
      if [ "$time_ms" -gt 3000 ]; then
        echo "::warning::Slow response: $route took ${time_ms}ms"
      fi
    done
- name: Summary
  if: ${{ !cancelled() }}
  env:
    # Status of the job so far: `failure` once any earlier step has failed.
    JOB_STATUS: ${{ job.status }}
  run: |
    # This step runs even after a failed step, so the verdict must reflect the
    # real job status instead of always claiming success. Non-blocking preview
    # warnings do not fail the job and therefore still report as passed.
    echo "" >> "$GITHUB_STEP_SUMMARY"
    echo "---" >> "$GITHUB_STEP_SUMMARY"
    if [ "$JOB_STATUS" = "success" ]; then
      echo "✅ **Post-deploy smoke test passed.**" >> "$GITHUB_STEP_SUMMARY"
    else
      echo "❌ **Post-deploy smoke test failed.**" >> "$GITHUB_STEP_SUMMARY"
    fi