diff --git a/docker-compose.loadtest.yml b/docker-compose.loadtest.yml new file mode 100644 index 0000000..92e9841 --- /dev/null +++ b/docker-compose.loadtest.yml @@ -0,0 +1,131 @@ +version: '3.8' + +services: + # HAProxy load balancer with consistent hashing + haproxy: + image: haproxy:2.8-alpine + ports: + - "8080:8080" + - "8404:8404" # Stats page + volumes: + - ./loadtest/haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro + depends_on: + - goblet-1 + - goblet-2 + - goblet-3 + networks: + - goblet-net + + # Goblet instance 1 - shard A + goblet-1: + build: + context: . + dockerfile: Dockerfile + environment: + - GOBLET_PORT=8080 + - GOBLET_CACHE_ROOT=/cache + - GOBLET_INSTANCE_ID=1 + volumes: + - cache-1:/cache + networks: + - goblet-net + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/healthz"] + interval: 10s + timeout: 5s + retries: 3 + + # Goblet instance 2 - shard B + goblet-2: + build: + context: . + dockerfile: Dockerfile + environment: + - GOBLET_PORT=8080 + - GOBLET_CACHE_ROOT=/cache + - GOBLET_INSTANCE_ID=2 + volumes: + - cache-2:/cache + networks: + - goblet-net + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/healthz"] + interval: 10s + timeout: 5s + retries: 3 + + # Goblet instance 3 - shard C + goblet-3: + build: + context: . 
+ dockerfile: Dockerfile + environment: + - GOBLET_PORT=8080 + - GOBLET_CACHE_ROOT=/cache + - GOBLET_INSTANCE_ID=3 + volumes: + - cache-3:/cache + networks: + - goblet-net + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/healthz"] + interval: 10s + timeout: 5s + retries: 3 + + # Prometheus for metrics collection + prometheus: + image: prom/prometheus:latest + ports: + - "9090:9090" + volumes: + - ./loadtest/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + networks: + - goblet-net + + # Grafana for metrics visualization + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - grafana-data:/var/lib/grafana + - ./loadtest/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro + depends_on: + - prometheus + networks: + - goblet-net + + # Load test generator using k6 + k6: + image: grafana/k6:latest + profiles: + - loadtest + volumes: + - ./loadtest/k6-script.js:/scripts/test.js:ro + command: run /scripts/test.js + environment: + - K6_PROMETHEUS_RW_SERVER_URL=http://prometheus:9090/api/v1/write + - TARGET_URL=http://haproxy:8080 + depends_on: + - haproxy + networks: + - goblet-net + +networks: + goblet-net: + driver: bridge + +volumes: + cache-1: + cache-2: + cache-3: + prometheus-data: + grafana-data: diff --git a/docs/operations/load-testing.md b/docs/operations/load-testing.md new file mode 100644 index 0000000..cf32b2f --- /dev/null +++ b/docs/operations/load-testing.md @@ -0,0 +1,535 @@ +# Load Testing + +This guide explains how to load test Goblet to validate performance and capacity before production deployment. 
+ +## Overview + +Load testing helps you: +- Validate deployment capacity +- Identify performance bottlenecks +- Tune cache sizes and resource limits +- Establish baseline metrics +- Test failure scenarios + +## Quick Start + +### Using Docker Compose + +The fastest way to run load tests: + +```bash +cd loadtest + +# Start test environment (3 Goblet instances + monitoring) +make start + +# Run Python-based load test +make loadtest-python + +# View results +open http://localhost:8404 # HAProxy stats +open http://localhost:9090 # Prometheus +open http://localhost:3000 # Grafana + +# Cleanup +make stop +``` + +## Test Environment Architecture + + + +``` +┌──────────────┐ +│ HAProxy │ Load balancer with consistent hashing +│ (port 8080)│ +└───────┬──────┘ + │ + ┌───┴────┬────────┐ + │ │ │ +┌───▼───┐ ┌──▼───┐ ┌──▼───┐ +│Goblet │ │Goblet│ │Goblet│ +│ -1 │ │ -2 │ │ -3 │ +└───┬───┘ └──┬───┘ └──┬───┘ + │ │ │ +┌───▼────────▼────────▼───┐ +│ Prometheus │ +│ (port 9090) │ +└────────┬─────────────────┘ + │ +┌────────▼─────────────────┐ +│ Grafana │ +│ (port 3000) │ +└──────────────────────────┘ +``` + +## Test Tools + +### Python Load Test + +Flexible, easy to customize: + +```bash +python3 loadtest/loadtest.py \ + --url http://localhost:8080 \ + --workers 20 \ + --requests 100 \ + --repos github.com/kubernetes/kubernetes \ + github.com/golang/go +``` + +**Options:** +- `--url`: Target URL +- `--workers`: Concurrent workers +- `--requests`: Requests per worker +- `--think-time`: Delay between requests (ms) +- `--repos`: Repository list to test +- `--output`: JSON output file + +**Output:** +``` +=== Load Test Summary === + +Total Requests: 2000 +Successful: 1995 +Failed: 5 +Success Rate: 99.75% +Total Duration: 45.23s +Requests/sec: 44.21 + +Response Times (ms): + Min: 12.34 + Max: 456.78 + Mean: 89.45 + Median: 67.89 + P95: 234.56 + P99: 389.12 +``` + +### k6 Load Test + +Advanced load testing with gradual ramp-up: + +```bash +# Run k6 test +docker-compose -f docker-compose.loadtest.yml --profile loadtest
up k6 +``` + +**Test stages:** +- Ramp to 10 VUs (2 min) +- Stay at 10 VUs (5 min) +- Ramp to 50 VUs (2 min) +- Stay at 50 VUs (5 min) +- Ramp to 100 VUs (2 min) +- Stay at 100 VUs (5 min) +- Ramp down (2 min) + +## Test Scenarios + +### Scenario 1: Cache Warm-up + +Test cache efficiency after warm-up period: + +```bash +# Phase 1: Populate cache +python3 loadtest.py --workers 5 --requests 50 + +# Phase 2: Test cache hits +python3 loadtest.py --workers 20 --requests 200 + +# Expected: >80% cache hit rate in Phase 2 +``` + +### Scenario 2: Cold Start + +Test behavior with empty cache: + +```bash +# Clear caches +docker-compose down -v +docker-compose up -d + +# Run test +python3 loadtest.py --workers 10 --requests 100 + +# Expected: Higher latency, all cache misses initially +``` + +### Scenario 3: High Concurrency + +Test maximum concurrent requests: + +```bash +python3 loadtest.py \ + --workers 100 \ + --requests 50 \ + --think-time 0 + +# Monitor: CPU, memory, connection count +``` + +### Scenario 4: Repository Diversity + +Test with many different repositories: + +```bash +python3 loadtest.py \ + --workers 20 \ + --requests 100 \ + --repos $(cat popular-repos.txt) + +# Tests cache distribution and eviction +``` + +### Scenario 5: Sustained Load + +Test stability over time: + +```bash +# Run for 1 hour +python3 loadtest.py \ + --workers 10 \ + --requests 3600 \ + --think-time 1000 + +# Monitor: memory leaks, cache growth, error rates +``` + +## Interpreting Results + +### Key Metrics + +**Success Rate:** +- Target: > 99% +- Warning: < 99% +- Critical: < 95% + +**Response Time (P95):** +- Excellent: < 100ms +- Good: 100-500ms +- Acceptable: 500-1000ms +- Poor: > 1000ms + +**Cache Hit Rate:** +- Excellent: > 90% +- Good: 80-90% +- Acceptable: 70-80% +- Poor: < 70% + +**Throughput:** +- Single instance: 500-1000 req/sec +- Per sidecar: 50-100 req/sec (sufficient for most workloads) + +### Performance Baselines + +**Cached requests (hit):** +``` +Min: 5-10ms (memory 
access) +P50: 10-20ms (disk read) +P95: 50-100ms (cold disk cache) +P99: 100-200ms (contention) +Max: 500ms+ (GC pauses) +``` + +**Cache miss (fetch from upstream):** +``` +Min: 100ms (small repo, fast network) +P50: 500ms (typical) +P95: 2000ms (large repo) +P99: 5000ms (very large repo) +Max: 30000ms (timeout) +``` + +## Capacity Planning + +### Single Instance Capacity + +Based on typical workloads: + +| Metric | Value | +|--------|-------| +| Max requests/sec | 500-1000 | +| Concurrent connections | 1000 | +| Cache size | 100GB-1TB | +| CPU (sustained) | 2-4 cores | +| Memory | 4-8GB | + +### Sidecar Capacity + +Per-pod capacity: + +| Metric | Value | +|--------|-------| +| Requests/hour | 100-1000 | +| Peak requests/sec | 10-50 | +| Cache size | 1-10GB | +| CPU | 250m-1 core | +| Memory | 512MB-2GB | + +### Scaling Formula + +``` +Required pods = (Peak requests/sec) / (Requests per pod/sec) + +Example: +- Peak traffic: 1000 req/sec +- Per pod capacity: 10 req/sec +- Required pods: 100 + +With 50% buffer: 150 pods +``` + +## Monitoring During Tests + +### HAProxy Stats + +```bash +open http://localhost:8404 + +# Key metrics: +# - Request distribution across instances +# - Health check status +# - Error rates per backend +``` + +### Prometheus Queries + +```promql +# Cache hit rate +rate(cache_hits_total[5m]) / rate(requests_total[5m]) + +# Request latency (P95) +histogram_quantile(0.95, rate(request_duration_seconds_bucket[5m])) + +# Error rate +rate(errors_total[5m]) / rate(requests_total[5m]) + +# Requests per second +rate(requests_total[5m]) +``` + +### System Metrics + +```bash +# CPU usage +docker stats goblet-1 goblet-2 goblet-3 + +# Disk I/O +docker exec goblet-1 iostat -x 1 + +# Network +docker exec goblet-1 iftop -i eth0 +``` + +## Troubleshooting + +### High Latency + +**Symptoms:** P95 > 1000ms + +**Diagnosis:** +```bash +# Check cache hit rate +curl http://localhost:8080/metrics | grep cache_hit_rate + +# Check disk I/O +docker exec goblet-1 iostat 
-x + +# Check network latency to upstream +docker exec goblet-1 ping -c 10 github.com +``` + +**Solutions:** +- Increase cache size +- Use faster storage (SSD) +- Add more instances +- Pre-warm cache + +### High Error Rate + +**Symptoms:** Errors > 5% + +**Diagnosis:** +```bash +# Check logs +docker-compose logs goblet-1 | grep ERROR + +# Check upstream connectivity +docker exec goblet-1 curl -I https://github.com +``` + +**Solutions:** +- Verify upstream connectivity +- Check authentication +- Increase timeout values +- Review rate limiting + +### Uneven Load Distribution + +**Symptoms:** One instance much busier than others + +**Diagnosis:** +```bash +# Check HAProxy distribution +curl http://localhost:8404 | grep -A 20 goblet_shards +``` + +**Solutions:** +- Verify consistent hashing configured +- Check if specific repos dominate traffic +- Review routing algorithm + +### Memory Growth + +**Symptoms:** Memory usage increases over time + +**Diagnosis:** +```bash +# Monitor memory over time +watch -n 5 'docker stats --no-stream goblet-1' + +# Check cache size +docker exec goblet-1 du -sh /cache +``` + +**Solutions:** +- Set cache size limits +- Enable LRU eviction +- Increase memory limits +- Review for memory leaks + +## Best Practices + +### Before Testing + +1. **Define objectives:** + - What are you testing? + - What metrics matter? + - What's the success criteria? + +2. **Prepare environment:** + - Clean state (clear caches if needed) + - Monitoring configured + - Baseline metrics captured + +3. **Plan test scenarios:** + - Realistic traffic patterns + - Representative repository mix + - Appropriate duration + +### During Testing + +1. **Monitor actively:** + - Watch dashboards + - Check logs for errors + - Note any anomalies + +2. **Document observations:** + - Screenshot metrics + - Record configuration + - Note any changes made + +3. 
**Adjust gradually:** + - Change one variable at a time + - Allow time to stabilize + - Compare with baseline + +### After Testing + +1. **Analyze results:** + - Compare against targets + - Identify bottlenecks + - Document findings + +2. **Save data:** + - Export metrics + - Save logs + - Archive configurations + +3. **Create action items:** + - Performance improvements needed + - Configuration changes + - Scaling requirements + +## Example Test Plan + +### Objective +Validate Goblet can handle 1M requests/month with sidecar pattern. + +### Setup +- 10 pods with sidecars +- 1GB cache per pod +- Representative repo mix + +### Test Phases + +**Phase 1: Baseline (30 min)** +```bash +# Light load to warm up cache +python3 loadtest.py --workers 5 --requests 100 +``` +*Expected: Establish baseline latency and hit rate* + +**Phase 2: Normal Load (1 hour)** +```bash +# Simulate average daily traffic +python3 loadtest.py --workers 10 --requests 1000 +``` +*Expected: P95 < 500ms, hit rate > 80%* + +**Phase 3: Peak Load (30 min)** +```bash +# Simulate 10x peak +python3 loadtest.py --workers 100 --requests 100 +``` +*Expected: P95 < 1000ms, no errors* + +**Phase 4: Sustained Peak (2 hours)** +```bash +# Validate stability at peak +python3 loadtest.py --workers 50 --requests 2000 +``` +*Expected: Stable performance, no memory leaks* + +### Success Criteria +- ✅ Success rate > 99% +- ✅ P95 latency < 500ms (normal), < 1000ms (peak) +- ✅ Cache hit rate > 80% +- ✅ No memory leaks +- ✅ No errors under sustained load + +## Summary + +**Quick Reference:** + +```bash +# Start environment +cd loadtest && make start + +# Run test +make loadtest-python + +# View stats +open http://localhost:8404 + +# Cleanup +make stop +``` + +**Key Takeaways:** + +1. Start with warm-up phase +2. Test realistic scenarios +3. Monitor actively +4. Document everything +5. 
Plan for peak + buffer + +**Next Steps:** + +- Run baseline tests in dev +- Validate capacity planning +- Test failure scenarios +- Move to staging +- Production rollout with monitoring + +For detailed test scripts, see [`loadtest/`](../../loadtest/) directory. diff --git a/loadtest/Makefile b/loadtest/Makefile new file mode 100644 index 0000000..0da9e79 --- /dev/null +++ b/loadtest/Makefile @@ -0,0 +1,107 @@ +.PHONY: help start stop restart status logs clean loadtest-python loadtest-k6 stats + +# Default target +help: + @echo "Goblet Load Test Harness" + @echo "" + @echo "Available targets:" + @echo " start - Start load test environment (HAProxy + 3 Goblet instances + monitoring)" + @echo " stop - Stop all containers" + @echo " restart - Restart all containers" + @echo " status - Show container status" + @echo " logs - Tail logs from all containers" + @echo " stats - Show HAProxy stats" + @echo " metrics - Show Prometheus metrics from Goblet instances" + @echo " loadtest-python - Run Python-based load test" + @echo " loadtest-k6 - Run k6-based load test" + @echo " clean - Stop and remove all containers and volumes" + @echo "" + @echo "Monitoring URLs:" + @echo " HAProxy Stats: http://localhost:8404" + @echo " Prometheus: http://localhost:9090" + @echo " Grafana: http://localhost:3000 (admin/admin)" + @echo " Goblet API: http://localhost:8080" + +# Start the load test environment +start: + @echo "Starting load test environment..." + docker-compose -f ../docker-compose.loadtest.yml up -d + @echo "" + @echo "Waiting for services to be ready..." + @sleep 5 + @echo "" + @echo "Services started!" + @echo " HAProxy: http://localhost:8080" + @echo " Stats: http://localhost:8404" + @echo " Prometheus: http://localhost:9090" + @echo " Grafana: http://localhost:3000" + +# Stop all containers +stop: + @echo "Stopping load test environment..." 
+ docker-compose -f ../docker-compose.loadtest.yml down + +# Restart all containers +restart: + @echo "Restarting load test environment..." + docker-compose -f ../docker-compose.loadtest.yml restart + +# Show container status +status: + @echo "Container Status:" + @echo "" + docker-compose -f ../docker-compose.loadtest.yml ps + +# Tail logs from all containers +logs: + docker-compose -f ../docker-compose.loadtest.yml logs -f + +# Show HAProxy stats +stats: + @echo "HAProxy Statistics:" + @echo "===================" + @echo "" + @curl -s http://localhost:8404 | grep -A 20 "goblet_shards" || echo "HAProxy not responding. Is it running? Try: make start" + @echo "" + @echo "Full stats available at: http://localhost:8404" + +# Show Prometheus metrics from Goblet instances +metrics: + @echo "Goblet Instance Metrics:" + @echo "========================" + @echo "" + @echo "Instance 1:" + @curl -s http://localhost:8080/metrics 2>/dev/null | grep -E "^goblet_" | head -n 10 || echo "Not responding" + @echo "" + @echo "Open Prometheus for detailed metrics: http://localhost:9090" + +# Run Python-based load test +loadtest-python: + @echo "Running Python load test..." + @echo "" + python3 loadtest.py \ + --url http://localhost:8080 \ + --workers 20 \ + --requests 50 \ + --think-time 100 \ + --repos github.com/kubernetes/kubernetes github.com/golang/go \ + --output results-$$(date +%Y%m%d-%H%M%S).json + +# Run k6-based load test +loadtest-k6: + @echo "Running k6 load test..." + @echo "" + docker-compose -f ../docker-compose.loadtest.yml --profile loadtest up k6 + +# Clean everything +clean: + @echo "Cleaning up load test environment..." + @echo "This will remove all containers and volumes!" + @read -p "Are you sure? 
[y/N] " -n 1 -r; \ + echo; \ + if [[ $$REPLY =~ ^[Yy]$$ ]]; then \ + docker-compose -f ../docker-compose.loadtest.yml down -v; \ + echo "Cleanup complete!"; \ + else \ + echo "Cleanup cancelled."; \ + fi diff --git a/loadtest/README.md b/loadtest/README.md new file mode 100644 index 0000000..0dcfc33 --- /dev/null +++ b/loadtest/README.md @@ -0,0 +1,511 @@ +# Goblet Load Testing & Deployment Patterns + +This directory contains load testing infrastructure and deployment patterns for scaling Goblet in production environments. + +## ⚠️ CRITICAL SECURITY NOTICE + +**Before deploying Goblet with private repositories, read the [Security Isolation Guide](../docs/security/isolation-strategies.md)** + +Goblet's default configuration is **UNSAFE for multi-tenant deployments with private repositories**. Users can access each other's cached private repos. See [Security](#security-considerations) section below. + +## Table of Contents + +1. [Security Considerations](#security-considerations) +2. [Architecture Overview](#architecture-overview) +3. [Load Testing Setup](#load-testing-setup) +4. [Deployment Patterns](#deployment-patterns) +5. [Scaling Considerations](#scaling-considerations) +6. 
[Sidecar Pattern for Terraform](#sidecar-pattern-for-terraform) + +--- + +## Security Considerations + +### The Problem + +**Default cache key:** `/cache/{host}/{repo-path}` - NO user/tenant identifier +**Risk:** User A's private repos accessible to User B + +### Solutions (Pick One) + +| Pattern | Security | Storage | Complexity | Use Case | +|---------|----------|---------|------------|----------| +| **Sidecar** (Recommended) | ✅ Perfect | Medium | Low | Terraform, CI/CD | +| **User-Scoped** | ✅ Perfect | High | Medium | Risk scanning | +| **Tenant-Scoped** | ✅ Good | Medium | Medium | Terraform Cloud | +| **Network Isolation** | ✅ Perfect | Low | High | Compliance | +| ❌ **Default (None)** | ❌ UNSAFE | Low | Low | Public repos only | + +**Quick Fix:** Use sidecar pattern (one instance per pod). See [`kubernetes-sidecar-deployment.yaml`](./kubernetes-sidecar-deployment.yaml) + +**Detailed Guide:** See [Security Isolation Strategies](../docs/security/isolation-strategies.md) + +**Architecture:** See [Design Decisions](../docs/architecture/design-decisions.md) + +--- + +## Architecture Overview + +### Stateful vs Stateless + +**Goblet is a STATEFUL caching proxy** with the following characteristics: + +- **File-based cache**: Bare Git repositories stored on local disk +- **In-process state**: `sync.Map` for repository management with per-repo mutexes +- **Single-writer assumption**: Git operations expect exclusive access to repositories +- **No distributed coordination**: No distributed locks or leader election + +### Scaling Implications + +❌ **NOT SAFE**: Multiple instances sharing the same cache directory +- Git operations will race and corrupt repositories +- In-memory locks are process-local + +✅ **SAFE**: +- Single instance per cache directory +- Multiple instances with repository sharding +- Sidecar pattern (one cache per application pod) + +--- + +## Load Testing Setup + +### Prerequisites + +- Docker and Docker Compose +- Python 3.8+ (for Python-based load 
test) +- OR k6 (for JavaScript-based load test) + +### Quick Start + +1. **Start the load test environment:** + + ```bash + docker-compose -f docker-compose.loadtest.yml up -d + ``` + + This starts: + - 3 Goblet instances (goblet-1, goblet-2, goblet-3) + - HAProxy load balancer with consistent hashing (port 8080) + - Prometheus metrics collector (port 9090) + - Grafana dashboard (port 3000) + +2. **View HAProxy stats:** + + ```bash + open http://localhost:8404 + ``` + +3. **Run Python load test:** + + ```bash + python3 loadtest/loadtest.py \ + --url http://localhost:8080 \ + --workers 20 \ + --requests 100 \ + --repos github.com/kubernetes/kubernetes github.com/golang/go + ``` + +4. **Run k6 load test:** + + ```bash + docker-compose -f docker-compose.loadtest.yml --profile loadtest up k6 + ``` + +5. **View Grafana dashboards:** + + ```bash + open http://localhost:3000 + # Login: admin/admin + ``` + +### Load Test Scripts + +#### Python Script (`loadtest.py`) + +Flexible, easy-to-customize load test script: + +```bash +python3 loadtest/loadtest.py \ + --url http://localhost:8080 \ + --workers 50 \ + --requests 200 \ + --think-time 50 \ + --repos github.com/user/repo1 github.com/user/repo2 \ + --output results.json +``` + +**Options:** +- `--url`: Target URL (default: http://localhost:8080) +- `--workers`: Number of concurrent workers (default: 10) +- `--requests`: Requests per worker (default: 100) +- `--think-time`: Delay between requests in ms (default: 100) +- `--repos`: List of repository paths to test +- `--output`: JSON output file for results + +#### k6 Script (`k6-script.js`) + +Advanced load testing with gradual ramp-up: + +```javascript +// Stages defined in k6-script.js: +// - Ramp up to 10 VUs over 2 minutes +// - Stay at 10 VUs for 5 minutes +// - Ramp up to 50 VUs over 2 minutes +// - Stay at 50 VUs for 5 minutes +// - Ramp up to 100 VUs over 2 minutes +// - Stay at 100 VUs for 5 minutes +// - Ramp down over 2 minutes +``` + +Customize repositories in 
`k6-script.js` line 22. + +--- + +## Deployment Patterns + +### Pattern 1: Repository Sharding with HAProxy + +**Use case**: Centralized cache with horizontal scaling + +**Architecture:** +``` + HAProxy (consistent hashing on URL) + | + +---------------+---------------+ + | | | + Goblet-1 Goblet-2 Goblet-3 + (repos A-H) (repos I-P) (repos Q-Z) + | | | + Cache Dir 1 Cache Dir 2 Cache Dir 3 +``` + +**Implementation:** + +```yaml +# See docker-compose.loadtest.yml +# HAProxy uses: balance uri whole +``` + +**Pros:** +- True horizontal scaling +- Linear throughput increase +- Each instance caches a subset of repos + +**Cons:** +- Cache efficiency reduced (each instance has partial cache) +- Need sticky routing per repository +- Adds load balancer complexity + +### Pattern 2: Sidecar Pattern (Recommended for Terraform) + +**Use case**: Large-scale deployments with millions of requests per month + +**Architecture:** +``` +Kubernetes Pod + | + +-- Terraform Agent Container + | (git -> http://localhost:8080) + | + +-- Goblet Sidecar Container + (port 8080, cache: /cache) + | + +-- EmptyDir Volume (10Gi) +``` + +**Implementation:** + +See `kubernetes-sidecar-deployment.yaml` + +**Benefits:** +- ✅ Zero network latency (localhost) +- ✅ Pod-scoped cache lifecycle +- ✅ Natural workload partitioning +- ✅ No coordination needed +- ✅ Scales linearly with pod count +- ✅ Perfect for Terraform Cloud Agents + +**Configuration:** + +```yaml +# In Terraform agent container: +env: + - name: HTTP_PROXY + value: "http://localhost:8080" + # OR + - name: GIT_CONFIG_KEY_0 + value: "http.proxy" + - name: GIT_CONFIG_VALUE_0 + value: "http://localhost:8080" +``` + +### Pattern 3: Regional Instances + +**Use case**: Multi-region deployments with geo-distributed teams + +**Architecture:** +``` +US-EAST Region EU-WEST Region APAC Region + | | | +Goblet Instance Goblet Instance Goblet Instance +(10GB cache) (10GB cache) (10GB cache) +``` + +**Pros:** +- Low latency for regional users +- Independent 
failure domains +- Simple deployment model + +**Cons:** +- Cache duplication across regions +- Higher storage costs + +--- + +## Scaling Considerations + +### When to Scale + +**Vertical Scaling (increase instance size):** +- CPU bound: Many concurrent requests, protocol parsing +- Memory bound: Large number of cached repositories +- Disk I/O bound: Frequent cache misses, large repos + +**Horizontal Scaling (add instances):** +- Request rate exceeds single instance capacity (~1000 req/s) +- Need high availability / redundancy +- Regional distribution required +- Workload naturally partitioned (e.g., per-tenant) + +### Metrics to Monitor + +1. **Request Rate**: requests/sec per instance +2. **Cache Hit Rate**: % of requests served from cache +3. **Response Latency**: p50, p95, p99 latencies +4. **Disk Usage**: cache directory size +5. **Git Fetch Duration**: time to fetch from upstream +6. **Error Rate**: failed requests / total requests + +### Capacity Planning + +**Single Instance Capacity (estimated):** +- **Request Rate**: 500-1000 req/s (depends on cache hit rate) +- **Concurrent Connections**: 1000+ +- **Cached Repositories**: 100-1000 (depends on size) +- **Disk I/O**: ~100 MB/s sustained + +**For millions of requests/month:** +``` +1,000,000 requests/month = ~0.4 requests/sec average +With peak factor 10x = ~4 requests/sec peak +Single instance: SUFFICIENT for average load +Sidecar pattern: BETTER for peak handling + resilience +``` + +### Recommended Architecture for Terraform Cloud Scale + +**Deployment:** +- 100 Terraform Agent pods +- Each pod with Goblet sidecar +- 10GB cache per pod +- HPA (Horizontal Pod Autoscaler): 100-500 pods + +**Expected Performance:** +- 1M requests/month = ~10K requests/pod/month +- Avg: 0.004 req/sec per pod (trivial) +- Peak (10x): 0.04 req/sec per pod (trivial) +- **Cache hit rate**: 80-95% (after warm-up) + +**Benefits:** +- No shared state = no coordination overhead +- Linear scaling with pod count +- Cache warm-up 
happens naturally per pod +- Failed pods don't affect others +- Rolling updates are safe + +--- + +## Sidecar Pattern for Terraform + +### Why Sidecar for Terraform Agents? + +1. **Workload Isolation**: Each Terraform run is independent +2. **Cache Locality**: Terraform runs often use same repos +3. **No Network Overhead**: Localhost communication +4. **Natural Partitioning**: No need for distributed coordination +5. **Pod Lifecycle**: Cache created/destroyed with pod + +### Deployment Steps + +1. **Build Goblet container image:** + + ```bash + docker build -t goblet:latest . + docker tag goblet:latest your-registry/goblet:v1.0.0 + docker push your-registry/goblet:v1.0.0 + ``` + +2. **Deploy to Kubernetes:** + + ```bash + kubectl create namespace terraform-agents + kubectl apply -f loadtest/kubernetes-sidecar-deployment.yaml + ``` + +3. **Verify deployment:** + + ```bash + kubectl get pods -n terraform-agents + kubectl logs -n terraform-agents -c goblet-cache + ``` + +4. **Monitor with Prometheus:** + + ```bash + kubectl port-forward -n terraform-agents svc/terraform-agent-metrics 8080:8080 + curl http://localhost:8080/metrics + ``` + +### Configuration Tips + +**Cache Size:** +```yaml +volumes: + - name: git-cache + emptyDir: + sizeLimit: 10Gi # Adjust based on repo sizes +``` + +**Resource Allocation:** +```yaml +resources: + requests: + cpu: "500m" # Increase for cache-heavy workloads + memory: "1Gi" # Increase for many repos + limits: + cpu: "1" + memory: "2Gi" +``` + +**Autoscaling:** +```yaml +minReplicas: 10 # Baseline capacity +maxReplicas: 100 # Peak capacity +``` + +### Testing Sidecar Deployment + +```bash +# Port forward to a pod +kubectl port-forward -n terraform-agents 8080:8080 + +# Test from your local machine +python3 loadtest/loadtest.py \ + --url http://localhost:8080 \ + --workers 5 \ + --requests 50 +``` + +--- + +## Troubleshooting + +### Load Balancer Issues + +**Problem**: Requests not evenly distributed + +**Check HAProxy stats:** +```bash 
+curl http://localhost:8404 +``` + +**Solution**: Verify consistent hashing is working: +```bash +# Same repo should always go to same backend +for i in {1..10}; do + curl -v http://localhost:8080/github.com/kubernetes/kubernetes/info/refs \ + 2>&1 | grep "X-Served-By" +done +``` + +### Cache Corruption + +**Problem**: Git errors, repository corruption + +**Likely cause**: Multiple instances sharing same cache directory + +**Solution**: +1. Stop all instances +2. Clear cache: `rm -rf /cache/*` +3. Ensure proper sharding/sidecar deployment +4. Restart with isolated caches + +### High Memory Usage + +**Problem**: Goblet using excessive memory + +**Likely cause**: Many large repositories cached + +**Solution**: +1. Reduce cache size with LRU eviction (future enhancement) +2. Increase sizeLimit for emptyDir volume +3. Partition repositories across more instances + +### Slow Response Times + +**Problem**: High p95/p99 latencies + +**Diagnosis**: +```bash +# Check metrics +curl http://localhost:8080/metrics | grep git_fetch + +# Check upstream latency +curl http://localhost:8080/metrics | grep upstream_duration +``` + +**Solutions**: +- Increase worker pool size +- Add more instances (sharding) +- Optimize upstream connectivity +- Add backup storage for cold starts + +--- + +## Future Enhancements + +### Distributed Coordination + +To enable true shared-cache multi-instance deployment: + +1. **Distributed locks** (Redis, etcd) +2. **Leader election** per repository +3. **Cache coherency protocol** +4. **Shared metadata store** + +### Cache Management + +1. **LRU eviction** for size-bounded cache +2. **Metrics-based warming** (pre-fetch popular repos) +3. **Tiered storage** (hot/cold separation) +4. 
**Cache replication** for HA + +--- + +## Related Documentation + +- [Goblet README](../README.md) +- [Offline Mode Documentation](../testing/TEST_COVERAGE.md) +- [Docker Compose Configuration](../docker-compose.loadtest.yml) +- [Kubernetes Deployment](./kubernetes-sidecar-deployment.yaml) + +--- + +## Questions & Support + +For issues or questions about load testing: +1. Check HAProxy stats: http://localhost:8404 +2. Check Prometheus metrics: http://localhost:9090 +3. Check Grafana dashboards: http://localhost:3000 +4. Review container logs: `docker-compose logs -f goblet-1` diff --git a/loadtest/grafana-datasources.yml b/loadtest/grafana-datasources.yml new file mode 100644 index 0000000..bb009bb --- /dev/null +++ b/loadtest/grafana-datasources.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false diff --git a/loadtest/haproxy.cfg b/loadtest/haproxy.cfg new file mode 100644 index 0000000..3fae24c --- /dev/null +++ b/loadtest/haproxy.cfg @@ -0,0 +1,59 @@ +global + log stdout format raw local0 + maxconn 4096 + stats socket /var/run/haproxy.sock mode 600 level admin + stats timeout 2m + +defaults + log global + mode http + option httplog + option dontlognull + timeout connect 5s + timeout client 60s + timeout server 60s + timeout http-request 10s + timeout http-keep-alive 2s + +# Stats page +frontend stats + bind *:8404 + stats enable + stats uri / + stats refresh 5s + stats show-legends + stats show-node + +# Main frontend - accepts Git protocol requests +frontend git_proxy + bind *:8080 + default_backend goblet_shards + + # Capture the request path for routing + http-request set-header X-Request-Path %[path] + + # Log backend selection + http-request capture path len 256 + http-response set-header X-Served-By %[srv_name] + +# Backend with consistent hashing by URL path +# This ensures the same repository always goes to the same instance +backend 
goblet_shards + balance uri whole + hash-type consistent + + # Health checks + option httpchk GET /healthz + http-check expect status 200 + + # Servers (Goblet instances) + server goblet-1 goblet-1:8080 check inter 5s fall 3 rise 2 + server goblet-2 goblet-2:8080 check inter 5s fall 3 rise 2 + server goblet-3 goblet-3:8080 check inter 5s fall 3 rise 2 + + # Connection tuning for Git operations + timeout server 300s + timeout connect 10s + + # Retry policy - don't retry on same server to avoid corruption + retries 0 diff --git a/loadtest/k6-script.js b/loadtest/k6-script.js new file mode 100644 index 0000000..06e7a32 --- /dev/null +++ b/loadtest/k6-script.js @@ -0,0 +1,171 @@ +import http from 'k6/http'; +import { check, sleep } from 'k6'; +import { Rate, Trend, Counter } from 'k6/metrics'; + +// Custom metrics +const errorRate = new Rate('errors'); +const cacheHitRate = new Rate('cache_hits'); +const requestDuration = new Trend('request_duration'); +const requestCounter = new Counter('requests_total'); + +// Load test configuration +export const options = { + stages: [ + { duration: '2m', target: 10 }, // Ramp up to 10 VUs + { duration: '5m', target: 10 }, // Stay at 10 VUs + { duration: '2m', target: 50 }, // Ramp up to 50 VUs + { duration: '5m', target: 50 }, // Stay at 50 VUs + { duration: '2m', target: 100 }, // Ramp up to 100 VUs + { duration: '5m', target: 100 }, // Stay at 100 VUs + { duration: '2m', target: 0 }, // Ramp down + ], + thresholds: { + 'http_req_duration': ['p(95)<5000'], // 95% of requests should be below 5s + 'errors': ['rate<0.1'], // Error rate should be below 10% + 'http_req_failed': ['rate<0.05'], // Failed requests below 5% + }, +}; + +// Simulated repository list (adjust to match your test repos) +const repositories = [ + 'github.com/kubernetes/kubernetes', + 'github.com/golang/go', + 'github.com/torvalds/linux', + 'github.com/facebook/react', + 'github.com/microsoft/vscode', + 'github.com/hashicorp/terraform', + 
'github.com/nodejs/node', + 'github.com/rust-lang/rust', + 'github.com/apache/spark', + 'github.com/tensorflow/tensorflow', +]; + +// Git protocol v2 ls-refs command +function createLsRefsRequest() { + return '0014command=ls-refs\n' + + '0001' + + '0009peel\n' + + '000csymrefs\n' + + '000bunborn\n' + + '0014ref-prefix refs/\n' + + '0000'; +} + +// Git protocol v2 fetch command (minimal) +function createFetchRequest(wantRef) { + return '0011command=fetch\n' + + '0001' + + '000cthin-pack\n' + + '000cofs-delta\n' + + `00${(32 + wantRef.length).toString(16).padStart(2, '0')}want ${wantRef}\n` + + '00000009done\n' + + '0000'; +} + +// Select random repository +function getRandomRepo() { + return repositories[Math.floor(Math.random() * repositories.length)]; +} + +export default function () { + const targetUrl = __ENV.TARGET_URL || 'http://localhost:8080'; + const repo = getRandomRepo(); + const repoUrl = `${targetUrl}/${repo}/git-upload-pack`; + + // Scenario 1: ls-refs request (80% of requests) + if (Math.random() < 0.8) { + const lsRefsPayload = createLsRefsRequest(); + + const params = { + headers: { + 'Content-Type': 'application/x-git-upload-pack-request', + 'Git-Protocol': 'version=2', + 'Accept': 'application/x-git-upload-pack-result', + }, + timeout: '60s', + }; + + const start = Date.now(); + const response = http.post(repoUrl, lsRefsPayload, params); + const duration = Date.now() - start; + + requestCounter.add(1); + requestDuration.add(duration); + + const success = check(response, { + 'ls-refs status is 200': (r) => r.status === 200, + 'ls-refs has body': (r) => r.body.length > 0, + 'ls-refs is valid': (r) => r.body.includes('refs/'), + }); + + errorRate.add(!success); + + // Check if served from cache (custom header from HAProxy) + if (response.headers['X-Served-By']) { + console.log(`Repo ${repo} served by ${response.headers['X-Served-By']}`); + } + } + // Scenario 2: fetch request (20% of requests) + else { + // First, get refs with ls-refs + const 
lsRefsPayload = createLsRefsRequest(); + const params = { + headers: { + 'Content-Type': 'application/x-git-upload-pack-request', + 'Git-Protocol': 'version=2', + 'Accept': 'application/x-git-upload-pack-result', + }, + timeout: '60s', + }; + + const lsRefsResponse = http.post(repoUrl, lsRefsPayload, params); + + if (lsRefsResponse.status === 200) { + // Parse a ref from response (simplified - assumes valid format) + const refMatch = lsRefsResponse.body.match(/([0-9a-f]{40})\s+refs\/heads\/\w+/); + + if (refMatch && refMatch[1]) { + const wantRef = refMatch[1]; + const fetchPayload = createFetchRequest(wantRef); + + const start = Date.now(); + const fetchResponse = http.post(repoUrl, fetchPayload, params); + const duration = Date.now() - start; + + requestCounter.add(1); + requestDuration.add(duration); + + const success = check(fetchResponse, { + 'fetch status is 200': (r) => r.status === 200, + 'fetch has pack data': (r) => r.body.length > 0, + }); + + errorRate.add(!success); + } + } + } + + // Think time between requests (simulates real user behavior) + sleep(Math.random() * 3 + 1); // 1-4 seconds +} + +export function handleSummary(data) { + return { + 'stdout': textSummary(data, { indent: ' ', enableColors: true }), + '/tmp/k6-summary.json': JSON.stringify(data), + }; +} + +function textSummary(data, options) { + const indent = options.indent || ''; + const enableColors = options.enableColors || false; + + let summary = '\n' + indent + '=== Load Test Summary ===\n\n'; + + summary += indent + `Requests: ${data.metrics.requests_total.values.count}\n`; + summary += indent + `Errors: ${(data.metrics.errors.values.rate * 100).toFixed(2)}%\n`; + summary += indent + `Request Duration (p95): ${data.metrics.request_duration.values['p(95)']}ms\n`; + summary += indent + `HTTP Req Duration (p95): ${data.metrics.http_req_duration.values['p(95)']}ms\n`; + + return summary; +} diff --git a/loadtest/kubernetes-sidecar-deployment.yaml 
b/loadtest/kubernetes-sidecar-deployment.yaml new file mode 100644 index 0000000..25308ab --- /dev/null +++ b/loadtest/kubernetes-sidecar-deployment.yaml @@ -0,0 +1,204 @@ +--- +# ConfigMap for Goblet sidecar configuration +apiVersion: v1 +kind: ConfigMap +metadata: + name: goblet-sidecar-config + namespace: terraform-agents +data: + # Basic configuration - customize based on your auth needs + goblet.env: | + GOBLET_PORT=8080 + GOBLET_CACHE_ROOT=/cache + GOBLET_LOG_LEVEL=info + +--- +# Terraform Agent with Goblet Sidecar +apiVersion: apps/v1 +kind: Deployment +metadata: + name: terraform-agent + namespace: terraform-agents + labels: + app: terraform-agent +spec: + replicas: 10 # Scale to handle load + selector: + matchLabels: + app: terraform-agent + template: + metadata: + labels: + app: terraform-agent + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + spec: + # Shared cache volume between sidecar and main container + volumes: + - name: git-cache + emptyDir: + sizeLimit: 10Gi # Adjust based on your repo sizes + - name: goblet-config + configMap: + name: goblet-sidecar-config + + containers: + # Main Terraform Agent Container + - name: terraform-agent + image: your-terraform-agent:latest + env: + # Configure git to use local proxy + - name: HTTP_PROXY + value: "http://localhost:8080" + - name: HTTPS_PROXY + value: "http://localhost:8080" + # Alternatively, configure git directly + - name: GIT_CONFIG_COUNT + value: "1" + - name: GIT_CONFIG_KEY_0 + value: "http.proxy" + - name: GIT_CONFIG_VALUE_0 + value: "http://localhost:8080" + resources: + requests: + cpu: "1" + memory: "2Gi" + limits: + cpu: "2" + memory: "4Gi" + + # Goblet Sidecar Container + - name: goblet-cache + image: goblet:latest # Build from your Dockerfile + ports: + - containerPort: 8080 + name: http + protocol: TCP + - containerPort: 8080 + name: metrics + protocol: TCP + envFrom: + - configMapRef: + name: goblet-sidecar-config + 
volumeMounts:
+ - name: git-cache
+ mountPath: /cache
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8080
+ initialDelaySeconds: 10
+ periodSeconds: 30
+ readinessProbe:
+ httpGet:
+ path: /healthz
+ port: 8080
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ resources:
+ requests:
+ cpu: "500m"
+ memory: "1Gi"
+ limits:
+ cpu: "1"
+ memory: "2Gi"
+ # Security context
+ securityContext:
+ runAsNonRoot: true
+ runAsUser: 1000
+ allowPrivilegeEscalation: false
+ readOnlyRootFilesystem: false # Git needs to write to cache
+
+ # NOTE(review): removed the former wait-for-goblet init container. Init
+ # containers must run to completion BEFORE any regular container starts,
+ # so an init container polling the goblet-cache sidecar's /healthz could
+ # never succeed and left the pod stuck in Init indefinitely. On
+ # Kubernetes >= 1.28 run goblet-cache as a native sidecar (an
+ # initContainers entry with restartPolicy: Always); on older clusters,
+ # wait/retry for http://localhost:8080/healthz inside the
+ # terraform-agent entrypoint instead.
+
+---
+# Service for metrics scraping (optional)
+apiVersion: v1
+kind: Service
+metadata:
+ name: terraform-agent-metrics
+ namespace: terraform-agents
+ labels:
+ app: terraform-agent
+spec:
+ clusterIP: None # Headless service for pod-level metrics
+ selector:
+ app: terraform-agent
+ ports:
+ - name: metrics
+ port: 8080
+ targetPort: 8080
+ protocol: TCP
+
+---
+# ServiceMonitor for Prometheus Operator (optional)
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ name: goblet-sidecar
+ namespace: terraform-agents
+ labels:
+ app: terraform-agent
+spec:
+ selector:
+ matchLabels:
+ app: terraform-agent
+ endpoints:
+ - port: metrics
+ path: /metrics
+ interval: 30s
+
+---
+# PodDisruptionBudget to ensure availability during rolling updates
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ name: terraform-agent-pdb
+ namespace: terraform-agents
+spec:
+ minAvailable: 50%
+ selector:
+ matchLabels:
+ app: terraform-agent
+
+---
+# HorizontalPodAutoscaler for dynamic scaling
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+ name: terraform-agent-hpa
+ namespace: terraform-agents
+spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: terraform-agent + minReplicas: 10 + maxReplicas: 100 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 diff --git a/loadtest/kubernetes-sidecar-secure.yaml b/loadtest/kubernetes-sidecar-secure.yaml new file mode 100644 index 0000000..171dfae --- /dev/null +++ b/loadtest/kubernetes-sidecar-secure.yaml @@ -0,0 +1,303 @@ +--- +# SECURE Terraform Agent Deployment with Tenant Isolation +# This configuration ensures proper isolation for multi-tenant scenarios + +apiVersion: v1 +kind: ConfigMap +metadata: + name: goblet-secure-config + namespace: terraform-agents +data: + isolation.json: | + { + "mode": "tenant", + "tenant_header_key": "X-TFC-Workspace-ID", + "hash_identifiers": false + } + +--- +# ServiceAccount with minimal permissions +apiVersion: v1 +kind: ServiceAccount +metadata: + name: terraform-agent + namespace: terraform-agents + +--- +# NetworkPolicy to restrict traffic +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: terraform-agent-netpol + namespace: terraform-agents +spec: + podSelector: + matchLabels: + app: terraform-agent + policyTypes: + - Ingress + - Egress + ingress: + # Only allow metrics scraping from Prometheus + - from: + - namespaceSelector: + matchLabels: + name: monitoring + ports: + - protocol: TCP + port: 8080 + egress: + # Allow DNS + - to: + - namespaceSelector: {} + ports: + - protocol: UDP + port: 53 + # Allow HTTPS to GitHub/upstream + - to: + - namespaceSelector: {} + ports: + - protocol: TCP + port: 443 + # Allow localhost (sidecar communication) + - to: + - podSelector: + matchLabels: + app: terraform-agent + ports: + - protocol: TCP + port: 8080 + +--- +# Deployment with Security Hardening +apiVersion: apps/v1 +kind: Deployment +metadata: + name: terraform-agent-secure + 
namespace: terraform-agents + labels: + app: terraform-agent + security: hardened +spec: + replicas: 10 + selector: + matchLabels: + app: terraform-agent + template: + metadata: + labels: + app: terraform-agent + security: hardened + annotations: + # Security annotations + seccomp.security.alpha.kubernetes.io/pod: "runtime/default" + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + spec: + serviceAccountName: terraform-agent + + # Security context for pod + securityContext: + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 1000 + seccompProfile: + type: RuntimeDefault + + volumes: + - name: git-cache + emptyDir: + sizeLimit: 10Gi + # Use memory-backed emptyDir for sensitive data (optional) + # medium: "Memory" # Uncomment for in-memory cache + - name: goblet-config + configMap: + name: goblet-secure-config + # Optional: Encrypted volume for cache + # - name: encrypted-cache + # persistentVolumeClaim: + # claimName: encrypted-cache-pvc + + containers: + # Main Terraform Agent Container + - name: terraform-agent + image: your-terraform-agent:latest + env: + # Git proxy configuration + - name: HTTP_PROXY + value: "http://localhost:8080" + - name: HTTPS_PROXY + value: "http://localhost:8080" + + # Terraform Cloud workspace ID (for tenant isolation) + - name: TFC_WORKSPACE_ID + value: "ws-example123" # Should come from pod labels or injection + + # Pass workspace ID to Goblet via custom header + - name: GIT_CONFIG_COUNT + value: "2" + - name: GIT_CONFIG_KEY_0 + value: "http.proxy" + - name: GIT_CONFIG_VALUE_0 + value: "http://localhost:8080" + - name: GIT_CONFIG_KEY_1 + value: "http.extraHeader" + - name: GIT_CONFIG_VALUE_1 + value: "X-TFC-Workspace-ID: $(TFC_WORKSPACE_ID)" + + # Security context for container + securityContext: + runAsNonRoot: true + runAsUser: 1000 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false + capabilities: + drop: + - ALL + + resources: + requests: + cpu: "1" + memory: "2Gi" + limits: 
+ cpu: "2" + memory: "4Gi" + + # Goblet Sidecar Container (Secure Configuration) + - name: goblet-cache + image: goblet:latest + ports: + - containerPort: 8080 + name: http + protocol: TCP + env: + - name: GOBLET_PORT + value: "8080" + - name: GOBLET_CACHE_ROOT + value: "/cache" + - name: GOBLET_LOG_LEVEL + value: "info" + + # CRITICAL: Isolation mode configuration + - name: GOBLET_ISOLATION_MODE + value: "tenant" + - name: GOBLET_TENANT_HEADER + value: "X-TFC-Workspace-ID" + + # Optional: Enable audit logging + - name: GOBLET_AUDIT_LOG + value: "true" + - name: GOBLET_AUDIT_LOG_PATH + value: "/cache/audit.log" + + volumeMounts: + - name: git-cache + mountPath: /cache + - name: goblet-config + mountPath: /etc/goblet + readOnly: true + + # Security context + securityContext: + runAsNonRoot: true + runAsUser: 1000 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false # Git needs write access to cache + capabilities: + drop: + - ALL + + # Probes + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1" + memory: "2Gi" + +--- +# PodSecurityPolicy (if using PSP) +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: terraform-agent-psp + annotations: + seccomp.security.alpha.kubernetes.io/allowedProfileNames: 'runtime/default' + seccomp.security.alpha.kubernetes.io/defaultProfileName: 'runtime/default' +spec: + privileged: false + allowPrivilegeEscalation: false + requiredDropCapabilities: + - ALL + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'RunAsAny' + fsGroup: + rule: 'RunAsAny' + 
readOnlyRootFilesystem: false + +--- +# ResourceQuota per namespace (tenant isolation at cluster level) +apiVersion: v1 +kind: ResourceQuota +metadata: + name: terraform-agent-quota + namespace: terraform-agents +spec: + hard: + requests.cpu: "100" + requests.memory: 200Gi + persistentvolumeclaims: "100" + pods: "100" + +--- +# LimitRange to prevent resource exhaustion +apiVersion: v1 +kind: LimitRange +metadata: + name: terraform-agent-limits + namespace: terraform-agents +spec: + limits: + - max: + cpu: "4" + memory: "8Gi" + min: + cpu: "100m" + memory: "128Mi" + type: Container + - max: + cpu: "8" + memory: "16Gi" + min: + cpu: "100m" + memory: "128Mi" + type: Pod diff --git a/loadtest/loadtest.py b/loadtest/loadtest.py new file mode 100644 index 0000000..dba4e48 --- /dev/null +++ b/loadtest/loadtest.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python3 +""" +Load test harness for Goblet Git caching proxy. + +This script simulates multiple concurrent Git clients making requests +to the proxy, testing cache efficiency, throughput, and stability. +""" + +import argparse +import concurrent.futures +import hashlib +import json +import random +import statistics +import sys +import time +from dataclasses import dataclass +from typing import List, Dict, Tuple +import requests +from urllib.parse import urljoin + + +@dataclass +class TestResult: + """Results from a single test request.""" + success: bool + duration_ms: float + repo: str + operation: str + served_by: str = "" + error: str = "" + + +class GitProtocolV2Client: + """Simple Git protocol v2 client for testing.""" + + def __init__(self, base_url: str, timeout: int = 60): + self.base_url = base_url + self.timeout = timeout + self.session = requests.Session() + + def ls_refs(self, repo_path: str) -> Tuple[bool, float, str, str]: + """ + Execute ls-refs command. 
+ Returns: (success, duration_ms, served_by, error) + """ + url = urljoin(self.base_url, f"/{repo_path}/git-upload-pack") + + # Git protocol v2 ls-refs payload + payload = ( + b"0014command=ls-refs\n" + b"0001" + b"0009peel\n" + b"000csymrefs\n" + b"000bunborn\n" + b"0014ref-prefix refs/\n" + b"0000" + ) + + headers = { + "Content-Type": "application/x-git-upload-pack-request", + "Git-Protocol": "version=2", + "Accept": "application/x-git-upload-pack-result", + } + + start = time.time() + try: + response = self.session.post( + url, data=payload, headers=headers, timeout=self.timeout + ) + duration_ms = (time.time() - start) * 1000 + + if response.status_code != 200: + return False, duration_ms, "", f"HTTP {response.status_code}" + + if len(response.content) == 0: + return False, duration_ms, "", "Empty response" + + served_by = response.headers.get("X-Served-By", "") + return True, duration_ms, served_by, "" + + except Exception as e: + duration_ms = (time.time() - start) * 1000 + return False, duration_ms, "", str(e) + + def fetch(self, repo_path: str, want_ref: str) -> Tuple[bool, float, str, str]: + """ + Execute fetch command. 
+ Returns: (success, duration_ms, served_by, error) + """ + url = urljoin(self.base_url, f"/{repo_path}/git-upload-pack") + + # Git protocol v2 fetch payload + want_line = f"want {want_ref}\n".encode() + payload = ( + b"0011command=fetch\n" + b"0001" + b"000cthin-pack\n" + b"000cofs-delta\n" + + f"{len(want_line) + 4:04x}".encode() + + want_line + + b"00000009done\n" + b"0000" + ) + + headers = { + "Content-Type": "application/x-git-upload-pack-request", + "Git-Protocol": "version=2", + "Accept": "application/x-git-upload-pack-result", + } + + start = time.time() + try: + response = self.session.post( + url, data=payload, headers=headers, timeout=self.timeout + ) + duration_ms = (time.time() - start) * 1000 + + if response.status_code != 200: + return False, duration_ms, "", f"HTTP {response.status_code}" + + served_by = response.headers.get("X-Served-By", "") + return True, duration_ms, served_by, "" + + except Exception as e: + duration_ms = (time.time() - start) * 1000 + return False, duration_ms, "", str(e) + + +class LoadTestRunner: + """Orchestrates load testing.""" + + def __init__( + self, + target_url: str, + repositories: List[str], + num_workers: int = 10, + requests_per_worker: int = 100, + think_time_ms: int = 100, + ): + self.target_url = target_url + self.repositories = repositories + self.num_workers = num_workers + self.requests_per_worker = requests_per_worker + self.think_time_ms = think_time_ms + self.results: List[TestResult] = [] + + def worker_task(self, worker_id: int) -> List[TestResult]: + """Worker function that executes requests.""" + client = GitProtocolV2Client(self.target_url) + results = [] + + for i in range(self.requests_per_worker): + # Select random repository + repo = random.choice(self.repositories) + + # 80% ls-refs, 20% fetch + if random.random() < 0.8: + success, duration, served_by, error = client.ls_refs(repo) + result = TestResult( + success=success, + duration_ms=duration, + repo=repo, + operation="ls-refs", + 
served_by=served_by, + error=error, + ) + else: + # For fetch, we need a valid ref - use a common one + # In real scenario, would ls-refs first + dummy_ref = "0" * 40 # Placeholder + success, duration, served_by, error = client.fetch(repo, dummy_ref) + result = TestResult( + success=success, + duration_ms=duration, + repo=repo, + operation="fetch", + served_by=served_by, + error=error, + ) + + results.append(result) + + # Progress indicator + if (i + 1) % 10 == 0: + print( + f"Worker {worker_id}: {i + 1}/{self.requests_per_worker} requests", + end="\r", + ) + + # Think time + if self.think_time_ms > 0: + time.sleep(self.think_time_ms / 1000) + + return results + + def run(self) -> Dict: + """Execute load test and return summary statistics.""" + print(f"Starting load test:") + print(f" Target: {self.target_url}") + print(f" Workers: {self.num_workers}") + print(f" Requests per worker: {self.requests_per_worker}") + print(f" Total requests: {self.num_workers * self.requests_per_worker}") + print(f" Repositories: {len(self.repositories)}") + print() + + start_time = time.time() + + # Execute workers in parallel + with concurrent.futures.ThreadPoolExecutor( + max_workers=self.num_workers + ) as executor: + futures = [ + executor.submit(self.worker_task, i) for i in range(self.num_workers) + ] + + for future in concurrent.futures.as_completed(futures): + self.results.extend(future.result()) + + total_duration = time.time() - start_time + + return self._compute_statistics(total_duration) + + def _compute_statistics(self, total_duration: float) -> Dict: + """Compute summary statistics from results.""" + total_requests = len(self.results) + successful = [r for r in self.results if r.success] + failed = [r for r in self.results if not r.success] + + durations = [r.duration_ms for r in successful] + + # Server distribution + server_counts = {} + for r in self.results: + if r.served_by: + server_counts[r.served_by] = server_counts.get(r.served_by, 0) + 1 + + # Repository 
distribution + repo_requests = {} + for r in self.results: + repo_requests[r.repo] = repo_requests.get(r.repo, 0) + 1 + + stats = { + "total_requests": total_requests, + "successful": len(successful), + "failed": len(failed), + "success_rate": len(successful) / total_requests * 100, + "total_duration_sec": total_duration, + "requests_per_sec": total_requests / total_duration, + "duration_ms": { + "min": min(durations) if durations else 0, + "max": max(durations) if durations else 0, + "mean": statistics.mean(durations) if durations else 0, + "median": statistics.median(durations) if durations else 0, + "p95": ( + sorted(durations)[int(len(durations) * 0.95)] + if durations + else 0 + ), + "p99": ( + sorted(durations)[int(len(durations) * 0.99)] + if durations + else 0 + ), + }, + "server_distribution": server_counts, + "repo_distribution": repo_requests, + "errors": {}, + } + + # Collect error types + for r in failed: + stats["errors"][r.error] = stats["errors"].get(r.error, 0) + 1 + + return stats + + def print_summary(self, stats: Dict): + """Print formatted summary statistics.""" + print("\n" + "=" * 60) + print("LOAD TEST RESULTS") + print("=" * 60) + print(f"\nTotal Requests: {stats['total_requests']}") + print(f"Successful: {stats['successful']}") + print(f"Failed: {stats['failed']}") + print(f"Success Rate: {stats['success_rate']:.2f}%") + print(f"Total Duration: {stats['total_duration_sec']:.2f}s") + print(f"Requests/sec: {stats['requests_per_sec']:.2f}") + + print(f"\nResponse Times (ms):") + print(f" Min: {stats['duration_ms']['min']:.2f}") + print(f" Max: {stats['duration_ms']['max']:.2f}") + print(f" Mean: {stats['duration_ms']['mean']:.2f}") + print(f" Median: {stats['duration_ms']['median']:.2f}") + print(f" P95: {stats['duration_ms']['p95']:.2f}") + print(f" P99: {stats['duration_ms']['p99']:.2f}") + + if stats["server_distribution"]: + print(f"\nServer Distribution:") + for server, count in sorted(stats["server_distribution"].items()): + pct = count 
/ stats["total_requests"] * 100 + print(f" {server:20s} {count:6d} ({pct:5.2f}%)") + + if stats["errors"]: + print(f"\nErrors:") + for error, count in sorted( + stats["errors"].items(), key=lambda x: x[1], reverse=True + ): + print(f" {error:40s} {count:6d}") + + print("\n" + "=" * 60 + "\n") + + +def main(): + parser = argparse.ArgumentParser( + description="Load test harness for Goblet Git caching proxy" + ) + parser.add_argument( + "--url", + default="http://localhost:8080", + help="Target URL (default: http://localhost:8080)", + ) + parser.add_argument( + "--workers", type=int, default=10, help="Number of concurrent workers" + ) + parser.add_argument( + "--requests", type=int, default=100, help="Requests per worker" + ) + parser.add_argument( + "--think-time", type=int, default=100, help="Think time between requests (ms)" + ) + parser.add_argument( + "--repos", + nargs="+", + default=[ + "github.com/kubernetes/kubernetes", + "github.com/golang/go", + "github.com/torvalds/linux", + "github.com/hashicorp/terraform", + ], + help="List of repository paths to test", + ) + parser.add_argument( + "--output", help="Output file for JSON results (optional)" + ) + + args = parser.parse_args() + + runner = LoadTestRunner( + target_url=args.url, + repositories=args.repos, + num_workers=args.workers, + requests_per_worker=args.requests, + think_time_ms=args.think_time, + ) + + stats = runner.run() + runner.print_summary(stats) + + if args.output: + with open(args.output, "w") as f: + json.dump(stats, f, indent=2) + print(f"Results saved to {args.output}") + + # Exit code based on success rate + sys.exit(0 if stats["success_rate"] >= 95 else 1) + + +if __name__ == "__main__": + main() diff --git a/loadtest/prometheus.yml b/loadtest/prometheus.yml new file mode 100644 index 0000000..02efce4 --- /dev/null +++ b/loadtest/prometheus.yml @@ -0,0 +1,34 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + external_labels: + cluster: 'loadtest' + +scrape_configs: + # 
Scrape Goblet instances + - job_name: 'goblet' + static_configs: + - targets: + - 'goblet-1:8080' + - 'goblet-2:8080' + - 'goblet-3:8080' + labels: + service: 'goblet' + metrics_path: '/metrics' + scrape_interval: 10s + + # Scrape HAProxy stats + - job_name: 'haproxy' + static_configs: + - targets: + - 'haproxy:8404' + labels: + service: 'haproxy' + metrics_path: '/metrics' + scrape_interval: 10s + + # Scrape Prometheus itself + - job_name: 'prometheus' + static_configs: + - targets: + - 'localhost:9090'