6 changes: 6 additions & 0 deletions .env.example
@@ -29,6 +29,12 @@ OPENAI_BASE_URL="https://api.openai.com/v1"
# BIG_MODEL="gemini-2.5-pro"
# SMALL_MODEL="gemini-2.5-flash"

# Optional: Pass through incoming API keys
# When enabled, extracts the API key from the x-api-key or Authorization: Bearer header
# and forwards it to upstream services instead of using the static API keys above
# Useful for multi-tenant environments with per-user quota tracking
# PASSTHROUGH_API_KEY="false"

# Example "just an Anthropic proxy" mode:
# PREFERRED_PROVIDER="anthropic"
# (BIG_MODEL and SMALL_MODEL are ignored in this mode)
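
The passthrough option above can be exercised by sending a per-user key on each request. Below is a minimal sketch using the third-party `requests` package; the `/v1/messages` path, model name, and key value are placeholders assumed for illustration (the port comes from the Dockerfile below):

```python
import requests

# With PASSTHROUGH_API_KEY="true", the proxy reads the per-user key from the
# x-api-key header (or "Authorization: Bearer ...") and forwards it upstream
# instead of the static keys configured above.
response = requests.post(
    "http://localhost:8082/v1/messages",  # port from the Dockerfile; path assumed
    headers={"x-api-key": "sk-user-123-example"},  # hypothetical per-user key
    json={
        "model": "claude-3-sonnet",  # anything containing "sonnet" maps to BIG_MODEL
        "max_tokens": 64,
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
print(response.status_code)
```

With `REQUIRE_PASSTHROUGH_KEY` unset, the same request sent without the header would silently fall back to the static keys.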
21 changes: 19 additions & 2 deletions Dockerfile
@@ -1,4 +1,13 @@
FROM python:latest
FROM python:3.12-slim

# Install minimal system dependencies
RUN apt-get update && apt-get install -y \
gcc \
curl \
&& rm -rf /var/lib/apt/lists/* \
&& curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

ENV PATH="/root/.cargo/bin:${PATH}"

WORKDIR /claude-code-proxy

@@ -11,6 +20,14 @@ RUN pip install --upgrade uv && uv sync --locked
# Copy project code to current directory
COPY . .

# Clean up build dependencies to reduce image size
RUN apt-get remove -y gcc \
&& apt-get autoremove -y \
&& rm -rf /root/.cargo/registry \
&& rm -rf /root/.cargo/git \
&& rm -rf /tmp/* \
&& pip cache purge

# Start the proxy
EXPOSE 8082
CMD uv run uvicorn server:app --host 0.0.0.0 --port 8082 --reload
CMD ["uv", "run", "uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8082"]
13 changes: 13 additions & 0 deletions README.md
@@ -45,6 +45,8 @@ A proxy server that lets you use Anthropic clients with Gemini, OpenAI, or Anthr
* `USE_VERTEX_AUTH` (Optional): Set to `true` to use Application Default Credentials (ADC) instead of a static API key. Note: when `USE_VERTEX_AUTH=true`, you must configure `VERTEX_PROJECT` and `VERTEX_LOCATION`.
* `VERTEX_PROJECT` (Optional): Your Google Cloud Project ID (Required if `PREFERRED_PROVIDER=google` and `USE_VERTEX_AUTH=true`).
* `VERTEX_LOCATION` (Optional): The Google Cloud region for Vertex AI (e.g., `us-central1`) (Required if `PREFERRED_PROVIDER=google` and `USE_VERTEX_AUTH=true`).
* `PASSTHROUGH_API_KEY` (Optional): Set to `true` to extract the API key from incoming request headers (`x-api-key` or `Authorization: Bearer`) and forward it to upstream services instead of using the static API keys. Useful for multi-tenant environments with per-user quota tracking. **If passthrough mode is enabled but no valid API key is found in the request, the proxy will fall back to the static API keys provided via environment variables.**
* `REQUIRE_PASSTHROUGH_KEY` (Optional): Set to `true` to reject requests when passthrough mode is enabled but no valid API key is found in the request headers. This prevents silent fallback to static API keys and ensures strict per-user quota tracking in multi-tenant environments. Requires `PASSTHROUGH_API_KEY=true`.
* `PREFERRED_PROVIDER` (Optional): Set to `openai` (default), `google`, or `anthropic`. This determines the primary backend for mapping `haiku`/`sonnet`.
* `BIG_MODEL` (Optional): The model to map `sonnet` requests to. Defaults to `gpt-4.1` (if `PREFERRED_PROVIDER=openai`) or `gemini-2.5-pro-preview-03-25`. Ignored when `PREFERRED_PROVIDER=anthropic`.
* `SMALL_MODEL` (Optional): The model to map `haiku` requests to. Defaults to `gpt-4.1-mini` (if `PREFERRED_PROVIDER=openai`) or `gemini-2.0-flash`. Ignored when `PREFERRED_PROVIDER=anthropic`.
@@ -194,6 +196,17 @@ BIG_MODEL="gpt-4o" # Example specific model
SMALL_MODEL="gpt-4o-mini" # Example specific model
```

**Example 5: Multi-tenant Environment with Required API Keys**
```dotenv
OPENAI_API_KEY="your-openai-key" # Fallback key
GEMINI_API_KEY="your-google-key" # Fallback key
PASSTHROUGH_API_KEY="true"       # Enable passthrough mode
REQUIRE_PASSTHROUGH_KEY="true"   # Require valid API key, no fallback
PREFERRED_PROVIDER="openai"
```

*Use case: This ensures strict per-user quota tracking by rejecting any request without a valid API key, preventing silent fallback to shared static keys.*
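
For illustration, under this configuration a request that carries no key should be rejected rather than falling back to the shared keys. A minimal sketch using the third-party `requests` package (the `/v1/messages` path and model name are assumptions; the 401 shape matches the server.py change below):

```python
import requests

# No x-api-key or Authorization header is sent, so with
# REQUIRE_PASSTHROUGH_KEY=true the proxy should answer 401
# (authentication_error / missing_api_key) rather than silently
# using the static fallback keys.
resp = requests.post(
    "http://localhost:8082/v1/messages",
    json={
        "model": "claude-3-haiku",
        "max_tokens": 16,
        "messages": [{"role": "user", "content": "ping"}],
    },
)
assert resp.status_code == 401
```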

## How It Works 🧩

This proxy works by:
104 changes: 89 additions & 15 deletions server.py
@@ -92,13 +92,28 @@ def format(self, record):
# Get OpenAI base URL from environment (if set)
OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL")

# Option to pass through the incoming API key from the request header to upstream services
# When enabled, the x-api-key or Authorization: Bearer header from the incoming request is used instead of the static provider API keys
# This enables per-user quota tracking when routing through a gateway
PASSTHROUGH_API_KEY = os.environ.get("PASSTHROUGH_API_KEY", "False").lower() == "true"

# Option to require a valid API key when passthrough mode is enabled
# When enabled, requests without valid API keys will be rejected instead of falling back to static keys
REQUIRE_PASSTHROUGH_KEY = os.environ.get("REQUIRE_PASSTHROUGH_KEY", "False").lower() == "true"

# Validate configuration
if REQUIRE_PASSTHROUGH_KEY and not PASSTHROUGH_API_KEY:
logger.error("REQUIRE_PASSTHROUGH_KEY=true requires PASSTHROUGH_API_KEY=true")
raise ValueError("REQUIRE_PASSTHROUGH_KEY=true requires PASSTHROUGH_API_KEY=true")

# Get preferred provider (default to openai)
PREFERRED_PROVIDER = os.environ.get("PREFERRED_PROVIDER", "openai").lower()

# Get model mapping configuration from environment
# Default to latest OpenAI models if not set
BIG_MODEL = os.environ.get("BIG_MODEL", "gpt-4.1")
SMALL_MODEL = os.environ.get("SMALL_MODEL", "gpt-4.1-mini")
OPUS_MODEL = os.environ.get("OPUS_MODEL", "gpt-4.1")

# List of OpenAI models
OPENAI_MODELS = [
@@ -220,24 +235,33 @@ def validate_model_field(cls, v, info): # Renamed to avoid conflict
new_model = f"anthropic/{clean_v}"
mapped = True

# Map Haiku to SMALL_MODEL based on provider preference
elif 'haiku' in clean_v.lower():
# Map small models (claude-small, haiku) to SMALL_MODEL based on provider preference
elif 'claude-small' in clean_v.lower() or 'haiku' in clean_v.lower():
if PREFERRED_PROVIDER == "google" and SMALL_MODEL in GEMINI_MODELS:
new_model = f"gemini/{SMALL_MODEL}"
mapped = True
else:
new_model = f"openai/{SMALL_MODEL}"
mapped = True

# Map Sonnet to BIG_MODEL based on provider preference
elif 'sonnet' in clean_v.lower():
# Map medium models (claude-medium, sonnet) to BIG_MODEL based on provider preference
elif 'claude-medium' in clean_v.lower() or 'sonnet' in clean_v.lower():
if PREFERRED_PROVIDER == "google" and BIG_MODEL in GEMINI_MODELS:
new_model = f"gemini/{BIG_MODEL}"
mapped = True
else:
new_model = f"openai/{BIG_MODEL}"
mapped = True

# Map large models (claude-large, opus) to OPUS_MODEL based on provider preference
elif 'claude-large' in clean_v.lower() or 'opus' in clean_v.lower():
if PREFERRED_PROVIDER == "google" and OPUS_MODEL in GEMINI_MODELS:
new_model = f"gemini/{OPUS_MODEL}"
mapped = True
else:
new_model = f"openai/{OPUS_MODEL}"
mapped = True

# Add prefixes to non-mapped models if they match known lists
elif not mapped:
if clean_v in GEMINI_MODELS and not v.startswith('gemini/'):
@@ -293,24 +317,33 @@ def validate_model_token_count(cls, v, info): # Renamed to avoid conflict

# --- Mapping Logic --- START ---
mapped = False
# Map Haiku to SMALL_MODEL based on provider preference
if 'haiku' in clean_v.lower():
# Map small models (claude-small, haiku) to SMALL_MODEL based on provider preference
if 'claude-small' in clean_v.lower() or 'haiku' in clean_v.lower():
if PREFERRED_PROVIDER == "google" and SMALL_MODEL in GEMINI_MODELS:
new_model = f"gemini/{SMALL_MODEL}"
mapped = True
else:
new_model = f"openai/{SMALL_MODEL}"
mapped = True

# Map Sonnet to BIG_MODEL based on provider preference
elif 'sonnet' in clean_v.lower():
# Map medium models (claude-medium, sonnet) to BIG_MODEL based on provider preference
elif 'claude-medium' in clean_v.lower() or 'sonnet' in clean_v.lower():
if PREFERRED_PROVIDER == "google" and BIG_MODEL in GEMINI_MODELS:
new_model = f"gemini/{BIG_MODEL}"
mapped = True
else:
new_model = f"openai/{BIG_MODEL}"
mapped = True

# Map large models (claude-large, opus) to OPUS_MODEL based on provider preference
elif 'claude-large' in clean_v.lower() or 'opus' in clean_v.lower():
if PREFERRED_PROVIDER == "google" and OPUS_MODEL in GEMINI_MODELS:
new_model = f"gemini/{OPUS_MODEL}"
mapped = True
else:
new_model = f"openai/{OPUS_MODEL}"
mapped = True

# Add prefixes to non-mapped models if they match known lists
elif not mapped:
if clean_v in GEMINI_MODELS and not v.startswith('gemini/'):
@@ -1122,27 +1155,68 @@ async def create_message(
# Convert Anthropic request to LiteLLM format
litellm_request = convert_anthropic_to_litellm(request)

# Extract incoming API key from request header for passthrough mode
incoming_api_key = None
if PASSTHROUGH_API_KEY:
# Try x-api-key header first (Anthropic style), then Authorization Bearer
incoming_api_key = raw_request.headers.get("x-api-key")
if not incoming_api_key:
auth_header = raw_request.headers.get("authorization", "")
if auth_header.lower().startswith("bearer "):
incoming_api_key = auth_header[7:]

# Basic validation - check if key is not empty, has reasonable length, and basic format
if incoming_api_key:
incoming_api_key = incoming_api_key.strip()
# Check minimum length and basic character validation
if len(incoming_api_key) >= 10 and incoming_api_key.replace('-', '').replace('_', '').replace(' ', '').isalnum():
logger.debug("Passthrough mode: using API key from request header")
else:
incoming_api_key = None
logger.warning("Passthrough mode enabled but API key failed validation (invalid format or too short)")
else:
logger.warning("Passthrough mode enabled but no API key found in request headers (expected x-api-key or Authorization Bearer)")

# Check if REQUIRE_PASSTHROUGH_KEY is enabled and no valid API key was found
if REQUIRE_PASSTHROUGH_KEY and PASSTHROUGH_API_KEY and not incoming_api_key:
error_msg = "REQUIRE_PASSTHROUGH_KEY is enabled but no valid API key was found in request headers (x-api-key or Authorization Bearer)"
logger.error(error_msg)
raise HTTPException(
status_code=401,
detail={
"error": error_msg,
"type": "authentication_error",
"code": "missing_api_key"
}
)

# Determine whether to use passthrough for logging consistency
use_passthrough = PASSTHROUGH_API_KEY and incoming_api_key

# Determine which API key to use based on the model
if request.model.startswith("openai/"):
litellm_request["api_key"] = OPENAI_API_KEY
# Use passthrough key if enabled, otherwise fall back to env var
litellm_request["api_key"] = incoming_api_key if use_passthrough else OPENAI_API_KEY
# Use custom OpenAI base URL if configured
if OPENAI_BASE_URL:
litellm_request["api_base"] = OPENAI_BASE_URL
logger.debug(f"Using OpenAI API key and custom base URL {OPENAI_BASE_URL} for model: {request.model}")
logger.debug(f"Using {'passthrough' if use_passthrough else 'OpenAI'} API key and custom base URL {OPENAI_BASE_URL} for model: {request.model}")
else:
logger.debug(f"Using OpenAI API key for model: {request.model}")
logger.debug(f"Using {'passthrough' if use_passthrough else 'OpenAI'} API key for model: {request.model}")
elif request.model.startswith("gemini/"):
if USE_VERTEX_AUTH:
litellm_request["vertex_project"] = VERTEX_PROJECT
litellm_request["vertex_location"] = VERTEX_LOCATION
litellm_request["custom_llm_provider"] = "vertex_ai"
logger.debug(f"Using Gemini ADC with project={VERTEX_PROJECT}, location={VERTEX_LOCATION} and model: {request.model}")
else:
litellm_request["api_key"] = GEMINI_API_KEY
logger.debug(f"Using Gemini API key for model: {request.model}")
# Use passthrough key if enabled, otherwise fall back to env var
litellm_request["api_key"] = incoming_api_key if use_passthrough else GEMINI_API_KEY
logger.debug(f"Using {'passthrough' if use_passthrough else 'Gemini'} API key for model: {request.model}")
else:
litellm_request["api_key"] = ANTHROPIC_API_KEY
logger.debug(f"Using Anthropic API key for model: {request.model}")
# Use passthrough key if enabled, otherwise fall back to env var
litellm_request["api_key"] = incoming_api_key if use_passthrough else ANTHROPIC_API_KEY
logger.debug(f"Using {'passthrough' if use_passthrough else 'Anthropic'} API key for model: {request.model}")

# For OpenAI models - modify request format to work with limitations
if "openai" in litellm_request["model"] and "messages" in litellm_request:
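
For readers who want to poke at the header handling without running the server, here is a standalone sketch of the extraction and validation steps added in `create_message`. The helper name and the plain-dict interface are assumptions for illustration; the logic mirrors the diff above:

```python
from typing import Optional

def extract_passthrough_key(headers: dict) -> Optional[str]:
    """Illustrative mirror of the header-extraction logic in server.py."""
    # Try x-api-key first (Anthropic style), then Authorization: Bearer.
    key = headers.get("x-api-key")
    if not key:
        auth = headers.get("authorization", "")
        if auth.lower().startswith("bearer "):
            key = auth[7:]
    if not key:
        return None
    key = key.strip()
    # Same heuristic as the diff: at least 10 characters, and alphanumeric
    # once dashes, underscores, and spaces are removed.
    if len(key) >= 10 and key.replace('-', '').replace('_', '').replace(' ', '').isalnum():
        return key
    return None

# Quick checks against the behavior described in the diff
assert extract_passthrough_key({"x-api-key": "sk-user-123-example"}) == "sk-user-123-example"
assert extract_passthrough_key({"authorization": "Bearer sk-user-123-example"}) == "sk-user-123-example"
assert extract_passthrough_key({"x-api-key": "short"}) is None  # fails length check
assert extract_passthrough_key({}) is None                      # no key at all
```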