87 changes: 87 additions & 0 deletions .env.example
@@ -0,0 +1,87 @@
# ==============================================================================
# PDDL RLHF Environment Variables Configuration
# ==============================================================================
# This file serves as a template for required environment variables.
# Copy this file to .env and fill in your actual values.
# NEVER commit .env to version control!
# ==============================================================================

# ------------------------------------------------------------------------------
# Google Vertex AI Configuration (REQUIRED for backend)
# ------------------------------------------------------------------------------

# Google Cloud Project ID
# Example: my-project-12345
GOOGLE_CLOUD_PROJECT=your-google-cloud-project-id

# Google Cloud Region/Location for Vertex AI
# Example: us-central1, us-east1, europe-west1
GOOGLE_CLOUD_LOCATION=us-central1

# Google Vertex AI Model ID
# This is the numeric ID of your deployed model endpoint
GOOGLE_PDDL_MODEL=your-model-id

# Google Vertex AI Credentials (JSON format)
# Set this to the entire service-account JSON credentials file, as a single-line or multi-line string
# Option 1: Set via Railway/Vercel dashboard by pasting the entire JSON
# Option 2: For local dev, you can use GOOGLE_APPLICATION_CREDENTIALS pointing to a file path
# Example: {"type":"service_account","project_id":"...","private_key_id":"..."}
GOOGLE_APPLICATION_CREDENTIALS_JSON={"type":"service_account","project_id":"your-project","private_key_id":"..."}

# Alternative: File path to credentials (for local development only)
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json

# ------------------------------------------------------------------------------
# CORS Configuration (REQUIRED for production)
# ------------------------------------------------------------------------------

# Comma-separated list of allowed CORS origins
# Development: Use "*" for testing (INSECURE)
# Production: Specify your frontend domain(s)
# Example: https://your-app.vercel.app,https://www.your-domain.com
ALLOWED_ORIGINS=*

# ------------------------------------------------------------------------------
# Fireworks AI Configuration (OPTIONAL - for CLI tool only)
# ------------------------------------------------------------------------------

# Fireworks AI API Key (only needed if using pddl_planner.py CLI tool)
# Get your key from: https://fireworks.ai
FIREWORKS_API_KEY=fw_your_api_key_here

# ------------------------------------------------------------------------------
# Server Configuration (OPTIONAL)
# ------------------------------------------------------------------------------

# Port for the backend server (Railway auto-sets this)
# Default: 8000
# PORT=8000

# ------------------------------------------------------------------------------
# Frontend Configuration (for Vercel deployment)
# ------------------------------------------------------------------------------

# Backend API URL (set in Vercel environment variables)
# Example: https://your-backend.railway.app
# VITE_API_URL=http://localhost:8000

# ==============================================================================
# DEPLOYMENT NOTES
# ==============================================================================
#
# LOCAL DEVELOPMENT:
# 1. Copy this file: cp .env.example .env
# 2. Fill in your actual values in .env
# 3. The backend will automatically load .env using python-dotenv
#
# RAILWAY DEPLOYMENT (Backend):
# 1. Go to your Railway project → Variables
# 2. Add each variable above (except VITE_API_URL)
# 3. For GOOGLE_APPLICATION_CREDENTIALS_JSON, paste the entire JSON
#
# VERCEL DEPLOYMENT (Frontend):
# 1. Go to your Vercel project → Settings → Environment Variables
# 2. Add VITE_API_URL with your Railway backend URL
#
# ==============================================================================
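
For reference, a minimal local-setup sketch based on the deployment notes above. The uvicorn command and the backend/ working directory are assumptions; this diff does not show how the server is started.

  cp .env.example .env                      # copy the template, then fill in real values
  cd backend
  uvicorn main:app --reload --port 8000     # assumed FastAPI entry point in backend/main.py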
167 changes: 143 additions & 24 deletions backend/main.py
@@ -15,9 +15,20 @@
import uuid
import logging
import sys
from pathlib import Path
from google import genai
from google.genai.types import GenerateContentConfig, ThinkingConfig

# Load environment variables from .env file (for local development)
from dotenv import load_dotenv

# Load .env file if it exists (will be ignored in production where env vars are set by platform)
env_path = Path(__file__).parent / '.env'
if env_path.exists():
    load_dotenv(dotenv_path=env_path)
    logger_temp = logging.getLogger(__name__)
    logger_temp.info(f"📝 Loaded environment variables from {env_path}")

# Configure logging for Railway
logging.basicConfig(
    level=logging.INFO,
@@ -28,38 +39,124 @@
)
logger = logging.getLogger(__name__)


def validate_required_env_vars():
"""
Validate that all required environment variables are present.
Fails fast with clear error messages if any are missing.
"""
required_vars = {
'GOOGLE_CLOUD_PROJECT': 'Google Cloud Project ID',
'GOOGLE_CLOUD_LOCATION': 'Google Cloud Location/Region',
'GOOGLE_PDDL_MODEL': 'Google Vertex AI Model ID',
}

# Either GOOGLE_APPLICATION_CREDENTIALS_JSON or GOOGLE_APPLICATION_CREDENTIALS must be set
credentials_options = {
'GOOGLE_APPLICATION_CREDENTIALS_JSON': 'Google Vertex AI credentials (JSON string)',
'GOOGLE_APPLICATION_CREDENTIALS': 'Google Vertex AI credentials (file path)',
}

missing_vars = []
present_vars = []

# Check required vars
for var_name, description in required_vars.items():
if not os.getenv(var_name):
missing_vars.append(f" - {var_name}: {description}")
else:
present_vars.append(var_name)

# Check credentials (at least one must be present)
credentials_present = any(os.getenv(var) for var in credentials_options.keys())
if not credentials_present:
missing_vars.append(f" - One of: {', '.join(credentials_options.keys())}")
for var_name, description in credentials_options.items():
missing_vars.append(f" • {var_name}: {description}")
else:
for var_name in credentials_options.keys():
if os.getenv(var_name):
present_vars.append(var_name)

# Check optional but recommended vars
optional_vars = {
'ALLOWED_ORIGINS': 'CORS allowed origins (recommended for production)',
}

warnings = []
for var_name, description in optional_vars.items():
if not os.getenv(var_name):
warnings.append(f" - {var_name}: {description}")
else:
present_vars.append(var_name)

# Log results
logger.info("=" * 70)
logger.info("🔍 Environment Variable Validation")
logger.info("=" * 70)

if present_vars:
logger.info("✅ Present variables:")
for var in present_vars:
# Never log actual values, only presence
logger.info(f" ✓ {var}: [CONFIGURED]")

if warnings:
logger.warning("⚠️ Optional variables (not set):")
for warning in warnings:
logger.warning(warning)

if missing_vars:
logger.error("❌ Missing required environment variables:")
for var in missing_vars:
logger.error(var)
logger.error("=" * 70)
logger.error("💡 To fix this:")
logger.error(" 1. For local development: Copy .env.example to .env and fill in values")
logger.error(" 2. For Railway: Set variables in Project → Variables")
logger.error(" 3. For Vercel: Set variables in Project → Settings → Environment Variables")
logger.error("=" * 70)
raise RuntimeError(
"Application startup failed: Required environment variables are missing. "
"Check logs above for details."
)

logger.info("=" * 70)
logger.info("✅ All required environment variables present")
logger.info("=" * 70)


# Validate environment variables on module load (before app starts)
validate_required_env_vars()

# Configuration - Load from environment variables (no defaults for production security)
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
LOCATION = os.getenv("GOOGLE_CLOUD_LOCATION")
MODEL = os.getenv("GOOGLE_PDDL_MODEL")

# CORS origins configuration
ALLOWED_ORIGINS_STR = os.getenv("ALLOWED_ORIGINS", "*")
ALLOWED_ORIGINS = [origin.strip() for origin in ALLOWED_ORIGINS_STR.split(",") if origin.strip()]

logger.info(f"🔒 CORS configured with {len(ALLOWED_ORIGINS)} allowed origin(s)")
if "*" in ALLOWED_ORIGINS:
    logger.warning("⚠️ CORS allows ALL origins (*) - use specific domains in production!")

app = FastAPI(title="PDDL RLHF API", version="1.0.0")

# CORS configuration for Vercel frontend
# CORS configuration - use environment-based origins
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], # In production, replace with your Vercel domain
    allow_origins=ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.on_event("startup")
async def startup_event():
"""Initialize the application on startup."""
logger.info("=" * 60)
logger.info("🚀 PDDL RLHF API starting up...")
logger.info(f"📁 Working directory: {os.getcwd()}")
logger.info(f"📁 Training data directory: {os.path.join(os.getcwd(), 'training_data')}")
logger.info(f"🔑 Project ID configured: {'Yes' if PROJECT_ID else 'No'}")
logger.info(f"🤖 Model: {MODEL}")
logger.info(f"🌐 Location: {LOCATION}")
logger.info(f"🌐 PORT: {os.getenv('PORT', 'not set')}")
logger.info("✅ Startup complete!")
logger.info("=" * 60)

# Configuration
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "deep-research-467303")
LOCATION = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
MODEL = os.getenv("PDDL_MODEL", "8060593410504916992")

# Handle Google Cloud credentials from environment variable
credentials_json = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
credentials_file_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

if credentials_json:
    import tempfile
    try:
@@ -70,17 +167,24 @@ async def startup_event():
        if start_idx != -1 and end_idx != -1:
            credentials_json = credentials_json[start_idx:end_idx+1]

        # Parse and write to file
        # Parse and write to temporary file
        credentials_dict = json.loads(credentials_json)
        credentials_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json')
        json.dump(credentials_dict, credentials_file, indent=2)
        credentials_file.flush()
        credentials_file.close()

        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_file.name
        logger.info(f"✅ Google Cloud credentials loaded from environment variable")
        logger.info(f"✅ Google Cloud credentials loaded from GOOGLE_APPLICATION_CREDENTIALS_JSON")
    except Exception as e:
        logger.error(f"❌ Failed to setup credentials: {str(e)}")
        logger.error(f"❌ Failed to parse GOOGLE_APPLICATION_CREDENTIALS_JSON: {str(e)}")
        raise RuntimeError(f"Invalid GOOGLE_APPLICATION_CREDENTIALS_JSON: {str(e)}")
elif credentials_file_path:
logger.info(f"✅ Using Google Cloud credentials from file: {credentials_file_path}")
else:
    # This should not happen due to validation above, but double-check
    logger.error("❌ No Google Cloud credentials configured")
    raise RuntimeError("No Google Cloud credentials found")

# Initialize Google Vertex AI client (will be initialized on first use if credentials are available)
genai_client = None
@@ -96,6 +200,21 @@ def get_genai_client():
logger.error(f"❌ Failed to initialize Vertex AI client: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to initialize Vertex AI client: {str(e)}")
return genai_client


@app.on_event("startup")
async def startup_event():
"""Initialize the application on startup."""
logger.info("=" * 60)
logger.info("🚀 PDDL RLHF API starting up...")
logger.info(f"📁 Working directory: {os.getcwd()}")
logger.info(f"📁 Training data directory: {os.path.join(os.getcwd(), 'training_data')}")
logger.info(f"🔑 Project ID: [CONFIGURED]")
logger.info(f"🤖 Model: {MODEL}")
logger.info(f"🌐 Location: {LOCATION}")
logger.info(f"🌐 PORT: {os.getenv('PORT', '8000')}")
logger.info("✅ Startup complete!")
logger.info("=" * 60)
SYSTEM_PROMPT = """# Role

You are a **PDDL planning expert and COT (Chain-of-Thought) generator**. You receive a planning problem and must:
23 changes: 22 additions & 1 deletion pddl_planner.py
@@ -7,11 +7,32 @@
import argparse
import json
import sys
import os
import requests
from pathlib import Path

# Load environment variables from .env file if present
try:
    from dotenv import load_dotenv
    env_path = Path(__file__).parent / '.env'
    if env_path.exists():
        load_dotenv(dotenv_path=env_path)
except ImportError:
    # python-dotenv not installed, skip (env vars can still be set manually)
    pass

API_URL = "https://api.fireworks.ai/inference/v1/chat/completions"
API_KEY = "fw_3ZNkrZnbfKVHhU65bFirkpJr"

# Load API key from environment variable (REQUIRED)
API_KEY = os.getenv("FIREWORKS_API_KEY")
if not API_KEY:
print("ERROR: FIREWORKS_API_KEY environment variable is not set.", file=sys.stderr)
print("\nTo fix this:", file=sys.stderr)
print(" 1. For local use: Add FIREWORKS_API_KEY=your_key to .env file", file=sys.stderr)
print(" 2. Or set it directly: export FIREWORKS_API_KEY=your_key", file=sys.stderr)
print(" 3. Get your key from: https://fireworks.ai\n", file=sys.stderr)
sys.exit(1)

MODEL = "accounts/colin-fbf68a/deployedModels/pddlplanner-turbo-10141406-w5lghxbj"
SYSTEM_PROMPT = "You are an expert planning assistant. When given a problem, output a structured plan in PDDL format with actions and explanations."

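
For reference, a minimal sketch of running the CLI after this change, with the key supplied via the environment instead of being hard-coded. The --help flag comes from argparse; the script's actual arguments are not shown in this diff.

  export FIREWORKS_API_KEY=fw_your_api_key_here   # or put it in a .env file next to pddl_planner.py
  python pddl_planner.py --help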