diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..be3d884
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,87 @@
+# ==============================================================================
+# PDDL RLHF Environment Variables Configuration
+# ==============================================================================
+# This file serves as a template for required environment variables.
+# Copy this file to .env and fill in your actual values.
+# NEVER commit .env to version control!
+# ==============================================================================
+
+# ------------------------------------------------------------------------------
+# Google Vertex AI Configuration (REQUIRED for backend)
+# ------------------------------------------------------------------------------
+
+# Google Cloud Project ID
+# Example: my-project-12345
+GOOGLE_CLOUD_PROJECT=your-google-cloud-project-id
+
+# Google Cloud Region/Location for Vertex AI
+# Example: us-central1, us-east1, europe-west1
+GOOGLE_CLOUD_LOCATION=us-central1
+
+# Google Vertex AI Model ID
+# This is the numeric ID of your deployed model endpoint
+GOOGLE_PDDL_MODEL=your-model-id
+
+# Google Vertex AI Credentials (JSON format)
+# This should be the entire JSON credentials file as a single line or multiline string
+# Option 1: Set via the Railway dashboard by pasting the entire JSON (backend only - never add it to the Vercel frontend)
+# Option 2: For local dev, you can use GOOGLE_APPLICATION_CREDENTIALS pointing to a file path
+# Example: {"type":"service_account","project_id":"...","private_key_id":"..."}
+GOOGLE_APPLICATION_CREDENTIALS_JSON={"type":"service_account","project_id":"your-project","private_key_id":"..."}
+
+# Alternative: File path to credentials (for local development only)
+# GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json
+
+# ------------------------------------------------------------------------------
+# CORS Configuration (RECOMMENDED for production - defaults to "*" when unset)
+# ------------------------------------------------------------------------------
+
+# Comma-separated list of allowed CORS origins
+# Development: Use "*" for testing (INSECURE; credentialed requests are disabled while "*" is set)
+# Production: Specify your frontend domain(s)
+# Example: https://your-app.vercel.app,https://www.your-domain.com
+ALLOWED_ORIGINS=*
+
+# ------------------------------------------------------------------------------
+# Fireworks AI Configuration (OPTIONAL - for CLI tool only)
+# ------------------------------------------------------------------------------
+
+# Fireworks AI API Key (only needed if using pddl_planner.py CLI tool)
+# Get your key from: https://fireworks.ai
+FIREWORKS_API_KEY=fw_your_api_key_here
+
+# ------------------------------------------------------------------------------
+# Server Configuration (OPTIONAL)
+# ------------------------------------------------------------------------------
+
+# Port for the backend server (Railway auto-sets this)
+# Default: 8000
+# PORT=8000
+
+# ------------------------------------------------------------------------------
+# Frontend Configuration (for Vercel deployment)
+# ------------------------------------------------------------------------------
+
+# Backend API URL (set in Vercel environment variables)
+# Example: https://your-backend.railway.app
+# VITE_API_URL=http://localhost:8000
+
+# ==============================================================================
+# DEPLOYMENT NOTES
+# ==============================================================================
+#
+# LOCAL DEVELOPMENT:
+# 1. Copy this file: cp .env.example .env
+# 2. Fill in your actual values in .env
+# 3. backend/main.py and pddl_planner.py will automatically load .env using python-dotenv
+#
+# RAILWAY DEPLOYMENT (Backend):
+# 1. Go to your Railway project → Variables
+# 2. Add each variable above (except VITE_API_URL and the CLI-only FIREWORKS_API_KEY)
+# 3. For GOOGLE_APPLICATION_CREDENTIALS_JSON, paste the entire JSON
+#
+# VERCEL DEPLOYMENT (Frontend):
+# 1. Go to your Vercel project → Settings → Environment Variables
+# 2. Add VITE_API_URL with your Railway backend URL
+#
+# ==============================================================================
diff --git a/backend/main.py b/backend/main.py
index 40614d8..8a21175 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -15,9 +15,24 @@
 import uuid
 import logging
 import sys
+from pathlib import Path
 from google import genai
 from google.genai.types import GenerateContentConfig, ThinkingConfig
 
+# Load environment variables from .env file (for local development)
+from dotenv import load_dotenv
+
+# Load .env files if they exist (ignored in production where env vars are set by the platform).
+# backend/.env is read first and wins; the repository-root .env (created by
+# `cp .env.example .env`) then fills in anything still unset, because
+# load_dotenv() does not override variables that are already set.
+_backend_dir = Path(__file__).resolve().parent
+loaded_env_files = []
+for env_path in (_backend_dir / '.env', _backend_dir.parent / '.env'):
+    if env_path.exists():
+        load_dotenv(dotenv_path=env_path)
+        loaded_env_files.append(env_path)
+
 # Configure logging for Railway
 logging.basicConfig(
     level=logging.INFO,
@@ -28,38 +43,131 @@
 )
 logger = logging.getLogger(__name__)
 
+# Reported here rather than at load time: INFO records emitted before
+# logging.basicConfig() has run are discarded.
+for env_path in loaded_env_files:
+    logger.info(f"📝 Loaded environment variables from {env_path}")
+
+
+def validate_required_env_vars():
+    """
+    Validate that all required environment variables are present.
+    Fails fast with clear error messages if any are missing.
+    """
+    required_vars = {
+        'GOOGLE_CLOUD_PROJECT': 'Google Cloud Project ID',
+        'GOOGLE_CLOUD_LOCATION': 'Google Cloud Location/Region',
+        'GOOGLE_PDDL_MODEL': 'Google Vertex AI Model ID',
+    }
+
+    # Either GOOGLE_APPLICATION_CREDENTIALS_JSON or GOOGLE_APPLICATION_CREDENTIALS must be set
+    credentials_options = {
+        'GOOGLE_APPLICATION_CREDENTIALS_JSON': 'Google Vertex AI credentials (JSON string)',
+        'GOOGLE_APPLICATION_CREDENTIALS': 'Google Vertex AI credentials (file path)',
+    }
+
+    missing_vars = []
+    present_vars = []
+
+    # Check required vars
+    for var_name, description in required_vars.items():
+        if not os.getenv(var_name):
+            missing_vars.append(f" - {var_name}: {description}")
+        else:
+            present_vars.append(var_name)
+
+    # Check credentials (at least one must be present)
+    credentials_present = any(os.getenv(var) for var in credentials_options.keys())
+    if not credentials_present:
+        missing_vars.append(f" - One of: {', '.join(credentials_options.keys())}")
+        for var_name, description in credentials_options.items():
+            missing_vars.append(f" • {var_name}: {description}")
+    else:
+        for var_name in credentials_options.keys():
+            if os.getenv(var_name):
+                present_vars.append(var_name)
+
+    # Check optional but recommended vars
+    optional_vars = {
+        'ALLOWED_ORIGINS': 'CORS allowed origins (recommended for production)',
+    }
+
+    warnings = []
+    for var_name, description in optional_vars.items():
+        if not os.getenv(var_name):
+            warnings.append(f" - {var_name}: {description}")
+        else:
+            present_vars.append(var_name)
+
+    # Log results
+    logger.info("=" * 70)
+    logger.info("🔍 Environment Variable Validation")
+    logger.info("=" * 70)
+
+    if present_vars:
+        logger.info("✅ Present variables:")
+        for var in present_vars:
+            # Never log actual values, only presence
+            logger.info(f" ✓ {var}: [CONFIGURED]")
+
+    if warnings:
+        logger.warning("⚠️ Optional variables (not set):")
+        for warning in warnings:
+            logger.warning(warning)
+
+    if missing_vars:
+        logger.error("❌ Missing required environment variables:")
+        for var in missing_vars:
+            logger.error(var)
+        logger.error("=" * 70)
+        logger.error("💡 To fix this:")
+        logger.error(" 1. For local development: Copy .env.example to .env and fill in values")
+        logger.error(" 2. For Railway: Set variables in Project → Variables")
+        logger.error(" 3. For Vercel: Set variables in Project → Settings → Environment Variables")
+        logger.error("=" * 70)
+        raise RuntimeError(
+            "Application startup failed: Required environment variables are missing. "
+            "Check logs above for details."
+        )
+
+    logger.info("=" * 70)
+    logger.info("✅ All required environment variables present")
+    logger.info("=" * 70)
+
+
+# Validate environment variables on module load (before app starts)
+validate_required_env_vars()
+
+# Configuration - Load from environment variables (no defaults for production security)
+PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
+LOCATION = os.getenv("GOOGLE_CLOUD_LOCATION")
+MODEL = os.getenv("GOOGLE_PDDL_MODEL")
+
+# CORS origins configuration
+ALLOWED_ORIGINS_STR = os.getenv("ALLOWED_ORIGINS", "*")
+ALLOWED_ORIGINS = [origin.strip() for origin in ALLOWED_ORIGINS_STR.split(",") if origin.strip()]
+
+logger.info(f"🔒 CORS configured with {len(ALLOWED_ORIGINS)} allowed origin(s)")
+if "*" in ALLOWED_ORIGINS:
+    logger.warning("⚠️ CORS allows ALL origins (*) - use specific domains in production!")
+    logger.warning("⚠️ Credentialed CORS requests are disabled while '*' is allowed")
+
 app = FastAPI(title="PDDL RLHF API", version="1.0.0")
 
-# CORS configuration for Vercel frontend
+# CORS configuration - use environment-based origins
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # In production, replace with your Vercel domain
-    allow_credentials=True,
+    allow_origins=ALLOWED_ORIGINS,
+    # A wildcard origin must not be combined with credentials (cookies, auth headers).
+    allow_credentials="*" not in ALLOWED_ORIGINS,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
-@app.on_event("startup")
-async def startup_event():
-    """Initialize the application on startup."""
-    logger.info("=" * 60)
-    logger.info("🚀 PDDL RLHF API starting up...")
-    logger.info(f"📁 Working directory: {os.getcwd()}")
-    logger.info(f"📁 Training data directory: {os.path.join(os.getcwd(), 'training_data')}")
-    logger.info(f"🔑 Project ID configured: {'Yes' if PROJECT_ID else 'No'}")
-    logger.info(f"🤖 Model: {MODEL}")
-    logger.info(f"🌐 Location: {LOCATION}")
-    logger.info(f"🌐 PORT: {os.getenv('PORT', 'not set')}")
-    logger.info("✅ Startup complete!")
-    logger.info("=" * 60)
-
-# Configuration
-PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "deep-research-467303")
-LOCATION = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
-MODEL = os.getenv("PDDL_MODEL", "8060593410504916992")
-
 # Handle Google Cloud credentials from environment variable
 credentials_json = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
+credentials_file_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+
 if credentials_json:
     import tempfile
     try:
@@ -70,7 +178,7 @@ async def startup_event():
         if start_idx != -1 and end_idx != -1:
             credentials_json = credentials_json[start_idx:end_idx+1]
 
-        # Parse and write to file
+        # Parse and write to temporary file
         credentials_dict = json.loads(credentials_json)
         credentials_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json')
         json.dump(credentials_dict, credentials_file, indent=2)
@@ -78,9 +186,16 @@ async def startup_event():
         credentials_file.close()
 
         os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_file.name
-        logger.info(f"✅ Google Cloud credentials loaded from environment variable")
+        logger.info(f"✅ Google Cloud credentials loaded from GOOGLE_APPLICATION_CREDENTIALS_JSON")
     except Exception as e:
-        logger.error(f"❌ Failed to setup credentials: {str(e)}")
+        logger.error(f"❌ Failed to parse GOOGLE_APPLICATION_CREDENTIALS_JSON: {str(e)}")
+        raise RuntimeError(f"Invalid GOOGLE_APPLICATION_CREDENTIALS_JSON: {str(e)}") from e
+elif credentials_file_path:
+    logger.info(f"✅ Using Google Cloud credentials from file: {credentials_file_path}")
+else:
+    # This should not happen due to validation above, but double-check
+    logger.error("❌ No Google Cloud credentials configured")
+    raise RuntimeError("No Google Cloud credentials found")
 
 # Initialize Google Vertex AI client (will be initialized on first use if credentials are available)
 genai_client = None
@@ -96,6 +211,21 @@ def get_genai_client():
             logger.error(f"❌ Failed to initialize Vertex AI client: {str(e)}")
             raise HTTPException(status_code=500, detail=f"Failed to initialize Vertex AI client: {str(e)}")
     return genai_client
+
+
+@app.on_event("startup")
+async def startup_event():
+    """Initialize the application on startup."""
+    logger.info("=" * 60)
+    logger.info("🚀 PDDL RLHF API starting up...")
+    logger.info(f"📁 Working directory: {os.getcwd()}")
+    logger.info(f"📁 Training data directory: {os.path.join(os.getcwd(), 'training_data')}")
+    logger.info(f"🔑 Project ID: [CONFIGURED]")
+    logger.info(f"🤖 Model: {MODEL}")
+    logger.info(f"🌐 Location: {LOCATION}")
+    logger.info(f"🌐 PORT: {os.getenv('PORT', '8000')}")
+    logger.info("✅ Startup complete!")
+    logger.info("=" * 60)
 
 SYSTEM_PROMPT = """# Role
 You are a **PDDL planning expert and COT (Chain-of-Thought) generator**. You receive a planning problem and must:
diff --git a/pddl_planner.py b/pddl_planner.py
index 3fecccc..f841d22 100644
--- a/pddl_planner.py
+++ b/pddl_planner.py
@@ -7,11 +7,32 @@
 import argparse
 import json
 import sys
+import os
 import requests
+from pathlib import Path
 
+# Load environment variables from .env file if present
+try:
+    from dotenv import load_dotenv
+    env_path = Path(__file__).parent / '.env'
+    if env_path.exists():
+        load_dotenv(dotenv_path=env_path)
+except ImportError:
+    # python-dotenv not installed, skip (env vars can still be set manually)
+    pass
 
 API_URL = "https://api.fireworks.ai/inference/v1/chat/completions"
-API_KEY = "fw_3ZNkrZnbfKVHhU65bFirkpJr"
+
+# Load API key from environment variable (REQUIRED)
+API_KEY = os.getenv("FIREWORKS_API_KEY")
+if not API_KEY:
+    print("ERROR: FIREWORKS_API_KEY environment variable is not set.", file=sys.stderr)
+    print("\nTo fix this:", file=sys.stderr)
+    print(" 1. For local use: Add FIREWORKS_API_KEY=your_key to .env file", file=sys.stderr)
+    print(" 2. Or set it directly: export FIREWORKS_API_KEY=your_key", file=sys.stderr)
+    print(" 3. Get your key from: https://fireworks.ai\n", file=sys.stderr)
+    sys.exit(1)
+
 MODEL = "accounts/colin-fbf68a/deployedModels/pddlplanner-turbo-10141406-w5lghxbj"
 
 SYSTEM_PROMPT = "You are an expert planning assistant. When given a problem, output a structured plan in PDDL format with actions and explanations."