diff --git a/.gitignore b/.gitignore index efb5264..cf5ea7e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ get_helm.sh .env .ruff_cache data/minio -eval.txt \ No newline at end of file +eval.txt +services/AgentService/prompts/tests +.DS_Store \ No newline at end of file diff --git a/services/AgentService/eval-harness/Makefile b/services/AgentService/eval-harness/Makefile deleted file mode 100644 index 7ba5229..0000000 --- a/services/AgentService/eval-harness/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -promptfoo: - @echo "$(GREEN)Checking npx installation...$(NC)" - @if ! command -v npx >/dev/null 2>&1; then \ - echo "$(RED)Error: npx is not installed. Please install Node.js and npm first.$(NC)"; \ - exit 1; \ - fi - @echo "$(GREEN)Checking promptfoo installation...$(NC)" - @if ! npx promptfoo@latest --version >/dev/null 2>&1; then \ - echo "$(GREEN)Installing promptfoo...$(NC)"; \ - npx promptfoo@latest --version >/dev/null 2>&1 || true; \ - fi - @echo "$(GREEN)Running promptfoo in AgentService prompts directory...$(NC)" - npx promptfoo@latest eval - -.PHONY: promptfoo \ No newline at end of file diff --git a/services/AgentService/eval-harness/__init__.py b/services/AgentService/eval-harness/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/services/AgentService/eval-harness/promptfooconfig.yaml b/services/AgentService/eval-harness/promptfooconfig.yaml deleted file mode 100644 index 41c47f1..0000000 --- a/services/AgentService/eval-harness/promptfooconfig.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json - -# Learn more about building a configuration: https://promptfoo.dev/docs/configuration/guide - -description: "Podcast Prompts" - -prompts: - - file://./prompts_iter.py:PodcastPrompts.raw_outline_prompt - -providers: - - id: "https://integrate.api.nvidia.com/v1/chat/completions" - config: - method: "POST" - headers: - "Content-Type": "application/json" - "Authorization": 
"Bearer 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" - body: - model: "meta/llama-3.1-405b-instruct" - messages: - - role: "user" - content: "{{prompt}}" - responseParser: "json.choices[0].message.content" - -tests: - - vars: - duration: 10 - text: file://./eval.txt diff --git a/services/AgentService/eval-harness/tests/schemas/conversation.json b/services/AgentService/eval-harness/tests/schemas/conversation.json deleted file mode 100644 index 24c0484..0000000 --- a/services/AgentService/eval-harness/tests/schemas/conversation.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "$defs": { - "DialogueEntry": { - "properties": { - "text": { - "title": "Text", - "type": "string" - }, - "speaker": { - "enum": [ - "speaker-1", - "speaker-2" - ], - "title": "Speaker", - "type": "string" - } - }, - "required": [ - "text", - "speaker" - ], - "title": "DialogueEntry", - "type": "object" - } - }, - "properties": { - "scratchpad": { - "title": "Scratchpad", - "type": "string" - }, - "dialogue": { - "items": { - "$ref": "#/$defs/DialogueEntry" - }, - "title": "Dialogue", - "type": "array" - } - }, - "required": [ - "scratchpad", - "dialogue" - ], - "title": "Conversation", - "type": "object" -} \ No newline at end of file diff --git a/services/AgentService/eval-harness/tests/schemas/podcast_outline.json b/services/AgentService/eval-harness/tests/schemas/podcast_outline.json deleted file mode 100644 index 7a07b89..0000000 --- a/services/AgentService/eval-harness/tests/schemas/podcast_outline.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "$defs": { - "PodcastSegment": { - "properties": { - "section": { - "title": "Section", - "type": "string" - }, - "descriptions": { - "items": { - "type": "string" - }, - "title": "Descriptions", - "type": "array" - }, - "duration": { - "title": "Duration", - "type": "integer" - } - }, - "required": [ - "section", - "descriptions", - "duration" - ], - "title": "PodcastSegment", - "type": "object" - } - }, - "properties": { - "title": { 
- "title": "Title", - "type": "string" - }, - "segments": { - "items": { - "$ref": "#/$defs/PodcastSegment" - }, - "title": "Segments", - "type": "array" - } - }, - "required": [ - "title", - "segments" - ], - "title": "PodcastOutline", - "type": "object" -} \ No newline at end of file diff --git a/services/AgentService/main.py b/services/AgentService/main.py index cd3540a..289c33a 100644 --- a/services/AgentService/main.py +++ b/services/AgentService/main.py @@ -1,5 +1,5 @@ from fastapi import FastAPI, BackgroundTasks, HTTPException -from shared.shared_types import ServiceType, JobStatus, Conversation +from shared.shared_types import ServiceType, JobStatus, Conversation, PodcastOutline from shared.storage import StorageManager from shared.job import JobStatusManager from shared.otel import OpenTelemetryInstrumentation, OpenTelemetryConfig @@ -33,17 +33,6 @@ # Data Models -class PodcastSegment(BaseModel): - section: str - descriptions: List[str] - duration: int - - -class PodcastOutline(BaseModel): - title: str - segments: List[PodcastSegment] - - class TranscriptionRequest(BaseModel): markdown: str duration: int = 20 diff --git a/services/AgentService/prompts/Makefile b/services/AgentService/prompts/Makefile new file mode 100644 index 0000000..6327b6d --- /dev/null +++ b/services/AgentService/prompts/Makefile @@ -0,0 +1,56 @@ +# Colors for better visibility +GREEN=\033[0;32m +RED=\033[0;31m +NC=\033[0m + +# Directory structure +CONFIG_DIR=configs +TEST_DIR=tests +OUTPUTS_DIR=$(TEST_DIR)/outputs +SCHEMAS_DIR=$(TEST_DIR)/schemas +INPUTS_DIR=$(TEST_DIR)/inputs + +# Ensure promptfoo is installed +ensure-promptfoo: + @echo "$(GREEN)Checking promptfoo installation...$(NC)" + @if ! command -v promptfoo >/dev/null 2>&1; then \ + echo "$(RED)Error: promptfoo is not installed. 
Installing via brew...$(NC)"; \ + brew install promptfoo || { echo "$(RED)Failed to install promptfoo$(NC)"; exit 1; }; \ + fi + +# Generate JSON schemas +generate-schemas: + @echo "$(GREEN)Generating JSON schemas from Pydantic models...$(NC)" + @python scripts/generate_schemas.py $(SCHEMAS_DIR) + +# Setup test environment +setup-test: ensure-promptfoo + @echo "$(GREEN)Setting up test environment...$(NC)" + @mkdir -p $(INPUTS_DIR) $(OUTPUTS_DIR) $(SCHEMAS_DIR) + @make generate-schemas + +# Run all prompt tests +test-prompts: setup-test + @echo "$(GREEN)Running all prompt tests...$(NC)" + @cd $(CONFIG_DIR) && python ../scripts/run_tests.py + +# Run up to a specific stage +test-upto: setup-test + @if [ -z "$(stage)" ]; then \ + echo "$(RED)Error: Please specify a stage number with 'stage=N'$(NC)"; \ + exit 1; \ + fi + @echo "$(GREEN)Running prompt tests up to stage $(stage)...$(NC)" + @cd $(CONFIG_DIR) && python ../scripts/run_tests.py --up-to $(stage) + +# List all available test stages +test-list: + @echo "$(GREEN)Listing available test stages...$(NC)" + @python scripts/run_tests.py --list + +# Clean test outputs +clean: + @echo "$(GREEN)Cleaning test outputs...$(NC)" + rm -rf $(TEST_DIR) + +.PHONY: ensure-promptfoo generate-schemas setup-test test-prompts test-upto clean test-list \ No newline at end of file diff --git a/services/AgentService/eval-harness/README.md b/services/AgentService/prompts/README.md similarity index 100% rename from services/AgentService/eval-harness/README.md rename to services/AgentService/prompts/README.md diff --git a/services/AgentService/prompts/__init__.py b/services/AgentService/prompts/__init__.py new file mode 100644 index 0000000..3865200 --- /dev/null +++ b/services/AgentService/prompts/__init__.py @@ -0,0 +1,21 @@ +from .prompts import ( + RAW_OUTLINE_PROMPT, + OUTLINE_PROMPT, + SEGMENT_TRANSCRIPT_PROMPT, + DEEP_DIVE_PROMPT, + RAW_PODCAST_DIALOGUE_PROMPT_v2, + FUSE_OUTLINE_PROMPT, + REVISE_PROMPT, + PODCAST_DIALOGUE_PROMPT, +) 
+ +__all__ = [ + 'RAW_OUTLINE_PROMPT', + 'OUTLINE_PROMPT', + 'SEGMENT_TRANSCRIPT_PROMPT', + 'DEEP_DIVE_PROMPT', + 'RAW_PODCAST_DIALOGUE_PROMPT_v2', + 'FUSE_OUTLINE_PROMPT', + 'REVISE_PROMPT', + 'PODCAST_DIALOGUE_PROMPT', +] \ No newline at end of file diff --git a/services/AgentService/prompts/configs/01_raw_outline.yaml b/services/AgentService/prompts/configs/01_raw_outline.yaml new file mode 100644 index 0000000..3d0a7cd --- /dev/null +++ b/services/AgentService/prompts/configs/01_raw_outline.yaml @@ -0,0 +1,28 @@ +description: "01 Raw Outline Generation" +evaluateOptions: + maxConcurrency: 1 + showProgressBar: true + +prompts: + - "file://../prompts.py:PodcastPrompts.raw_outline_prompt" + +providers: + - id: "file://../providers/nim-405b.py" + label: "405b" + +tests: + - description: "Raw Outline Generation" + vars: + text: file://../data/eval.txt + duration: 15 + storeOutputAs: raw_outline_output + assert: + - type: llm-rubric + value: | + Evaluate if the outline: + 1. Has clear sections for background, innovation, impact, and future work + 2. Makes innovation the focus + 3. Allocates time appropriately for {{duration}} minutes + 4. 
Maintains technical accuracy while being accessible + Pass if all scores >= 0.95 + provider: "file://../scripts/nim-provider.py" diff --git a/services/AgentService/prompts/configs/02_outline_json.yaml b/services/AgentService/prompts/configs/02_outline_json.yaml new file mode 100644 index 0000000..b6a6a4d --- /dev/null +++ b/services/AgentService/prompts/configs/02_outline_json.yaml @@ -0,0 +1,25 @@ +description: "02 Outline JSON Generation" +evaluateOptions: + maxConcurrency: 1 + showProgressBar: true + +prompts: + - "file://../prompts.py:PodcastPrompts.outline_prompt" + +providers: + - id: "file://../providers/nim-8b.py" + label: "8b" + config: + json_schema: file://../tests/schemas/podcast_outline.json + +tests: + - description: "Outline JSON Generation" + vars: + text: file://../tests/outputs/01_raw_outline_results.json + schema: file://../tests/schemas/podcast_outline.json + options: + transformVars: file://../scripts/get-output.py:get_transform + storeOutputAs: outline_json_output + assert: + - type: is-json + value: file://../tests/schemas/podcast_outline.json diff --git a/services/AgentService/prompts.py b/services/AgentService/prompts/prompts.py similarity index 100% rename from services/AgentService/prompts.py rename to services/AgentService/prompts/prompts.py diff --git a/services/AgentService/prompts/providers/nim-405b.py b/services/AgentService/prompts/providers/nim-405b.py new file mode 100644 index 0000000..c34b4b9 --- /dev/null +++ b/services/AgentService/prompts/providers/nim-405b.py @@ -0,0 +1,79 @@ +# chat_provider.py +import requests +import json +from typing import Dict, Any + + +def call_api( + prompt: str, options: Dict[str, Any], context: Dict[str, Any] +) -> Dict[str, Any]: + """ + Custom provider for chat completions using your existing infrastructure. 
+ + Args: + prompt: The prompt text or JSON string of messages + options: Configuration options from the YAML file + context: Test context including variables used + + Returns: + Dict containing output or error + """ + try: + # Get configuration from options + config = options.get("config", {}) + api_base = config.get("api_base", "https://youngthug.demoz.io") + api_key = config.get("api_key") + model = config.get("model", "meta/llama-3.1-405b-instruct") + temperature = config.get("temperature", 0.7) + max_tokens = config.get("max_tokens", 1000) + + # Setup headers + headers = {"Content-Type": "application/json"} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + # Handle different prompt formats + try: + # Check if prompt is a JSON string containing messages + messages = json.loads(prompt) + if isinstance(messages, list): + chat_messages = messages + else: + chat_messages = [{"role": "user", "content": prompt}] + except json.JSONDecodeError: + # If not JSON, treat as regular prompt + chat_messages = [{"role": "user", "content": prompt}] + + # Prepare payload + payload = { + "model": model, + "messages": chat_messages, + "temperature": temperature, + "max_tokens": max_tokens, + "stream": False, + } + + # Make request + response = requests.post( + f"{api_base}/v1/chat/completions", headers=headers, json=payload + ) + response.raise_for_status() + result = response.json() + + if "choices" in result and len(result["choices"]) > 0: + output = result["choices"][0]["message"]["content"] + return { + "output": output, + "tokenUsage": result.get( + "usage", {"total": 0, "prompt": 0, "completion": 0} + ), + } + else: + return {"error": "No choices in response"} + + except requests.exceptions.RequestException as e: + return { + "error": f"Request error: {str(e)}\nResponse: {response.text if 'response' in locals() else 'No response'}" + } + except Exception as e: + return {"error": f"Unexpected error: {str(e)}"} diff --git 
a/services/AgentService/prompts/providers/nim-70b.py b/services/AgentService/prompts/providers/nim-70b.py new file mode 100644 index 0000000..b0b70bd --- /dev/null +++ b/services/AgentService/prompts/providers/nim-70b.py @@ -0,0 +1,79 @@ +# chat_provider.py +import requests +import json +from typing import Dict, Any + + +def call_api( + prompt: str, options: Dict[str, Any], context: Dict[str, Any] +) -> Dict[str, Any]: + """ + Custom provider for chat completions using your existing infrastructure. + + Args: + prompt: The prompt text or JSON string of messages + options: Configuration options from the YAML file + context: Test context including variables used + + Returns: + Dict containing output or error + """ + try: + # Get configuration from options + config = options.get("config", {}) + api_base = config.get("api_base", "https://small-nim-pc8kmx5ae.brevlab.com/") + api_key = config.get("api_key") + model = config.get("model", "meta/llama-3.1-70b-instruct") + temperature = config.get("temperature", 0.7) + max_tokens = config.get("max_tokens", 1000) + + # Setup headers + headers = {"Content-Type": "application/json"} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + # Handle different prompt formats + try: + # Check if prompt is a JSON string containing messages + messages = json.loads(prompt) + if isinstance(messages, list): + chat_messages = messages + else: + chat_messages = [{"role": "user", "content": prompt}] + except json.JSONDecodeError: + # If not JSON, treat as regular prompt + chat_messages = [{"role": "user", "content": prompt}] + + # Prepare payload + payload = { + "model": model, + "messages": chat_messages, + "temperature": temperature, + "max_tokens": max_tokens, + "stream": False, + } + + # Make request + response = requests.post( + f"{api_base}/v1/chat/completions", headers=headers, json=payload + ) + response.raise_for_status() + result = response.json() + + if "choices" in result and len(result["choices"]) > 0: + output = 
result["choices"][0]["message"]["content"] + return { + "output": output, + "tokenUsage": result.get( + "usage", {"total": 0, "prompt": 0, "completion": 0} + ), + } + else: + return {"error": "No choices in response"} + + except requests.exceptions.RequestException as e: + return { + "error": f"Request error: {str(e)}\nResponse: {response.text if 'response' in locals() else 'No response'}" + } + except Exception as e: + return {"error": f"Unexpected error: {str(e)}"} diff --git a/services/AgentService/prompts/providers/nim-8b.py b/services/AgentService/prompts/providers/nim-8b.py new file mode 100644 index 0000000..97538c4 --- /dev/null +++ b/services/AgentService/prompts/providers/nim-8b.py @@ -0,0 +1,89 @@ +# Note we use 8b in json mode + +import requests +import json +from typing import Dict, Any + + +def call_api( + prompt: str, options: Dict[str, Any], context: Dict[str, Any] +) -> Dict[str, Any]: + """ + Custom provider for chat completions using your existing infrastructure. + + Args: + prompt: The prompt text or JSON string of messages + options: Configuration options from the YAML file + context: Test context including variables used + + Returns: + Dict containing output or error + """ + try: + # Get configuration from options + config = options.get("config", {}) + api_base = config.get("api_base", "https://nim-pc8kmx5ae.brevlab.com") + api_key = config.get("api_key") + model = config.get("model", "meta/llama-3.1-8b-instruct") + temperature = config.get("temperature", 0.7) + max_tokens = config.get("max_tokens", 1000) + json_schema = config.get("json_schema", "") + + # Setup headers + headers = {"Content-Type": "application/json"} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + # Handle different prompt formats + try: + # Check if prompt is a JSON string containing messages + messages = json.loads(prompt) + if isinstance(messages, list): + chat_messages = messages + else: + chat_messages = [{"role": "user", "content": prompt}] + except 
json.JSONDecodeError: + # If not JSON, treat as regular prompt + chat_messages = [{"role": "user", "content": prompt}] + + # Prepare payload + payload = { + "model": model, + "messages": chat_messages, + "temperature": temperature, + "max_tokens": max_tokens, + "stream": False, + } + + if json_schema != "": + # Resolve the schema path from config, stripping any 'file://' prefix + # (was previously a machine-specific hard-coded absolute path) + json_schema_path = json_schema.replace("file://", "") + with open(json_schema_path, "r") as file: + json_schema_content = file.read() + payload["nvext"] = {"guided_json": json_schema_content} + + # Make request + response = requests.post( + f"{api_base}/v1/chat/completions", headers=headers, json=payload + ) + response.raise_for_status() + result = response.json() + + if "choices" in result and len(result["choices"]) > 0: + output = result["choices"][0]["message"]["content"] + return { + "output": output, + "tokenUsage": result.get( + "usage", {"total": 0, "prompt": 0, "completion": 0} + ), + } + else: + return {"error": "No choices in response"} + + except requests.exceptions.RequestException as e: + return { + "error": f"Request error: {str(e)}\nResponse: {response.text if 'response' in locals() else 'No response'}" + } + except Exception as e: + return {"error": f"Unexpected error: {str(e)}"} diff --git a/services/AgentService/prompts/scripts/generate_schemas.py b/services/AgentService/prompts/scripts/generate_schemas.py new file mode 100644 index 0000000..ab6784e --- /dev/null +++ b/services/AgentService/prompts/scripts/generate_schemas.py @@ -0,0 +1,36 @@ +# tests/generate_schemas.py +import json +import sys +from pathlib import Path + +# Make the repo root importable; this must run BEFORE the shared import below +root_dir = Path(__file__).resolve().parents[4] +sys.path.append(str(root_dir)) +from shared.shared.shared_types import Conversation, PodcastOutline + + +def generate_schemas(output_dir: Path): + """Generate JSON schemas from Pydantic models."""
+ # Ensure output directory exists + output_dir.mkdir(parents=True, exist_ok=True) + + # Generate and save PodcastOutline schema + podcast_schema = PodcastOutline.model_json_schema() + with open(output_dir / "podcast_outline.json", "w") as f: + json.dump(podcast_schema, f, indent=2) + print("Generated podcast_outline.json") + + # Generate and save Conversation schema + conversation_schema = Conversation.model_json_schema() + with open(output_dir / "conversation.json", "w") as f: + json.dump(conversation_schema, f, indent=2) + print("Generated conversation.json") + + +if __name__ == "__main__": + if len(sys.argv) > 1: + output_dir = Path(sys.argv[1]) + else: + output_dir = Path(__file__).parent / "schemas" + + generate_schemas(output_dir) diff --git a/services/AgentService/prompts/scripts/get-output.py b/services/AgentService/prompts/scripts/get-output.py new file mode 100644 index 0000000..a9bc483 --- /dev/null +++ b/services/AgentService/prompts/scripts/get-output.py @@ -0,0 +1,29 @@ +import json + + +def get_transform(vars, context): + """ + Transform function for promptfoo that extracts the output field from evaluation results JSON + while preserving other variables. 
+ + Args: + vars (dict): Variables passed from promptfoo config + context (dict): Additional context from promptfoo + + Returns: + dict: Transformed variables including the extracted output + """ + try: + # Remove 'file://' prefix if present and get absolute path + file_path = vars["text"].replace("file://", "") + + # Read and parse the JSON file directly without joining paths + with open(file_path, "r") as f: + data = json.load(f) + + # Extract the output and return all vars with transformed text + return {**vars, "text": data["results"]["results"][0]["response"]["output"]} + + except Exception as e: + print(f"Error transforming variables: {e}") + return {**vars, "error": f"Failed to transform variables: {str(e)}"} diff --git a/services/AgentService/prompts/scripts/nim-provider.py b/services/AgentService/prompts/scripts/nim-provider.py new file mode 100644 index 0000000..c34b4b9 --- /dev/null +++ b/services/AgentService/prompts/scripts/nim-provider.py @@ -0,0 +1,79 @@ +# chat_provider.py +import requests +import json +from typing import Dict, Any + + +def call_api( + prompt: str, options: Dict[str, Any], context: Dict[str, Any] +) -> Dict[str, Any]: + """ + Custom provider for chat completions using your existing infrastructure. 
+ + Args: + prompt: The prompt text or JSON string of messages + options: Configuration options from the YAML file + context: Test context including variables used + + Returns: + Dict containing output or error + """ + try: + # Get configuration from options + config = options.get("config", {}) + api_base = config.get("api_base", "https://youngthug.demoz.io") + api_key = config.get("api_key") + model = config.get("model", "meta/llama-3.1-405b-instruct") + temperature = config.get("temperature", 0.7) + max_tokens = config.get("max_tokens", 1000) + + # Setup headers + headers = {"Content-Type": "application/json"} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + # Handle different prompt formats + try: + # Check if prompt is a JSON string containing messages + messages = json.loads(prompt) + if isinstance(messages, list): + chat_messages = messages + else: + chat_messages = [{"role": "user", "content": prompt}] + except json.JSONDecodeError: + # If not JSON, treat as regular prompt + chat_messages = [{"role": "user", "content": prompt}] + + # Prepare payload + payload = { + "model": model, + "messages": chat_messages, + "temperature": temperature, + "max_tokens": max_tokens, + "stream": False, + } + + # Make request + response = requests.post( + f"{api_base}/v1/chat/completions", headers=headers, json=payload + ) + response.raise_for_status() + result = response.json() + + if "choices" in result and len(result["choices"]) > 0: + output = result["choices"][0]["message"]["content"] + return { + "output": output, + "tokenUsage": result.get( + "usage", {"total": 0, "prompt": 0, "completion": 0} + ), + } + else: + return {"error": "No choices in response"} + + except requests.exceptions.RequestException as e: + return { + "error": f"Request error: {str(e)}\nResponse: {response.text if 'response' in locals() else 'No response'}" + } + except Exception as e: + return {"error": f"Unexpected error: {str(e)}"} diff --git 
a/services/AgentService/prompts/scripts/run_tests.py b/services/AgentService/prompts/scripts/run_tests.py new file mode 100644 index 0000000..c728349 --- /dev/null +++ b/services/AgentService/prompts/scripts/run_tests.py @@ -0,0 +1,78 @@ +# prompts/scripts/run_tests.py +import argparse +import subprocess +from pathlib import Path +from typing import List + + +class PromptTestRunner: + def __init__(self, config_dir: str = "configs"): + self.base_dir = Path(__file__).parent.parent # prompts directory + self.config_dir = self.base_dir / config_dir + self.outputs_dir = self.base_dir / "tests/outputs" + + def get_stage_configs(self) -> List[Path]: + """Get all numbered configuration files in order.""" + return sorted(self.config_dir.glob("[0-9][0-9]_*.yaml")) + + def run_stage(self, config_path: Path) -> bool: + """Run a single test stage using promptfoo.""" + print(f"\n=== Running stage: {config_path.stem} ===") + + # Create output path for this stage + output_path = self.outputs_dir / f"{config_path.stem}_results.json" + + result = subprocess.run( + ["promptfoo", "eval", "-c", str(config_path), "--output", str(output_path)], + capture_output=True, + text=True, + ) + + # Print the output regardless of success/failure + if result.stdout: + print(result.stdout) + if result.returncode != 0: + print(f"Error: {result.stderr}") + + return result.returncode == 0 + + def run_all_stages(self) -> None: + """Run all test stages in order.""" + for config in self.get_stage_configs(): + if not self.run_stage(config): + print(f"\nStage {config.stem} failed. Stopping pipeline.") + break + + def run_up_to_stage(self, target_stage: int) -> None: + """Run all stages up to and including the target stage number.""" + for config in self.get_stage_configs(): + stage_num = int(config.stem.split("_")[0]) + if stage_num > target_stage: + break + if not self.run_stage(config): + print(f"\nStage {config.stem} failed. 
Stopping pipeline.") + break + + +def main(): + parser = argparse.ArgumentParser(description="Run prompt tests in stages") + parser.add_argument("--up-to", type=int, help="Run all stages up to this number") + parser.add_argument( + "--list", action="store_true", help="List all available test stages" + ) + + args = parser.parse_args() + runner = PromptTestRunner() + + if args.list: + print("Available test stages:") + for config in runner.get_stage_configs(): + print(f" - {config.stem}") + elif args.up_to is not None: + runner.run_up_to_stage(args.up_to) + else: + runner.run_all_stages() + + +if __name__ == "__main__": + main() diff --git a/shared/shared/shared_types.py b/shared/shared/shared_types.py index 352b452..2f3bce7 100644 --- a/shared/shared/shared_types.py +++ b/shared/shared/shared_types.py @@ -82,3 +82,14 @@ class ProcessingStep(BaseModel): class PromptTracker(BaseModel): steps: List[ProcessingStep] + + +class PodcastSegment(BaseModel): + section: str + descriptions: List[str] + duration: int + + +class PodcastOutline(BaseModel): + title: str + segments: List[PodcastSegment]