From 3357f7aa77b59eb1991a353d036e9eef40b98fb9 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sat, 9 Nov 2024 12:06:56 -0800
Subject: [PATCH 01/10] working example

---
 services/AgentService/prompts/Makefile        |  44 ++++-
 .../AgentService/prompts/generate_schemas.py  |  32 +++
 services/AgentService/prompts/nim-provider.py |  86 ++++++++
 .../AgentService/prompts/promptfooconfig.yaml | 183 ++++++++++++++++--
 .../AgentService/prompts/test-eval-pf.yaml    |  27 +++
 5 files changed, 349 insertions(+), 23 deletions(-)
 create mode 100644 services/AgentService/prompts/generate_schemas.py
 create mode 100644 services/AgentService/prompts/nim-provider.py
 create mode 100644 services/AgentService/prompts/test-eval-pf.yaml

diff --git a/services/AgentService/prompts/Makefile b/services/AgentService/prompts/Makefile
index 7ba5229..85de8e7 100644
--- a/services/AgentService/prompts/Makefile
+++ b/services/AgentService/prompts/Makefile
@@ -1,4 +1,16 @@
-promptfoo:
+# Colors for better visibility
+GREEN=\033[0;32m
+RED=\033[0;31m
+NC=\033[0m
+
+# Test directories
+TEST_DIR=tests
+OUTPUTS_DIR=$(TEST_DIR)/outputs
+SCHEMAS_DIR=$(TEST_DIR)/schemas
+INPUTS_DIR=$(TEST_DIR)/inputs
+
+# Ensure promptfoo is installed
+ensure-promptfoo:
 	@echo "$(GREEN)Checking npx installation...$(NC)"
 	@if ! command -v npx >/dev/null 2>&1; then \
 		echo "$(RED)Error: npx is not installed. Please install Node.js and npm first.$(NC)"; \
@@ -9,7 +21,31 @@ promptfoo:
 		echo "$(GREEN)Installing promptfoo...$(NC)"; \
 		npx promptfoo@latest --version >/dev/null 2>&1 || true; \
 	fi
-	@echo "$(GREEN)Running promptfoo in AgentService prompts directory...$(NC)"
-	npx promptfoo@latest eval
 
-.PHONY: promptfoo
\ No newline at end of file
+# Generate JSON schemas
+generate-schemas:
+	@echo "$(GREEN)Generating JSON schemas from Pydantic models...$(NC)"
+	@python generate_schemas.py $(SCHEMAS_DIR)
+
+# Setup test environment: create the test dirs, then regenerate schemas via $(MAKE) so the sub-make inherits flags
+setup-test: ensure-promptfoo
+	@echo "$(GREEN)Setting up test environment...$(NC)"
+	@mkdir -p $(INPUTS_DIR) $(OUTPUTS_DIR) $(SCHEMAS_DIR)
+	@$(MAKE) generate-schemas
+
+# Run end-to-end test
+test-e2e: setup-test
+	@echo "$(GREEN)Running end-to-end tests...$(NC)"
+	npx promptfoo@latest eval 
+
+# Run original config
+test-basic: ensure-promptfoo
+	@echo "$(GREEN)Running basic promptfoo tests...$(NC)"
+	npx promptfoo@latest eval -c promptfooconfig.yaml
+
+# Clean test outputs
+clean:
+	@echo "$(GREEN)Cleaning test outputs...$(NC)"
+	rm -rf $(TEST_DIR)
+
+.PHONY: ensure-promptfoo generate-schemas setup-test test-e2e test-basic clean
\ No newline at end of file
diff --git a/services/AgentService/prompts/generate_schemas.py b/services/AgentService/prompts/generate_schemas.py
new file mode 100644
index 0000000..4fce6ae
--- /dev/null
+++ b/services/AgentService/prompts/generate_schemas.py
@@ -0,0 +1,32 @@
+# services/AgentService/prompts/generate_schemas.py
+import json
+import sys
+from pathlib import Path
+sys.path.append("../../..")  # Add root to path to import shared
+
+from shared.shared.shared_types import Conversation, PodcastOutline
+
+def generate_schemas(output_dir: Path):
+    """Generate JSON schemas from Pydantic models."""
+    # Ensure output directory exists
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    # Generate and save PodcastOutline schema
+    podcast_schema = PodcastOutline.model_json_schema()
+    with open(output_dir / "podcast_outline.json", "w") as f:
+        json.dump(podcast_schema, f, indent=2)
+    print("Generated podcast_outline.json")
+    
+    # Generate and save Conversation schema
+    conversation_schema = Conversation.model_json_schema()
+    with open(output_dir / "conversation.json", "w") as f:
+        json.dump(conversation_schema, f, indent=2)
+    print("Generated conversation.json")
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        output_dir = Path(sys.argv[1])
+    else:
+        output_dir = Path(__file__).parent / "schemas"
+    
+    generate_schemas(output_dir)
\ No newline at end of file
diff --git a/services/AgentService/prompts/nim-provider.py b/services/AgentService/prompts/nim-provider.py
new file mode 100644
index 0000000..8b1e8d1
--- /dev/null
+++ b/services/AgentService/prompts/nim-provider.py
@@ -0,0 +1,86 @@
+# nim-provider.py
+import requests
+import json
+from typing import Dict, Any, Optional
+
+def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Custom promptfoo provider that posts the prompt to a chat-completions endpoint.
+
+    Args:
+        prompt: The prompt text or JSON string of messages
+        options: Configuration options from the YAML file; options['config'] may set
+            api_base, api_key, model, temperature, max_tokens and timeout (seconds)
+        context: Test context including variables used (not read by this provider)
+
+    Returns:
+        Dict containing 'output' and 'tokenUsage' on success, or 'error' on failure
+    """
+    response = None  # stays None when the request never produced a response
+    try:
+        # Get configuration from options
+        config = options.get('config', {})
+        api_base = config.get('api_base', "https://youngthug.demoz.io")
+        api_key = config.get('api_key')
+        model = config.get('model', "meta/llama-3.1-405b-instruct")
+        temperature = config.get('temperature', 0.7)
+        max_tokens = config.get('max_tokens', 1000)
+        # Seconds to wait on the endpoint; generous default because large models can be slow
+        timeout = config.get('timeout', 300)
+
+        # Setup headers
+        headers = {"Content-Type": "application/json"}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+
+        # A prompt that parses as a JSON list is sent as the message list itself;
+        # plain text, or JSON that is not a list, becomes a single user message.
+        chat_messages = [{"role": "user", "content": prompt}]
+        try:
+            messages = json.loads(prompt)
+            if isinstance(messages, list):
+                chat_messages = messages
+        except json.JSONDecodeError:
+            pass  # not JSON: keep the plain-text fallback built above
+
+        # Prepare payload
+        payload = {
+            "model": model,
+            "messages": chat_messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            "stream": False,
+        }
+
+        # Make request; the timeout keeps a stalled endpoint from hanging the eval
+        response = requests.post(
+            f"{api_base}/v1/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=timeout,
+        )
+        response.raise_for_status()
+        result = response.json()
+
+        if "choices" not in result or not result["choices"]:
+            return {"error": "No choices in response"}
+
+        # Report usage under total/prompt/completion; OpenAI-style responses name
+        # these total_tokens/prompt_tokens/completion_tokens, so translate them.
+        usage = result.get("usage") or {}
+        return {
+            "output": result["choices"][0]["message"]["content"],
+            "tokenUsage": {
+                "total": usage.get("total_tokens", usage.get("total", 0)),
+                "prompt": usage.get("prompt_tokens", usage.get("prompt", 0)),
+                "completion": usage.get("completion_tokens", usage.get("completion", 0)),
+            },
+        }
+
+    except requests.exceptions.RequestException as e:
+        # Include the response body when one was received, to aid debugging
+        body = response.text if response is not None else 'No response'
+        return {"error": f"Request error: {str(e)}\nResponse: {body}"}
+    except Exception as e:
+        # Report any other failure as an error result instead of raising
+        return {"error": f"Unexpected error: {str(e)}"}
\ No newline at end of file
diff --git a/services/AgentService/prompts/promptfooconfig.yaml b/services/AgentService/prompts/promptfooconfig.yaml
index 41c47f1..b37074f 100644
--- a/services/AgentService/prompts/promptfooconfig.yaml
+++ b/services/AgentService/prompts/promptfooconfig.yaml
@@ -1,27 +1,172 @@
-# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json
-
-# Learn more about building a configuration: https://promptfoo.dev/docs/configuration/guide
-
-description: "Podcast Prompts"
+# File: tests/e2e_test.yaml
+description: "Complete End-to-End Podcast Generation Test"
 
 prompts:
-  - file://./prompts_iter.py:PodcastPrompts.raw_outline_prompt
+  # - name: "raw_outline"
+  - "file://./prompts.py:PodcastPrompts.raw_outline_prompt"
+  # - name: "outline"
+  #   file: "./prompts.py:PodcastPrompts.outline_prompt"
+  # - name: "segment_transcript"
+  #   file: "../prompts.py:PodcastPrompts.segment_transcript_prompt"
+  # - name: "deep_dive"
+  #   file: "../prompts.py:PodcastPrompts.deep_dive_prompt"
+  # - name: "transcript"
+  #   file: "../prompts.py:PodcastPrompts.transcript_prompt"
+  # - name: "raw_podcast_dialogue"
+  #   file: "../prompts.py:PodcastPrompts.raw_podcast_dialogue_prompt_v2"
+  # - name: "fuse_outline"
+  #   file: "../prompts.py:PodcastPrompts.fuse_outline_prompt"
+  # - name: "revise"
+  #   file: "../prompts.py:PodcastPrompts.revise_prompt"
+  # - name: "podcast_dialogue"
+  #   file: "../prompts.py:PodcastPrompts.podcast_dialogue_prompt"
 
 providers:
-  - id: "https://integrate.api.nvidia.com/v1/chat/completions"
+  - id: "file://./nim-provider.py"
+    label: "405b"
     config:
-      method: "POST"
-      headers:
-        "Content-Type": "application/json"
-        "Authorization": "Bearer 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"
-      body:
-        model: "meta/llama-3.1-405b-instruct"
-        messages:
-          - role: "user"
-            content: "{{prompt}}"
-      responseParser: "json.choices[0].message.content"
+      api_base: "https://youngthug.demoz.io"
+      model: "meta/llama-3.1-405b-instruct"
+      temperature: 0.7
+      max_tokens: 1000
 
 tests:
-  - vars:
-      duration: 10
+  - description: "Raw Outline Generation"
+    vars:
       text: file://./eval.txt
+      duration: 15
+    assert:
+      - type: llm-rubric
+        value: |
+          Evaluate if the outline:
+          1. Has clear sections for background, innovation, impact, and future work
+          2. Makes innovation the focus
+          3. Allocates time appropriately for {{duration}} minutes
+          4. Maintains technical accuracy while being accessible
+          Pass if all scores >= 0.95
+        provider: "file://./nim-provider.py"
+
+  # # Step 2: JSON Outline
+  # - description: "JSON Outline Conversion"
+  #   vars:
+  #     text: "{{previousOutput}}"
+  #     schema: file://tests/schemas/podcast_outline.json
+  #   prompt: outline
+  #   assert:
+  #     - type: contains-json
+  #     - type: json-schema
+  #       value: file://tests/schemas/podcast_outline.json
+
+  # # Step 3: Segment Processing
+  # - description: "Segment Transcript Generation"
+  #   vars:
+  #     text: file://tests/inputs/sample.txt
+  #     duration: 5
+  #     topic: "Neural Network Architecture"
+  #     angles: "Technical implementation, Results, Impact"
+  #   prompt: segment_transcript
+  #   assert:
+  #     - type: llm-rubric
+  #       value: |
+  #         Check if the transcript:
+  #         1. Explains concepts clearly for no prior knowledge
+  #         2. Uses analogies and examples
+  #         3. Addresses potential questions
+  #         4. Provides comprehensive context
+  #         5. Fits {{duration}} minute length
+  #         Pass if all >= 0.7
+
+  # # Step 4: Deep Dive Analysis
+  # - description: "Deep Dive Processing"
+  #   vars:
+  #     text: file://tests/inputs/sample.txt
+  #     topic: "Neural Networks for Climate Prediction"
+  #     duration: 7
+  #   prompt: deep_dive
+  #   assert:
+  #     - type: llm-rubric
+  #       value: |
+  #         Verify the deep dive:
+  #         1. Expands content with details and examples
+  #         2. Creates clear topic/subtopic structure
+  #         3. Maintains focus on important aspects
+  #         4. Fits {{duration}} minute timeframe
+  #         Pass if all >= 0.75
+
+  # # Step 5: Transcript Combination
+  # - description: "Transcript Combination"
+  #   vars:
+  #     segments: [["First segment content", 5], ["Second segment content", 5]]
+  #   prompt: transcript
+  #   assert:
+  #     - type: llm-rubric
+  #       value: |
+  #         Check if combined transcript:
+  #         1. Maintains all original content
+  #         2. Has natural flow between segments
+  #         3. Keeps storytelling elements
+  #         Pass if all >= 0.7
+
+  # # Step 6: Raw Dialogue Generation
+  # - description: "Raw Podcast Dialogue"
+  #   vars:
+  #     text: "{{previousOutput}}"
+  #     duration: 15
+  #     speaker_1_name: "Dr. Sarah"
+  #     speaker_2_name: "Prof. Mike"
+  #     descriptions: "Neural Networks, Climate Prediction"
+  #   prompt: raw_podcast_dialogue
+  #   assert:
+  #     - type: llm-rubric
+  #       value: |
+  #         Evaluate dialogue:
+  #         1. Natural conversation with appropriate fillers
+  #         2. Good speaker alternation
+  #         3. Maintains technical accuracy
+  #         4. Includes required conversation elements
+  #         Pass if all >= 0.75
+
+  # # Step 7: Outline Fusion
+  # - description: "Outline Fusion"
+  #   vars:
+  #     overall_outline: "Main outline content"
+  #     sub_outline: "Detailed section outline"
+  #   prompt: fuse_outline
+  #   assert:
+  #     - type: llm-rubric
+  #       value: |
+  #         Check if fused outline:
+  #         1. Combines both outlines without loss
+  #         2. Maintains proper structure
+  #         3. Uses correct time budgets
+  #         Pass if all >= 0.7
+
+  # # Step 8: Dialogue Revision
+  # - description: "Dialogue Revision"
+  #   vars:
+  #     outline: "Podcast outline"
+  #     raw_transcript: "Raw content"
+  #     dialogue_transcript: "Initial dialogue"
+  #   prompt: revise
+  #   assert:
+  #     - type: llm-rubric
+  #       value: |
+  #         Verify revised dialogue:
+  #         1. Natural conversation flow
+  #         2. Proper speaker exchanges
+  #         3. No explicit transitions
+  #         4. No information loss
+  #         Pass if all >= 0.8
+
+  # # Step 9: Final JSON Conversion
+  # - description: "Final JSON Format"
+  #   vars:
+  #     text: "{{previousOutput}}"
+  #     schema: file://tests/schemas/conversation.json
+  #     speaker_1_name: "Dr. Sarah"
+  #     speaker_2_name: "Prof. Mike"
+  #   prompt: podcast_dialogue
+  #   assert:
+  #     - type: contains-json
+  #     - type: json-schema
+  #       value: file://tests/schemas/conversation.json
diff --git a/services/AgentService/prompts/test-eval-pf.yaml b/services/AgentService/prompts/test-eval-pf.yaml
new file mode 100644
index 0000000..41c47f1
--- /dev/null
+++ b/services/AgentService/prompts/test-eval-pf.yaml
@@ -0,0 +1,27 @@
+# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json
+
+# Learn more about building a configuration: https://promptfoo.dev/docs/configuration/guide
+
+description: "Podcast Prompts"
+
+prompts:
+  - file://./prompts_iter.py:PodcastPrompts.raw_outline_prompt
+
+providers:
+  - id: "https://integrate.api.nvidia.com/v1/chat/completions"
+    config:
+      method: "POST"
+      headers:
+        "Content-Type": "application/json"
+        "Authorization": "Bearer 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"
+      body:
+        model: "meta/llama-3.1-405b-instruct"
+        messages:
+          - role: "user"
+            content: "{{prompt}}"
+      responseParser: "json.choices[0].message.content"
+
+tests:
+  - vars:
+      duration: 10
+      text: file://./eval.txt

From 9e0502482b5d52dc8a5b05f5ccd8700329be08d0 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sat, 9 Nov 2024 13:20:02 -0800
Subject: [PATCH 02/10] iter

---
 services/AgentService/prompts/Makefile        |  2 +-
 .../AgentService/prompts/promptfooconfig.yaml | 32 ++++++++++---------
 .../AgentService/prompts/test-eval-pf.yaml    | 27 ----------------
 3 files changed, 18 insertions(+), 43 deletions(-)
 delete mode 100644 services/AgentService/prompts/test-eval-pf.yaml

diff --git a/services/AgentService/prompts/Makefile b/services/AgentService/prompts/Makefile
index 85de8e7..c5a35b9 100644
--- a/services/AgentService/prompts/Makefile
+++ b/services/AgentService/prompts/Makefile
@@ -36,7 +36,7 @@ setup-test: ensure-promptfoo
 # Run end-to-end test
 test-e2e: setup-test
 	@echo "$(GREEN)Running end-to-end tests...$(NC)"
-	npx promptfoo@latest eval 
+	npx promptfoo@latest eval --output $(OUTPUTS_DIR)/out.html
 
 # Run original config
 test-basic: ensure-promptfoo
diff --git a/services/AgentService/prompts/promptfooconfig.yaml b/services/AgentService/prompts/promptfooconfig.yaml
index b37074f..0de4de2 100644
--- a/services/AgentService/prompts/promptfooconfig.yaml
+++ b/services/AgentService/prompts/promptfooconfig.yaml
@@ -1,11 +1,13 @@
-# File: tests/e2e_test.yaml
 description: "Complete End-to-End Podcast Generation Test"
+evaluateOptions:
+  maxConcurrency: 1
+  showProgressBar: true
 
 prompts:
-  # - name: "raw_outline"
-  - "file://./prompts.py:PodcastPrompts.raw_outline_prompt"
-  # - name: "outline"
-  #   file: "./prompts.py:PodcastPrompts.outline_prompt"
+  - id: "./prompts.py:PodcastPrompts.raw_outline_prompt"
+    label: "raw_outline"
+  - id: "./prompts.py:PodcastPrompts.outline_prompt"
+    label: "outline"
   # - name: "segment_transcript"
   #   file: "../prompts.py:PodcastPrompts.segment_transcript_prompt"
   # - name: "deep_dive"
@@ -35,6 +37,7 @@ tests:
     vars:
       text: file://./eval.txt
       duration: 15
+    storeOutputAs: raw_outline_output
     assert:
       - type: llm-rubric
         value: |
@@ -46,16 +49,15 @@ tests:
           Pass if all scores >= 0.95
         provider: "file://./nim-provider.py"
 
-  # # Step 2: JSON Outline
-  # - description: "JSON Outline Conversion"
-  #   vars:
-  #     text: "{{previousOutput}}"
-  #     schema: file://tests/schemas/podcast_outline.json
-  #   prompt: outline
-  #   assert:
-  #     - type: contains-json
-  #     - type: json-schema
-  #       value: file://tests/schemas/podcast_outline.json
+  # Step 2: JSON Outline
+  - description: "JSON Outline Conversion"
+    vars:
+      text: "{{raw_outline_output}}"
+      schema: file://tests/schemas/podcast_outline.json
+    prompt: outline
+    assert:
+      - type: contains-json
+        value: file://tests/schemas/podcast_outline.json
 
   # # Step 3: Segment Processing
   # - description: "Segment Transcript Generation"
diff --git a/services/AgentService/prompts/test-eval-pf.yaml b/services/AgentService/prompts/test-eval-pf.yaml
deleted file mode 100644
index 41c47f1..0000000
--- a/services/AgentService/prompts/test-eval-pf.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# yaml-language-server: $schema=https://promptfoo.dev/config-schema.json
-
-# Learn more about building a configuration: https://promptfoo.dev/docs/configuration/guide
-
-description: "Podcast Prompts"
-
-prompts:
-  - file://./prompts_iter.py:PodcastPrompts.raw_outline_prompt
-
-providers:
-  - id: "https://integrate.api.nvidia.com/v1/chat/completions"
-    config:
-      method: "POST"
-      headers:
-        "Content-Type": "application/json"
-        "Authorization": "Bearer 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"
-      body:
-        model: "meta/llama-3.1-405b-instruct"
-        messages:
-          - role: "user"
-            content: "{{prompt}}"
-      responseParser: "json.choices[0].message.content"
-
-tests:
-  - vars:
-      duration: 10
-      text: file://./eval.txt

From e0246f76c66906777eeaaf0ee6fe9e9f12bd0b84 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sat, 9 Nov 2024 13:20:44 -0800
Subject: [PATCH 03/10] iter

---
 .gitignore                    |  3 ++-
 services/AgentService/main.py | 14 +-------------
 shared/shared/shared_types.py | 11 +++++++++++
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.gitignore b/.gitignore
index efb5264..c6c4be6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,5 @@ get_helm.sh
 .env
 .ruff_cache
 data/minio
-eval.txt
\ No newline at end of file
+eval.txt
+services/AgentService/prompts/tests
\ No newline at end of file
diff --git a/services/AgentService/main.py b/services/AgentService/main.py
index a85689c..a12e979 100644
--- a/services/AgentService/main.py
+++ b/services/AgentService/main.py
@@ -1,5 +1,5 @@
 from fastapi import FastAPI, BackgroundTasks, HTTPException
-from shared.shared_types import ServiceType, JobStatus, Conversation
+from shared.shared_types import ServiceType, JobStatus, Conversation, PodcastSegment, PodcastOutline
 from shared.storage import StorageManager
 from shared.job import JobStatusManager
 import flexagent as fa
@@ -30,18 +30,6 @@
 
 # Data Models
 
-
-class PodcastSegment(BaseModel):
-    section: str
-    descriptions: List[str]
-    duration: int
-
-
-class PodcastOutline(BaseModel):
-    title: str
-    segments: List[PodcastSegment]
-
-
 class TranscriptionRequest(BaseModel):
     markdown: str
     duration: int = 20
diff --git a/shared/shared/shared_types.py b/shared/shared/shared_types.py
index 352b452..5501d7a 100644
--- a/shared/shared/shared_types.py
+++ b/shared/shared/shared_types.py
@@ -82,3 +82,14 @@ class ProcessingStep(BaseModel):
 
 class PromptTracker(BaseModel):
     steps: List[ProcessingStep]
+
+
+class PodcastSegment(BaseModel):
+    section: str
+    descriptions: List[str]
+    duration: int
+
+
+class PodcastOutline(BaseModel):
+    title: str
+    segments: List[PodcastSegment]
\ No newline at end of file

From 9e94c3dee37536e0d86243bbeabb1dc5b21ecda3 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sat, 9 Nov 2024 15:28:18 -0800
Subject: [PATCH 04/10] mega refactor

---
 services/AgentService/prompts/Makefile        |  56 ++++++
 services/AgentService/prompts/README.md       |  10 +
 .../prompts/configs/01_raw_outline.yaml       |  34 ++++
 services/AgentService/prompts/prompts.py      | 180 ++++++++++++++++++
 .../prompts/scripts/generate_schemas.py       |  35 ++++
 .../prompts/scripts/nim-provider.py           |  86 +++++++++
 .../AgentService/prompts/scripts/run_tests.py |  75 ++++++++
 7 files changed, 476 insertions(+)
 create mode 100644 services/AgentService/prompts/Makefile
 create mode 100644 services/AgentService/prompts/README.md
 create mode 100644 services/AgentService/prompts/configs/01_raw_outline.yaml
 create mode 100644 services/AgentService/prompts/prompts.py
 create mode 100644 services/AgentService/prompts/scripts/generate_schemas.py
 create mode 100644 services/AgentService/prompts/scripts/nim-provider.py
 create mode 100644 services/AgentService/prompts/scripts/run_tests.py

diff --git a/services/AgentService/prompts/Makefile b/services/AgentService/prompts/Makefile
new file mode 100644
index 0000000..31f71e1
--- /dev/null
+++ b/services/AgentService/prompts/Makefile
@@ -0,0 +1,56 @@
+# Colors for better visibility
+GREEN=\033[0;32m
+RED=\033[0;31m
+NC=\033[0m
+
+# Directory structure
+CONFIG_DIR=configs
+TEST_DIR=tests
+OUTPUTS_DIR=$(TEST_DIR)/outputs
+SCHEMAS_DIR=$(TEST_DIR)/schemas
+INPUTS_DIR=$(TEST_DIR)/inputs
+
+# Ensure promptfoo is installed
+ensure-promptfoo:
+	@echo "$(GREEN)Checking promptfoo installation...$(NC)"
+	@if ! command -v promptfoo >/dev/null 2>&1; then \
+		echo "$(RED)Error: promptfoo is not installed. Installing via brew...$(NC)"; \
+		brew install promptfoo || { echo "$(RED)Failed to install promptfoo$(NC)"; exit 1; }; \
+	fi
+
+# Generate JSON schemas
+generate-schemas:
+	@echo "$(GREEN)Generating JSON schemas from Pydantic models...$(NC)"
+	@python scripts/generate_schemas.py $(SCHEMAS_DIR)
+
+# Setup test environment: create the test dirs, then regenerate schemas via $(MAKE) so the sub-make inherits flags
+setup-test: ensure-promptfoo
+	@echo "$(GREEN)Setting up test environment...$(NC)"
+	@mkdir -p $(INPUTS_DIR) $(OUTPUTS_DIR) $(SCHEMAS_DIR)
+	@$(MAKE) generate-schemas
+
+# Run all prompt tests
+test-prompts: setup-test
+	@echo "$(GREEN)Running all prompt tests...$(NC)"
+	@cd $(CONFIG_DIR) && python ../scripts/run_tests.py
+
+# Run up to a specific stage
+test-upto: setup-test
+	@if [ -z "$(stage)" ]; then \
+		echo "$(RED)Error: Please specify a stage number with 'stage=N'$(NC)"; \
+		exit 1; \
+	fi
+	@echo "$(GREEN)Running prompt tests up to stage $(stage)...$(NC)"
+	@cd $(CONFIG_DIR) && python ../scripts/run_tests.py --up-to $(stage)
+
+# List all available test stages
+test-list: setup-test
+	@echo "$(GREEN)Listing available test stages...$(NC)"
+	@python scripts/run_tests.py --list
+
+# Clean test outputs
+clean:
+	@echo "$(GREEN)Cleaning test outputs...$(NC)"
+	rm -rf $(TEST_DIR)
+
+.PHONY: ensure-promptfoo generate-schemas setup-test test-prompts test-upto clean test-list
\ No newline at end of file
diff --git a/services/AgentService/prompts/README.md b/services/AgentService/prompts/README.md
new file mode 100644
index 0000000..a128bdf
--- /dev/null
+++ b/services/AgentService/prompts/README.md
@@ -0,0 +1,10 @@
+To get started, set your OPENAI_API_KEY environment variable, or other required keys for the providers you selected.
+
+Next, edit promptfooconfig.yaml.
+
+Then run:
+```
+promptfoo eval
+```
+
+Afterwards, you can view the results by running `promptfoo view`
diff --git a/services/AgentService/prompts/configs/01_raw_outline.yaml b/services/AgentService/prompts/configs/01_raw_outline.yaml
new file mode 100644
index 0000000..69abbba
--- /dev/null
+++ b/services/AgentService/prompts/configs/01_raw_outline.yaml
@@ -0,0 +1,34 @@
+description: "01 Raw Outline Generation"
+evaluateOptions:
+  maxConcurrency: 1
+  showProgressBar: true
+
+prompts:
+  - id: "../prompts.py:PodcastPrompts.raw_outline_prompt"
+    label: "raw_outline"
+
+providers:
+  - id: "file://../scripts/nim-provider.py"
+    label: "405b"
+    config:
+      api_base: "https://youngthug.demoz.io"
+      model: "meta/llama-3.1-405b-instruct"
+      temperature: 0.7
+      max_tokens: 1000
+
+tests:
+  - description: "Raw Outline Generation"
+    vars:
+      text: file://../data/eval.txt
+      duration: 15
+    storeOutputAs: raw_outline_output
+    assert:
+      - type: llm-rubric
+        value: |
+          Evaluate if the outline:
+          1. Has clear sections for background, innovation, impact, and future work
+          2. Makes innovation the focus
+          3. Allocates time appropriately for {{duration}} minutes
+          4. Maintains technical accuracy while being accessible
+          Pass if all scores >= 0.95
+        provider: "file://../scripts/nim-provider.py"
diff --git a/services/AgentService/prompts/prompts.py b/services/AgentService/prompts/prompts.py
new file mode 100644
index 0000000..c0c5d1c
--- /dev/null
+++ b/services/AgentService/prompts/prompts.py
@@ -0,0 +1,180 @@
+import jinja2
+
+# Raw string prompts
+RAW_OUTLINE_PROMPT_STR = """I want to make the following paper into a podcast transcript for {{ duration }} minutes, to help audience understand background, innovation, impact and future work. 
+
+Come up the structure of the podcast.
+                                 
+{{ text }}
+                                     
+Innovation should be the focus of the podcast, and the most important part of the podcast, with enough details."""
+
+OUTLINE_PROMPT_STR = """Given the free form outline, convert in into a structured outline without losing any information.                                 
+
+{{ text }}
+                                                           
+The result must conform to the following JSON schema:\n{{ schema }}\n\n"""
+
+SEGMENT_TRANSCRIPT_PROMPT_STR = """Make a transcript given the text:
+
+{{ text }}
+                                            
+The transcript is about {{ duration }} minutes, approximately {{ (duration * 180) | int }} words.
+The transcript's subject is {{ topic }}, and should focus on the following topics: {{ angles }}
+                                            
+Explain all concepts clearly, assuming no prior knowledge
+Use analogies, stories, or examples to illustrate points
+Address potential questions or counterarguments
+Provide context and background information throughout
+Make sure the details, numbers are accurate and comprehensive
+                                            
+Dive deep into each topic, and provide enough details given the time budget, don't leave any stone unturned."""
+
+DEEP_DIVE_PROMPT_STR = """You will be given some content, short ideas or thoughts about the content.
+
+Your task is to expand the content into a detailed and comprehensive explanation, with enough details and examples.
+
+Here is the content
+
+{{text}}
+                                   
+The topic will be around
+                                   
+{{topic}}
+                                   
+Dive deep into each topic, come up with an outline with topics and subtopics to help fully understand the content.
+Expand the topics, don't add any other topics. Allocate time budget for each topic. Total time budget should be {{ duration }} minutes.
+Focus on the most important topics and ideas, and allocate more time budget to them.
+Avoid introduction and conclusion in the outline, focus on expanding into subtopics."""
+
+TRANSCRIPT_PROMPT_STR = """Given the transcript of different segments,combine and optimize the transcript to make the flow more natural.
+The content should be strictly following the transcript, and only optimize the flow. Keep all the details, and storytelling contents.
+
+{% for segment, duration in segments %}
+
+Time budget: {{ duration }} minutes, approximately {{ (duration * 180) | int }} words.
+{{ segment }}                                    
+
+{% endfor %}
+                                    
+Only return the full transcript, no need to include any other information like time budget or segment name."""
+
+RAW_PODCAST_DIALOGUE_PROMPT_V2_STR = """Your task is to transform the provided input transcript into a lively, engaging, and informative podcast dialogue. 
+
+There are two speakers, speaker-1 and speaker-2.
+speaker-1's name is {{ speaker_1_name }}, and speaker-2's name is {{ speaker_2_name }}.
+
+Given the following conversation, make introductions for both speakers at beginning of the conversation.
+During the conversation, occasionally mention the speaker's name to refer to them, to make the conversation more natural.
+Incorporate natural speech patterns, including occasional verbal fillers (e.g., "um," "well," "you know")
+Use casual language and ensure the dialogue flows smoothly, reflecting a real-life conversation
+The fillers should be used naturally, not in every sentence, and not in a robotic way but related to topic and conversation context.
+                                          
+Maintain a lively pace with a mix of serious discussion and lighter moments
+Use rhetorical questions or hypotheticals to involve the listener
+Create natural moments of reflection or emphasis
+     
+Allow for natural interruptions and back-and-forth between host and guest
+Ensure the guest's responses are substantiated by the input text, avoiding unsupported claims                                   
+Avoid long sentences from either speaker, break them into conversations between two speakers.
+Throughout the script, strive for authenticity in the conversation. Include:
+   - Moments of genuine curiosity or surprise from the host
+   - Instances where the guest might briefly struggle to articulate a complex idea
+   - Light-hearted moments or humor when appropriate
+   - Brief personal anecdotes or examples that relate to the topic (within the bounds of the input text)
+                 
+Don't lose any information or details in the transcript. It is only format conversion, so strictly follow the transcript.
+                                                 
+This segment is about {{ duration }} minutes, approximately {{ (duration * 180) | int }} words.
+The topic is {{ descriptions }}
+                                          
+You should keep all analogies, stories, examples, and quotes from the transcript.
+
+Here is the transcript:
+{{text}}
+                                          
+Only return the full dialogue transcript, no need to include any other information like time budget or segment name.
+Don't add introduction and ending to the dialogue unless it is provided in the transcript."""
+
+FUSE_OUTLINE_PROMPT_STR = """You are given two outlines, one is overall outline, another is sub-outline for one section in the overall outline.
+You need to fuse the two outlines into a new outline, to represent the whole podcast without losing any descriptions in sub sections.
+Ignore the time budget in the sub-outline, and use the time budget in the overall outline.
+Overall outline:
+{{ overall_outline }}
+
+Sub-outline:
+{{ sub_outline }}
+
+Output the new outline with the tree structure."""
+
+REVISE_PROMPT_STR = """You are given a podcast dialogue transcript, and a raw transcript of the podcast.
+You are only allowed to copy information from the raw dialogue transcript to make the conversation more natural and engaging, but exactly follow the outline.
+                                
+Outline:
+{{ outline}}
+
+Here is the dialogue transcript:
+{{ dialogue_transcript }}
+
+You need also to break long sentences from either speaker into conversations between two speakers, by inserting more dialogue entries and verbal fillers (e.g., "um")
+Don't let a single speaker talk more than 2 sentences, and break the conversation into multiple exchanges between two speakers.
+                                
+Don't make any explict transition between sections, this is one podcast, and the sections are connected.
+Don't use words like "Welcome back" or "Now we are going to talk about" etc.
+Don't make introductions in the middle of the conversation.
+Merge related topics according to outline and don't repeat same things in different place.
+                                
+Don't lose any information or details from the raw transcript, only make the conversation flow more natural."""
+
+PODCAST_DIALOGUE_PROMPT_STR = """Given a podcast transcript between two speakers, convert it into a structured JSON format.
+- Only do conversion
+- Don't miss any information in the transcript
+
+There are two speakers, speaker-1 and speaker-2.
+speaker-1's name is {{ speaker_1_name }}, and speaker-2's name is {{ speaker_2_name }}.
+                                          
+Here is the original transcript:
+{{ text }}
+                                          
+The result must conform to the following JSON schema:\n{{ schema }}\n\n"""
+
+# Wrap raw strings in Jinja templates
+RAW_OUTLINE_PROMPT = jinja2.Template(RAW_OUTLINE_PROMPT_STR)
+OUTLINE_PROMPT = jinja2.Template(OUTLINE_PROMPT_STR)
+SEGMENT_TRANSCRIPT_PROMPT = jinja2.Template(SEGMENT_TRANSCRIPT_PROMPT_STR)
+DEEP_DIVE_PROMPT = jinja2.Template(DEEP_DIVE_PROMPT_STR)
+TRANSCRIPT_PROMPT = jinja2.Template(TRANSCRIPT_PROMPT_STR)
+RAW_PODCAST_DIALOGUE_PROMPT_v2 = jinja2.Template(RAW_PODCAST_DIALOGUE_PROMPT_V2_STR)
+FUSE_OUTLINE_PROMPT = jinja2.Template(FUSE_OUTLINE_PROMPT_STR)
+REVISE_PROMPT = jinja2.Template(REVISE_PROMPT_STR)
+PODCAST_DIALOGUE_PROMPT = jinja2.Template(PODCAST_DIALOGUE_PROMPT_STR)
+
+
+# Exposes each raw (unrendered) prompt string through a method so the promptfoo configs can reference it by name
+class PodcastPrompts:
+    def raw_outline_prompt(self):
+        return RAW_OUTLINE_PROMPT_STR
+
+    def outline_prompt(self):
+        return OUTLINE_PROMPT_STR
+
+    def segment_transcript_prompt(self):
+        return SEGMENT_TRANSCRIPT_PROMPT_STR
+
+    def deep_dive_prompt(self):
+        return DEEP_DIVE_PROMPT_STR
+
+    def transcript_prompt(self):
+        return TRANSCRIPT_PROMPT_STR
+
+    def raw_podcast_dialogue_prompt_v2(self):
+        return RAW_PODCAST_DIALOGUE_PROMPT_V2_STR
+
+    def fuse_outline_prompt(self):
+        return FUSE_OUTLINE_PROMPT_STR
+
+    def revise_prompt(self):
+        return REVISE_PROMPT_STR
+
+    def podcast_dialogue_prompt(self):
+        return PODCAST_DIALOGUE_PROMPT_STR
diff --git a/services/AgentService/prompts/scripts/generate_schemas.py b/services/AgentService/prompts/scripts/generate_schemas.py
new file mode 100644
index 0000000..1fe8d7f
--- /dev/null
+++ b/services/AgentService/prompts/scripts/generate_schemas.py
@@ -0,0 +1,35 @@
+# prompts/scripts/generate_schemas.py
+import json
+import sys
+from pathlib import Path
+
+# Get the absolute path to the root directory
+root_dir = Path(__file__).resolve().parents[4]
+sys.path.append(str(root_dir))
+
+from shared.shared.shared_types import Conversation, PodcastOutline
+
+def generate_schemas(output_dir: Path):
+    """Write the JSON schema of each shared Pydantic model into ``output_dir``.
+
+    The directory (and any missing parents) is created first. One file is
+    written per model, and a confirmation line is printed after each write.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # (file name, model) pairs, processed in this order
+    targets = (
+        ("podcast_outline.json", PodcastOutline),
+        ("conversation.json", Conversation),
+    )
+    for file_name, model in targets:
+        (output_dir / file_name).write_text(json.dumps(model.model_json_schema(), indent=2))
+        print(f"Generated {file_name}")
+
+if __name__ == "__main__":
+    # An optional first CLI argument overrides the default output location,
+    # which is a "schemas" directory next to this script.
+    cli_args = sys.argv[1:]
+    default_dir = Path(__file__).parent / "schemas"
+    output_dir = Path(cli_args[0]) if cli_args else default_dir
+    generate_schemas(output_dir)
\ No newline at end of file
diff --git a/services/AgentService/prompts/scripts/nim-provider.py b/services/AgentService/prompts/scripts/nim-provider.py
new file mode 100644
index 0000000..8b1e8d1
--- /dev/null
+++ b/services/AgentService/prompts/scripts/nim-provider.py
@@ -0,0 +1,86 @@
+# nim-provider.py
+import requests
+import json
+from typing import Dict, Any, Optional
+
+def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    promptfoo custom provider: send one chat-completion request and return the reply.
+
+    Args:
+        prompt: Plain prompt text, or a JSON-encoded list of chat messages.
+        options: Provider options; options['config'] may override api_base,
+            api_key, model, temperature, max_tokens and timeout (seconds).
+        context: Test context from promptfoo (not used by this provider).
+
+    Returns:
+        {"output": ..., "tokenUsage": {...}} on success, {"error": ...} otherwise;
+        failures are reported through the dict rather than raised.
+    """
+    response = None  # stays None if the request fails before a reply is received
+    try:
+        # Get configuration from options
+        config = options.get('config', {})
+        # Drop trailing slashes so the joined URL never contains "//v1"
+        api_base = config.get('api_base', "https://youngthug.demoz.io").rstrip('/')
+        api_key = config.get('api_key')
+        model = config.get('model', "meta/llama-3.1-405b-instruct")
+        temperature = config.get('temperature', 0.7)
+        max_tokens = config.get('max_tokens', 1000)
+        timeout = config.get('timeout', 600)
+
+        # Setup headers
+        headers = {"Content-Type": "application/json"}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+
+        # A prompt that parses as a JSON list is used as ready-made chat messages;
+        # anything else is sent as a single user message.
+        try:
+            messages = json.loads(prompt)
+        except json.JSONDecodeError:
+            messages = None
+        if isinstance(messages, list):
+            chat_messages = messages
+        else:
+            chat_messages = [{"role": "user", "content": prompt}]
+
+        # Prepare payload
+        payload = {
+            "model": model,
+            "messages": chat_messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            "stream": False,
+        }
+
+        # Make request; the timeout keeps a stalled server from hanging the eval
+        response = requests.post(
+            f"{api_base}/v1/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=timeout,
+        )
+        response.raise_for_status()
+        result = response.json()
+
+        if "choices" not in result or len(result["choices"]) == 0:
+            return {"error": "No choices in response"}
+
+        # Usage may arrive with OpenAI-style *_tokens keys; promptfoo expects
+        # total/prompt/completion, so accept either spelling.
+        usage = result.get("usage") or {}
+        return {
+            "output": result["choices"][0]["message"]["content"],
+            "tokenUsage": {
+                "total": usage.get("total_tokens", usage.get("total", 0)),
+                "prompt": usage.get("prompt_tokens", usage.get("prompt", 0)),
+                "completion": usage.get("completion_tokens", usage.get("completion", 0)),
+            },
+        }
+
+    except requests.exceptions.RequestException as e:
+        body = response.text if response is not None else 'No response'
+        return {"error": f"Request error: {str(e)}\nResponse: {body}"}
+    except Exception as e:
+        return {"error": f"Unexpected error: {str(e)}"}
\ No newline at end of file
diff --git a/services/AgentService/prompts/scripts/run_tests.py b/services/AgentService/prompts/scripts/run_tests.py
new file mode 100644
index 0000000..d5da0ae
--- /dev/null
+++ b/services/AgentService/prompts/scripts/run_tests.py
@@ -0,0 +1,75 @@
+# prompts/scripts/run_tests.py
+import argparse
+import subprocess
+from pathlib import Path
+from typing import List, Optional
+
+class PromptTestRunner:
+    """Runs the numbered promptfoo stage configs found in ``config_dir``, in order."""
+    def __init__(self, config_dir: str = "configs"):
+        self.base_dir = Path(__file__).parent.parent  # prompts directory
+        self.config_dir = self.base_dir / config_dir
+        self.outputs_dir = self.base_dir / "tests/outputs"
+
+    def get_stage_configs(self) -> List[Path]:
+        """Return the ``NN_*.yaml`` files in ``config_dir``, sorted by path."""
+        return sorted(self.config_dir.glob("[0-9][0-9]_*.yaml"))
+
+    def run_stage(self, config_path: Path) -> bool:
+        """Run one stage with ``promptfoo eval``; return True when it exits with status 0."""
+        self.outputs_dir.mkdir(parents=True, exist_ok=True)  # make sure the results directory exists
+        print(f"\n=== Running stage: {config_path.stem} ===")
+        
+        # Create output path for this stage
+        output_path = self.outputs_dir / f"{config_path.stem}_results.html"
+        
+        result = subprocess.run(
+            ["promptfoo", "eval", 
+             "-c", str(config_path), "--output", str(output_path)],
+            capture_output=True, text=True,
+        )
+        # Print the output regardless of success/failure
+        if result.stdout:
+            print(result.stdout)
+        if result.returncode != 0:
+            print(f"Error: {result.stderr}")
+        return result.returncode == 0
+
+    def run_all_stages(self) -> bool:
+        """Run every stage in order, stopping at the first failure; return True if all passed."""
+        for config in self.get_stage_configs():
+            if not self.run_stage(config):
+                print(f"\nStage {config.stem} failed. Stopping pipeline.")
+                return False
+        return True
+
+    def run_up_to_stage(self, target_stage: int) -> bool:
+        """Run stages numbered <= ``target_stage``, stopping at the first failure; True if none failed."""
+        for config in self.get_stage_configs():
+            stage_num = int(config.stem.split("_")[0])
+            if stage_num > target_stage:
+                break
+            if not self.run_stage(config):
+                print(f"\nStage {config.stem} failed. Stopping pipeline.")
+                return False
+        return True
+
+def main():
+    """Parse the CLI flags, then list or run the stages; exit with status 1 if a stage failed."""
+    parser = argparse.ArgumentParser(description="Run prompt tests in stages")
+    parser.add_argument("--up-to", type=int, help="Run all stages up to this number")
+    parser.add_argument("--list", action="store_true", help="List all available test stages")
+    args = parser.parse_args()
+    runner = PromptTestRunner()
+
+    if args.list:
+        print("Available test stages:")
+        for config in runner.get_stage_configs():
+            print(f"  - {config.stem}")
+        return
+    passed = runner.run_all_stages() if args.up_to is None else runner.run_up_to_stage(args.up_to)
+    if not passed:
+        raise SystemExit(1)  # non-zero status lets make/CI notice the failed stage
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From ee245158fb20689f31b248dcce10c97cc7cb7c90 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sat, 9 Nov 2024 15:37:44 -0800
Subject: [PATCH 05/10] big boy

---
 services/AgentService/prompts/Makefile                    | 2 +-
 services/AgentService/prompts/configs/01_raw_outline.yaml | 3 +--
 services/AgentService/prompts/scripts/run_tests.py        | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/services/AgentService/prompts/Makefile b/services/AgentService/prompts/Makefile
index 31f71e1..6327b6d 100644
--- a/services/AgentService/prompts/Makefile
+++ b/services/AgentService/prompts/Makefile
@@ -44,7 +44,7 @@ test-upto: setup-test
 	@cd $(CONFIG_DIR) && python ../scripts/run_tests.py --up-to $(stage)
 
 # List all available test stages
-test-list: setup-test
+test-list:
 	@echo "$(GREEN)Listing available test stages...$(NC)"
 	@python scripts/run_tests.py --list
 
diff --git a/services/AgentService/prompts/configs/01_raw_outline.yaml b/services/AgentService/prompts/configs/01_raw_outline.yaml
index 69abbba..f22c094 100644
--- a/services/AgentService/prompts/configs/01_raw_outline.yaml
+++ b/services/AgentService/prompts/configs/01_raw_outline.yaml
@@ -4,8 +4,7 @@ evaluateOptions:
   showProgressBar: true
 
 prompts:
-  - id: "../prompts.py:PodcastPrompts.raw_outline_prompt"
-    label: "raw_outline"
+  - "file://../prompts.py:PodcastPrompts.raw_outline_prompt"
 
 providers:
   - id: "file://../scripts/nim-provider.py"
diff --git a/services/AgentService/prompts/scripts/run_tests.py b/services/AgentService/prompts/scripts/run_tests.py
index d5da0ae..077bc84 100644
--- a/services/AgentService/prompts/scripts/run_tests.py
+++ b/services/AgentService/prompts/scripts/run_tests.py
@@ -19,7 +19,7 @@ def run_stage(self, config_path: Path) -> bool:
         print(f"\n=== Running stage: {config_path.stem} ===")
         
         # Create output path for this stage
-        output_path = self.outputs_dir / f"{config_path.stem}_results.html"
+        output_path = self.outputs_dir / f"{config_path.stem}_results.json"
         
         result = subprocess.run(
             ["promptfoo", "eval", 

From 07d2c3237dbecd8b8237e9bb70a9a38357008225 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sat, 9 Nov 2024 16:09:40 -0800
Subject: [PATCH 06/10] big refactor again

---
 .../prompts/configs/02_outline_json.yaml      | 25 +++++++++++++
 .../prompts/scripts/get-output.py             | 36 +++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 services/AgentService/prompts/configs/02_outline_json.yaml
 create mode 100644 services/AgentService/prompts/scripts/get-output.py

diff --git a/services/AgentService/prompts/configs/02_outline_json.yaml b/services/AgentService/prompts/configs/02_outline_json.yaml
new file mode 100644
index 0000000..a691f75
--- /dev/null
+++ b/services/AgentService/prompts/configs/02_outline_json.yaml
@@ -0,0 +1,25 @@
+description: "02 Outline JSON Generation"
+evaluateOptions:
+  maxConcurrency: 1
+  showProgressBar: true
+prompts:
+  - "file://../prompts.py:PodcastPrompts.outline_prompt"
+providers:
+  - id: "file://../scripts/nim-provider.py"
+    label: "405b"
+    config:
+      api_base: "https://youngthug.demoz.io"
+      model: "meta/llama-3.1-405b-instruct"
+      temperature: 0.7
+      max_tokens: 1000
+tests:
+  - description: "Outline JSON Generation"
+    vars:
+      text: file://../tests/outputs/01_raw_outline_results.json
+      schema: file://../tests/schemas/podcast_outline.json
+    options:
+      transformVars: file://../scripts/get-output.py:get_transform
+    storeOutputAs: outline_json_output
+    assert:
+      - type: is-json
+        value: file://../tests/schemas/podcast_outline.json
diff --git a/services/AgentService/prompts/scripts/get-output.py b/services/AgentService/prompts/scripts/get-output.py
new file mode 100644
index 0000000..e14322d
--- /dev/null
+++ b/services/AgentService/prompts/scripts/get-output.py
@@ -0,0 +1,36 @@
+import json
+
+def get_transform(vars, context):
+    """
+    Transform function for promptfoo that extracts just the output field from evaluation results JSON.
+    
+    Args:
+        vars (dict): Variables passed from promptfoo config
+        context (dict): Additional context from promptfoo (not used by this function)
+        
+    Returns:
+        dict: Transformed variables including the extracted output
+    """
+    try:
+        # Read the JSON file content - it will be in vars['text']
+        data = json.loads(vars['text'])
+        
+        # Navigate through the JSON structure to find the output
+        results = data['results']
+        if isinstance(results, dict) and 'results' in results:
+            for result in results['results']:
+                if 'response' in result and 'output' in result['response']:
+                    # Return the original vars dict with our new transformed text
+                    return {
+                        **vars,
+                        'text': result['response']['output']
+                    }
+                    
+        raise ValueError("Could not find output in the JSON structure")
+        
+    except Exception as e:
+        print(f"Error transforming variables: {e}")
+        return {  # best-effort: hand back the input vars plus an 'error' entry instead of raising
+            **vars,
+            'error': f'Failed to transform variables: {str(e)}'
+        }
\ No newline at end of file

From 5a79dec1ffe0b5472d35aa30b326a312fdb454bb Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sun, 10 Nov 2024 12:33:30 -0800
Subject: [PATCH 07/10] works for 1 and 2 now

---
 .../prompts/configs/01_raw_outline.yaml       |  7 +-
 .../prompts/configs/02_outline_json.yaml      | 12 +--
 .../prompts/providers/nim-405b.py             | 86 ++++++++++++++++
 .../AgentService/prompts/providers/nim-70b.py | 86 ++++++++++++++++
 .../AgentService/prompts/providers/nim-8b.py  | 98 +++++++++++++++++++
 .../prompts/scripts/get-output.py             | 29 +++---
 6 files changed, 291 insertions(+), 27 deletions(-)
 create mode 100644 services/AgentService/prompts/providers/nim-405b.py
 create mode 100644 services/AgentService/prompts/providers/nim-70b.py
 create mode 100644 services/AgentService/prompts/providers/nim-8b.py

diff --git a/services/AgentService/prompts/configs/01_raw_outline.yaml b/services/AgentService/prompts/configs/01_raw_outline.yaml
index f22c094..3d0a7cd 100644
--- a/services/AgentService/prompts/configs/01_raw_outline.yaml
+++ b/services/AgentService/prompts/configs/01_raw_outline.yaml
@@ -7,13 +7,8 @@ prompts:
   - "file://../prompts.py:PodcastPrompts.raw_outline_prompt"
 
 providers:
-  - id: "file://../scripts/nim-provider.py"
+  - id: "file://../providers/nim-405b.py"
     label: "405b"
-    config:
-      api_base: "https://youngthug.demoz.io"
-      model: "meta/llama-3.1-405b-instruct"
-      temperature: 0.7
-      max_tokens: 1000
 
 tests:
   - description: "Raw Outline Generation"
diff --git a/services/AgentService/prompts/configs/02_outline_json.yaml b/services/AgentService/prompts/configs/02_outline_json.yaml
index a691f75..b6a6a4d 100644
--- a/services/AgentService/prompts/configs/02_outline_json.yaml
+++ b/services/AgentService/prompts/configs/02_outline_json.yaml
@@ -2,16 +2,16 @@ description: "02 Outline JSON Generation"
 evaluateOptions:
   maxConcurrency: 1
   showProgressBar: true
+
 prompts:
   - "file://../prompts.py:PodcastPrompts.outline_prompt"
+
 providers:
-  - id: "file://../scripts/nim-provider.py"
-    label: "405b"
+  - id: "file://../providers/nim-8b.py"
+    label: "8b"
     config:
-      api_base: "https://youngthug.demoz.io"
-      model: "meta/llama-3.1-405b-instruct"
-      temperature: 0.7
-      max_tokens: 1000
+      json_schema: file://../tests/schemas/podcast_outline.json
+
 tests:
   - description: "Outline JSON Generation"
     vars:
diff --git a/services/AgentService/prompts/providers/nim-405b.py b/services/AgentService/prompts/providers/nim-405b.py
new file mode 100644
index 0000000..8b1e8d1
--- /dev/null
+++ b/services/AgentService/prompts/providers/nim-405b.py
@@ -0,0 +1,86 @@
+# nim-405b.py
+import requests
+import json
+from typing import Dict, Any, Optional
+
+def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    promptfoo custom provider: send one chat-completion request and return the reply.
+
+    Args:
+        prompt: Plain prompt text, or a JSON-encoded list of chat messages.
+        options: Provider options; options['config'] may override api_base,
+            api_key, model, temperature, max_tokens and timeout (seconds).
+        context: Test context from promptfoo (not used by this provider).
+
+    Returns:
+        {"output": ..., "tokenUsage": {...}} on success, {"error": ...} otherwise;
+        failures are reported through the dict rather than raised.
+    """
+    response = None  # stays None if the request fails before a reply is received
+    try:
+        # Get configuration from options
+        config = options.get('config', {})
+        # Drop trailing slashes so the joined URL never contains "//v1"
+        api_base = config.get('api_base', "https://youngthug.demoz.io").rstrip('/')
+        api_key = config.get('api_key')
+        model = config.get('model', "meta/llama-3.1-405b-instruct")
+        temperature = config.get('temperature', 0.7)
+        max_tokens = config.get('max_tokens', 1000)
+        timeout = config.get('timeout', 600)
+
+        # Setup headers
+        headers = {"Content-Type": "application/json"}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+
+        # A prompt that parses as a JSON list is used as ready-made chat messages;
+        # anything else is sent as a single user message.
+        try:
+            messages = json.loads(prompt)
+        except json.JSONDecodeError:
+            messages = None
+        if isinstance(messages, list):
+            chat_messages = messages
+        else:
+            chat_messages = [{"role": "user", "content": prompt}]
+
+        # Prepare payload
+        payload = {
+            "model": model,
+            "messages": chat_messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            "stream": False,
+        }
+
+        # Make request; the timeout keeps a stalled server from hanging the eval
+        response = requests.post(
+            f"{api_base}/v1/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=timeout,
+        )
+        response.raise_for_status()
+        result = response.json()
+
+        if "choices" not in result or len(result["choices"]) == 0:
+            return {"error": "No choices in response"}
+
+        # Usage may arrive with OpenAI-style *_tokens keys; promptfoo expects
+        # total/prompt/completion, so accept either spelling.
+        usage = result.get("usage") or {}
+        return {
+            "output": result["choices"][0]["message"]["content"],
+            "tokenUsage": {
+                "total": usage.get("total_tokens", usage.get("total", 0)),
+                "prompt": usage.get("prompt_tokens", usage.get("prompt", 0)),
+                "completion": usage.get("completion_tokens", usage.get("completion", 0)),
+            },
+        }
+
+    except requests.exceptions.RequestException as e:
+        body = response.text if response is not None else 'No response'
+        return {"error": f"Request error: {str(e)}\nResponse: {body}"}
+    except Exception as e:
+        return {"error": f"Unexpected error: {str(e)}"}
\ No newline at end of file
diff --git a/services/AgentService/prompts/providers/nim-70b.py b/services/AgentService/prompts/providers/nim-70b.py
new file mode 100644
index 0000000..042dbea
--- /dev/null
+++ b/services/AgentService/prompts/providers/nim-70b.py
@@ -0,0 +1,86 @@
+# nim-70b.py
+import requests
+import json
+from typing import Dict, Any, Optional
+
+def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    promptfoo custom provider: send one chat-completion request and return the reply.
+
+    Args:
+        prompt: Plain prompt text, or a JSON-encoded list of chat messages.
+        options: Provider options; options['config'] may override api_base,
+            api_key, model, temperature, max_tokens and timeout (seconds).
+        context: Test context from promptfoo (not used by this provider).
+
+    Returns:
+        {"output": ..., "tokenUsage": {...}} on success, {"error": ...} otherwise;
+        failures are reported through the dict rather than raised.
+    """
+    response = None  # stays None if the request fails before a reply is received
+    try:
+        # Get configuration from options
+        config = options.get('config', {})
+        # The default ends in "/": strip it so the joined URL never contains "//v1"
+        api_base = config.get('api_base', "https://small-nim-pc8kmx5ae.brevlab.com/").rstrip('/')
+        api_key = config.get('api_key')
+        model = config.get('model', "meta/llama-3.1-70b-instruct")
+        temperature = config.get('temperature', 0.7)
+        max_tokens = config.get('max_tokens', 1000)
+        timeout = config.get('timeout', 600)
+
+        # Setup headers
+        headers = {"Content-Type": "application/json"}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+
+        # A prompt that parses as a JSON list is used as ready-made chat messages;
+        # anything else is sent as a single user message.
+        try:
+            messages = json.loads(prompt)
+        except json.JSONDecodeError:
+            messages = None
+        if isinstance(messages, list):
+            chat_messages = messages
+        else:
+            chat_messages = [{"role": "user", "content": prompt}]
+
+        # Prepare payload
+        payload = {
+            "model": model,
+            "messages": chat_messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            "stream": False,
+        }
+
+        # Make request; the timeout keeps a stalled server from hanging the eval
+        response = requests.post(
+            f"{api_base}/v1/chat/completions",
+            headers=headers,
+            json=payload,
+            timeout=timeout,
+        )
+        response.raise_for_status()
+        result = response.json()
+
+        if "choices" not in result or len(result["choices"]) == 0:
+            return {"error": "No choices in response"}
+
+        # Usage may arrive with OpenAI-style *_tokens keys; promptfoo expects
+        # total/prompt/completion, so accept either spelling.
+        usage = result.get("usage") or {}
+        return {
+            "output": result["choices"][0]["message"]["content"],
+            "tokenUsage": {
+                "total": usage.get("total_tokens", usage.get("total", 0)),
+                "prompt": usage.get("prompt_tokens", usage.get("prompt", 0)),
+                "completion": usage.get("completion_tokens", usage.get("completion", 0)),
+            },
+        }
+
+    except requests.exceptions.RequestException as e:
+        body = response.text if response is not None else 'No response'
+        return {"error": f"Request error: {str(e)}\nResponse: {body}"}
+    except Exception as e:
+        return {"error": f"Unexpected error: {str(e)}"}
\ No newline at end of file
diff --git a/services/AgentService/prompts/providers/nim-8b.py b/services/AgentService/prompts/providers/nim-8b.py
new file mode 100644
index 0000000..3e39106
--- /dev/null
+++ b/services/AgentService/prompts/providers/nim-8b.py
@@ -0,0 +1,98 @@
+# Note we use 8b in json mode 
+
+import requests
+import json
+from typing import Dict, Any, Optional
+
+def _read_schema_text(json_schema: Any) -> str:
+    """Return schema text for guided_json from a path, file:// URI, or parsed schema."""
+    import os  # local import: os is not among this file's top-level imports
+    if not isinstance(json_schema, str):
+        return json.dumps(json_schema)  # assumed to be an already-parsed schema
+    path = json_schema[len('file://'):] if json_schema.startswith('file://') else json_schema
+    if not os.path.exists(path):
+        # Not found from the cwd: resolve relative to this provider file instead
+        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), path)
+    with open(path, 'r') as file:
+        return file.read()
+
+
+def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    promptfoo custom provider: one chat-completion request, optionally schema-guided.
+
+    Args:
+        prompt: Plain prompt text, or a JSON-encoded list of chat messages.
+        options: Provider options; options['config'] may override api_base, api_key,
+            model, temperature, max_tokens, timeout (seconds) and json_schema.
+        context: Test context from promptfoo (not used by this provider).
+
+    Returns:
+        {"output": ..., "tokenUsage": {...}} on success, otherwise {"error": ...}.
+    """
+    response = None  # stays None if the request fails before a reply is received
+    try:
+        config = options.get('config', {})
+        # Drop trailing slashes so the joined URL never contains "//v1"
+        api_base = config.get('api_base', "https://nim-pc8kmx5ae.brevlab.com").rstrip('/')
+        api_key = config.get('api_key')
+        model = config.get('model', "meta/llama-3.1-8b-instruct")
+        temperature = config.get('temperature', 0.7)
+        max_tokens = config.get('max_tokens', 1000)
+        timeout = config.get('timeout', 600)
+        json_schema = config.get('json_schema', "")
+
+        # Setup headers
+        headers = {"Content-Type": "application/json"}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+
+        # A JSON list prompt is used as chat messages; anything else is one user message
+        try:
+            messages = json.loads(prompt)
+        except json.JSONDecodeError:
+            messages = None
+        if isinstance(messages, list):
+            chat_messages = messages
+        else:
+            chat_messages = [{"role": "user", "content": prompt}]
+
+        # Prepare payload
+        payload = {
+            "model": model,
+            "messages": chat_messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            "stream": False,
+        }
+
+        if json_schema:  # schema-guided output requested: load the configured schema
+            payload["nvext"] = {"guided_json": _read_schema_text(json_schema)}
+
+        # Make request; the timeout keeps a stalled server from hanging the eval
+        response = requests.post(
+            f"{api_base}/v1/chat/completions",
+            headers=headers, json=payload, timeout=timeout,
+        )
+        response.raise_for_status()
+        result = response.json()
+
+        if "choices" not in result or len(result["choices"]) == 0:
+            return {"error": "No choices in response"}
+
+        # promptfoo expects total/prompt/completion; also accept OpenAI-style *_tokens keys
+        usage = result.get("usage") or {}
+        return {
+            "output": result["choices"][0]["message"]["content"],
+            "tokenUsage": {
+                "total": usage.get("total_tokens", usage.get("total", 0)),
+                "prompt": usage.get("prompt_tokens", usage.get("prompt", 0)),
+                "completion": usage.get("completion_tokens", usage.get("completion", 0)),
+            },
+        }
+
+    except requests.exceptions.RequestException as e:
+        body = response.text if response is not None else 'No response'
+        return {"error": f"Request error: {str(e)}\nResponse: {body}"}
+    except Exception as e:
+        return {"error": f"Unexpected error: {str(e)}"}
\ No newline at end of file
diff --git a/services/AgentService/prompts/scripts/get-output.py b/services/AgentService/prompts/scripts/get-output.py
index e14322d..d68f7f1 100644
--- a/services/AgentService/prompts/scripts/get-output.py
+++ b/services/AgentService/prompts/scripts/get-output.py
@@ -1,8 +1,10 @@
 import json
+import os
 
 def get_transform(vars, context):
     """
-    Transform function for promptfoo that extracts just the output field from evaluation results JSON.
+    Transform function for promptfoo that extracts the output field from evaluation results JSON
+    while preserving other variables.
     
     Args:
         vars (dict): Variables passed from promptfoo config
@@ -12,21 +14,18 @@ def get_transform(vars, context):
         dict: Transformed variables including the extracted output
     """
     try:
-        # Read the JSON file content - it will be in vars['text']
-        data = json.loads(vars['text'])
+        # Remove 'file://' prefix if present and get absolute path
+        file_path = vars['text'].replace('file://', '')
         
-        # Navigate through the JSON structure to find the output
-        results = data['results']
-        if isinstance(results, dict) and 'results' in results:
-            for result in results['results']:
-                if 'response' in result and 'output' in result['response']:
-                    # Return the original vars dict with our new transformed text
-                    return {
-                        **vars,
-                        'text': result['response']['output']
-                    }
-                    
-        raise ValueError("Could not find output in the JSON structure")
+        # Read and parse the JSON file directly without joining paths
+        with open(file_path, 'r') as f:
+            data = json.load(f)
+            
+        # Extract the output and return all vars with transformed text
+        return {
+            **vars,
+            'text': data['results']['results'][0]['response']['output']
+        }
         
     except Exception as e:
         print(f"Error transforming variables: {e}")

From e7548c54f91d8b7842b5402b641d4c575da22d01 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sun, 10 Nov 2024 12:34:41 -0800
Subject: [PATCH 08/10] moar

---
 .gitignore                                    |   3 +-
 services/AgentService/eval-harness/Makefile   |  51 -----
 services/AgentService/eval-harness/README.md  |  10 -
 .../AgentService/eval-harness/__init__.py     |   0
 .../eval-harness/generate_schemas.py          |  32 ----
 .../AgentService/eval-harness/nim-provider.py |  86 ---------
 .../eval-harness/promptfooconfig.yaml         | 174 -----------------
 .../tests/schemas/conversation.json           |  45 -----
 .../tests/schemas/podcast_outline.json        |  49 -----
 services/AgentService/prompts.py              | 180 ------------------
 10 files changed, 2 insertions(+), 628 deletions(-)
 delete mode 100644 services/AgentService/eval-harness/Makefile
 delete mode 100644 services/AgentService/eval-harness/README.md
 delete mode 100644 services/AgentService/eval-harness/__init__.py
 delete mode 100644 services/AgentService/eval-harness/generate_schemas.py
 delete mode 100644 services/AgentService/eval-harness/nim-provider.py
 delete mode 100644 services/AgentService/eval-harness/promptfooconfig.yaml
 delete mode 100644 services/AgentService/eval-harness/tests/schemas/conversation.json
 delete mode 100644 services/AgentService/eval-harness/tests/schemas/podcast_outline.json
 delete mode 100644 services/AgentService/prompts.py

diff --git a/.gitignore b/.gitignore
index c6c4be6..cf5ea7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,5 @@ get_helm.sh
 .ruff_cache
 data/minio
 eval.txt
-services/AgentService/prompts/tests
\ No newline at end of file
+services/AgentService/prompts/tests
+.DS_Store
\ No newline at end of file
diff --git a/services/AgentService/eval-harness/Makefile b/services/AgentService/eval-harness/Makefile
deleted file mode 100644
index c5a35b9..0000000
--- a/services/AgentService/eval-harness/Makefile
+++ /dev/null
@@ -1,51 +0,0 @@
-# Colors for better visibility
-GREEN=\033[0;32m
-RED=\033[0;31m
-NC=\033[0m
-
-# Test directories
-TEST_DIR=tests
-OUTPUTS_DIR=$(TEST_DIR)/outputs
-SCHEMAS_DIR=$(TEST_DIR)/schemas
-INPUTS_DIR=$(TEST_DIR)/inputs
-
-# Ensure promptfoo is installed
-ensure-promptfoo:
-	@echo "$(GREEN)Checking npx installation...$(NC)"
-	@if ! command -v npx >/dev/null 2>&1; then \
-		echo "$(RED)Error: npx is not installed. Please install Node.js and npm first.$(NC)"; \
-		exit 1; \
-	fi
-	@echo "$(GREEN)Checking promptfoo installation...$(NC)"
-	@if ! npx promptfoo@latest --version >/dev/null 2>&1; then \
-		echo "$(GREEN)Installing promptfoo...$(NC)"; \
-		npx promptfoo@latest --version >/dev/null 2>&1 || true; \
-	fi
-
-# Generate JSON schemas
-generate-schemas:
-	@echo "$(GREEN)Generating JSON schemas from Pydantic models...$(NC)"
-	@python generate_schemas.py $(SCHEMAS_DIR)
-
-# Setup test environment
-setup-test: ensure-promptfoo
-	@echo "$(GREEN)Setting up test environment...$(NC)"
-	@mkdir -p $(INPUTS_DIR) $(OUTPUTS_DIR) $(SCHEMAS_DIR)
-	@make generate-schemas
-
-# Run end-to-end test
-test-e2e: setup-test
-	@echo "$(GREEN)Running end-to-end tests...$(NC)"
-	npx promptfoo@latest eval --output $(OUTPUTS_DIR)/out.html
-
-# Run original config
-test-basic: ensure-promptfoo
-	@echo "$(GREEN)Running basic promptfoo tests...$(NC)"
-	npx promptfoo@latest eval -c promptfooconfig.yaml
-
-# Clean test outputs
-clean:
-	@echo "$(GREEN)Cleaning test outputs...$(NC)"
-	rm -rf $(TEST_DIR)
-
-.PHONY: ensure-promptfoo generate-schemas setup-test test-e2e test-basic clean
\ No newline at end of file
diff --git a/services/AgentService/eval-harness/README.md b/services/AgentService/eval-harness/README.md
deleted file mode 100644
index a128bdf..0000000
--- a/services/AgentService/eval-harness/README.md
+++ /dev/null
@@ -1,10 +0,0 @@
-To get started, set your OPENAI_API_KEY environment variable, or other required keys for the providers you selected.
-
-Next, edit promptfooconfig.yaml.
-
-Then run:
-```
-promptfoo eval
-```
-
-Afterwards, you can view the results by running `promptfoo view`
diff --git a/services/AgentService/eval-harness/__init__.py b/services/AgentService/eval-harness/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/services/AgentService/eval-harness/generate_schemas.py b/services/AgentService/eval-harness/generate_schemas.py
deleted file mode 100644
index 4fce6ae..0000000
--- a/services/AgentService/eval-harness/generate_schemas.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# tests/generate_schemas.py
-import json
-import sys
-from pathlib import Path
-sys.path.append("../../..")  # Add root to path to import shared
-
-from shared.shared.shared_types import Conversation, PodcastOutline
-
-def generate_schemas(output_dir: Path):
-    """Generate JSON schemas from Pydantic models."""
-    # Ensure output directory exists
-    output_dir.mkdir(parents=True, exist_ok=True)
-    
-    # Generate and save PodcastOutline schema
-    podcast_schema = PodcastOutline.model_json_schema()
-    with open(output_dir / "podcast_outline.json", "w") as f:
-        json.dump(podcast_schema, f, indent=2)
-    print("Generated podcast_outline.json")
-    
-    # Generate and save Conversation schema
-    conversation_schema = Conversation.model_json_schema()
-    with open(output_dir / "conversation.json", "w") as f:
-        json.dump(conversation_schema, f, indent=2)
-    print("Generated conversation.json")
-
-if __name__ == "__main__":
-    if len(sys.argv) > 1:
-        output_dir = Path(sys.argv[1])
-    else:
-        output_dir = Path(__file__).parent / "schemas"
-    
-    generate_schemas(output_dir)
\ No newline at end of file
diff --git a/services/AgentService/eval-harness/nim-provider.py b/services/AgentService/eval-harness/nim-provider.py
deleted file mode 100644
index 8b1e8d1..0000000
--- a/services/AgentService/eval-harness/nim-provider.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# chat_provider.py
-import requests
-import json
-from typing import Dict, Any, Optional
-
-def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Custom provider for chat completions using your existing infrastructure.
-    
-    Args:
-        prompt: The prompt text or JSON string of messages
-        options: Configuration options from the YAML file
-        context: Test context including variables used
-    
-    Returns:
-        Dict containing output or error
-    """
-    try:
-        # Get configuration from options
-        config = options.get('config', {})
-        api_base = config.get('api_base', "https://youngthug.demoz.io")
-        api_key = config.get('api_key')
-        model = config.get('model', "meta/llama-3.1-405b-instruct")
-        temperature = config.get('temperature', 0.7)
-        max_tokens = config.get('max_tokens', 1000)
-
-        # Setup headers
-        headers = {
-            "Content-Type": "application/json"
-        }
-        if api_key:
-            headers["Authorization"] = f"Bearer {api_key}"
-
-        # Handle different prompt formats
-        try:
-            # Check if prompt is a JSON string containing messages
-            messages = json.loads(prompt)
-            if isinstance(messages, list):
-                chat_messages = messages
-            else:
-                chat_messages = [{"role": "user", "content": prompt}]
-        except json.JSONDecodeError:
-            # If not JSON, treat as regular prompt
-            chat_messages = [{"role": "user", "content": prompt}]
-
-        # Prepare payload
-        payload = {
-            "model": model,
-            "messages": chat_messages,
-            "temperature": temperature,
-            "max_tokens": max_tokens,
-            "stream": False  
-        }
-
-        # Make request
-        response = requests.post(
-            f"{api_base}/v1/chat/completions",
-            headers=headers,
-            json=payload
-        )
-        response.raise_for_status()
-        result = response.json()
-
-        if "choices" in result and len(result["choices"]) > 0:
-            output = result["choices"][0]["message"]["content"]
-            return {
-                "output": output,
-                "tokenUsage": result.get("usage", {
-                    "total": 0,
-                    "prompt": 0,
-                    "completion": 0
-                })
-            }
-        else:
-            return {
-                "error": "No choices in response"
-            }
-
-    except requests.exceptions.RequestException as e:
-        return {
-            "error": f"Request error: {str(e)}\nResponse: {response.text if 'response' in locals() else 'No response'}"
-        }
-    except Exception as e:
-        return {
-            "error": f"Unexpected error: {str(e)}"
-        }
\ No newline at end of file
diff --git a/services/AgentService/eval-harness/promptfooconfig.yaml b/services/AgentService/eval-harness/promptfooconfig.yaml
deleted file mode 100644
index 0de4de2..0000000
--- a/services/AgentService/eval-harness/promptfooconfig.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-description: "Complete End-to-End Podcast Generation Test"
-evaluateOptions:
-  maxConcurrency: 1
-  showProgressBar: true
-
-prompts:
-  - id: "./prompts.py:PodcastPrompts.raw_outline_prompt"
-    label: "raw_outline"
-  - id: "./prompts.py:PodcastPrompts.outline_prompt"
-    label: "outline"
-  # - name: "segment_transcript"
-  #   file: "../prompts.py:PodcastPrompts.segment_transcript_prompt"
-  # - name: "deep_dive"
-  #   file: "../prompts.py:PodcastPrompts.deep_dive_prompt"
-  # - name: "transcript"
-  #   file: "../prompts.py:PodcastPrompts.transcript_prompt"
-  # - name: "raw_podcast_dialogue"
-  #   file: "../prompts.py:PodcastPrompts.raw_podcast_dialogue_prompt_v2"
-  # - name: "fuse_outline"
-  #   file: "../prompts.py:PodcastPrompts.fuse_outline_prompt"
-  # - name: "revise"
-  #   file: "../prompts.py:PodcastPrompts.revise_prompt"
-  # - name: "podcast_dialogue"
-  #   file: "../prompts.py:PodcastPrompts.podcast_dialogue_prompt"
-
-providers:
-  - id: "file://./nim-provider.py"
-    label: "405b"
-    config:
-      api_base: "https://youngthug.demoz.io"
-      model: "meta/llama-3.1-405b-instruct"
-      temperature: 0.7
-      max_tokens: 1000
-
-tests:
-  - description: "Raw Outline Generation"
-    vars:
-      text: file://./eval.txt
-      duration: 15
-    storeOutputAs: raw_outline_output
-    assert:
-      - type: llm-rubric
-        value: |
-          Evaluate if the outline:
-          1. Has clear sections for background, innovation, impact, and future work
-          2. Makes innovation the focus
-          3. Allocates time appropriately for {{duration}} minutes
-          4. Maintains technical accuracy while being accessible
-          Pass if all scores >= 0.95
-        provider: "file://./nim-provider.py"
-
-  # Step 2: JSON Outline
-  - description: "JSON Outline Conversion"
-    vars:
-      text: "{{raw_outline_output}}"
-      schema: file://tests/schemas/podcast_outline.json
-    prompt: outline
-    assert:
-      - type: contains-json
-        value: file://tests/schemas/podcast_outline.json
-
-  # # Step 3: Segment Processing
-  # - description: "Segment Transcript Generation"
-  #   vars:
-  #     text: file://tests/inputs/sample.txt
-  #     duration: 5
-  #     topic: "Neural Network Architecture"
-  #     angles: "Technical implementation, Results, Impact"
-  #   prompt: segment_transcript
-  #   assert:
-  #     - type: llm-rubric
-  #       value: |
-  #         Check if the transcript:
-  #         1. Explains concepts clearly for no prior knowledge
-  #         2. Uses analogies and examples
-  #         3. Addresses potential questions
-  #         4. Provides comprehensive context
-  #         5. Fits {{duration}} minute length
-  #         Pass if all >= 0.7
-
-  # # Step 4: Deep Dive Analysis
-  # - description: "Deep Dive Processing"
-  #   vars:
-  #     text: file://tests/inputs/sample.txt
-  #     topic: "Neural Networks for Climate Prediction"
-  #     duration: 7
-  #   prompt: deep_dive
-  #   assert:
-  #     - type: llm-rubric
-  #       value: |
-  #         Verify the deep dive:
-  #         1. Expands content with details and examples
-  #         2. Creates clear topic/subtopic structure
-  #         3. Maintains focus on important aspects
-  #         4. Fits {{duration}} minute timeframe
-  #         Pass if all >= 0.75
-
-  # # Step 5: Transcript Combination
-  # - description: "Transcript Combination"
-  #   vars:
-  #     segments: [["First segment content", 5], ["Second segment content", 5]]
-  #   prompt: transcript
-  #   assert:
-  #     - type: llm-rubric
-  #       value: |
-  #         Check if combined transcript:
-  #         1. Maintains all original content
-  #         2. Has natural flow between segments
-  #         3. Keeps storytelling elements
-  #         Pass if all >= 0.7
-
-  # # Step 6: Raw Dialogue Generation
-  # - description: "Raw Podcast Dialogue"
-  #   vars:
-  #     text: "{{previousOutput}}"
-  #     duration: 15
-  #     speaker_1_name: "Dr. Sarah"
-  #     speaker_2_name: "Prof. Mike"
-  #     descriptions: "Neural Networks, Climate Prediction"
-  #   prompt: raw_podcast_dialogue
-  #   assert:
-  #     - type: llm-rubric
-  #       value: |
-  #         Evaluate dialogue:
-  #         1. Natural conversation with appropriate fillers
-  #         2. Good speaker alternation
-  #         3. Maintains technical accuracy
-  #         4. Includes required conversation elements
-  #         Pass if all >= 0.75
-
-  # # Step 7: Outline Fusion
-  # - description: "Outline Fusion"
-  #   vars:
-  #     overall_outline: "Main outline content"
-  #     sub_outline: "Detailed section outline"
-  #   prompt: fuse_outline
-  #   assert:
-  #     - type: llm-rubric
-  #       value: |
-  #         Check if fused outline:
-  #         1. Combines both outlines without loss
-  #         2. Maintains proper structure
-  #         3. Uses correct time budgets
-  #         Pass if all >= 0.7
-
-  # # Step 8: Dialogue Revision
-  # - description: "Dialogue Revision"
-  #   vars:
-  #     outline: "Podcast outline"
-  #     raw_transcript: "Raw content"
-  #     dialogue_transcript: "Initial dialogue"
-  #   prompt: revise
-  #   assert:
-  #     - type: llm-rubric
-  #       value: |
-  #         Verify revised dialogue:
-  #         1. Natural conversation flow
-  #         2. Proper speaker exchanges
-  #         3. No explicit transitions
-  #         4. No information loss
-  #         Pass if all >= 0.8
-
-  # # Step 9: Final JSON Conversion
-  # - description: "Final JSON Format"
-  #   vars:
-  #     text: "{{previousOutput}}"
-  #     schema: file://tests/schemas/conversation.json
-  #     speaker_1_name: "Dr. Sarah"
-  #     speaker_2_name: "Prof. Mike"
-  #   prompt: podcast_dialogue
-  #   assert:
-  #     - type: contains-json
-  #     - type: json-schema
-  #       value: file://tests/schemas/conversation.json
diff --git a/services/AgentService/eval-harness/tests/schemas/conversation.json b/services/AgentService/eval-harness/tests/schemas/conversation.json
deleted file mode 100644
index 24c0484..0000000
--- a/services/AgentService/eval-harness/tests/schemas/conversation.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
-  "$defs": {
-    "DialogueEntry": {
-      "properties": {
-        "text": {
-          "title": "Text",
-          "type": "string"
-        },
-        "speaker": {
-          "enum": [
-            "speaker-1",
-            "speaker-2"
-          ],
-          "title": "Speaker",
-          "type": "string"
-        }
-      },
-      "required": [
-        "text",
-        "speaker"
-      ],
-      "title": "DialogueEntry",
-      "type": "object"
-    }
-  },
-  "properties": {
-    "scratchpad": {
-      "title": "Scratchpad",
-      "type": "string"
-    },
-    "dialogue": {
-      "items": {
-        "$ref": "#/$defs/DialogueEntry"
-      },
-      "title": "Dialogue",
-      "type": "array"
-    }
-  },
-  "required": [
-    "scratchpad",
-    "dialogue"
-  ],
-  "title": "Conversation",
-  "type": "object"
-}
\ No newline at end of file
diff --git a/services/AgentService/eval-harness/tests/schemas/podcast_outline.json b/services/AgentService/eval-harness/tests/schemas/podcast_outline.json
deleted file mode 100644
index 7a07b89..0000000
--- a/services/AgentService/eval-harness/tests/schemas/podcast_outline.json
+++ /dev/null
@@ -1,49 +0,0 @@
-{
-  "$defs": {
-    "PodcastSegment": {
-      "properties": {
-        "section": {
-          "title": "Section",
-          "type": "string"
-        },
-        "descriptions": {
-          "items": {
-            "type": "string"
-          },
-          "title": "Descriptions",
-          "type": "array"
-        },
-        "duration": {
-          "title": "Duration",
-          "type": "integer"
-        }
-      },
-      "required": [
-        "section",
-        "descriptions",
-        "duration"
-      ],
-      "title": "PodcastSegment",
-      "type": "object"
-    }
-  },
-  "properties": {
-    "title": {
-      "title": "Title",
-      "type": "string"
-    },
-    "segments": {
-      "items": {
-        "$ref": "#/$defs/PodcastSegment"
-      },
-      "title": "Segments",
-      "type": "array"
-    }
-  },
-  "required": [
-    "title",
-    "segments"
-  ],
-  "title": "PodcastOutline",
-  "type": "object"
-}
\ No newline at end of file
diff --git a/services/AgentService/prompts.py b/services/AgentService/prompts.py
deleted file mode 100644
index c0c5d1c..0000000
--- a/services/AgentService/prompts.py
+++ /dev/null
@@ -1,180 +0,0 @@
-import jinja2
-
-# Raw string prompts
-RAW_OUTLINE_PROMPT_STR = """I want to make the following paper into a podcast transcript for {{ duration }} minutes, to help audience understand background, innovation, impact and future work. 
-
-Come up the structure of the podcast.
-                                 
-{{ text }}
-                                     
-Innovation should be the focus of the podcast, and the most important part of the podcast, with enough details."""
-
-OUTLINE_PROMPT_STR = """Given the free form outline, convert in into a structured outline without losing any information.                                 
-
-{{ text }}
-                                                           
-The result must conform to the following JSON schema:\n{{ schema }}\n\n"""
-
-SEGMENT_TRANSCRIPT_PROMPT_STR = """Make a transcript given the text:
-
-{{ text }}
-                                            
-The transcript is about {{ duration }} minutes, approximately {{ (duration * 180) | int }} words.
-The transcript's subject is {{ topic }}, and should focus on the following topics: {{ angles }}
-                                            
-Explain all concepts clearly, assuming no prior knowledge
-Use analogies, stories, or examples to illustrate points
-Address potential questions or counterarguments
-Provide context and background information throughout
-Make sure the details, numbers are accurate and comprehensive
-                                            
-Dive deep into each topic, and provide enough details given the time budget, don't leave any stone unturned."""
-
-DEEP_DIVE_PROMPT_STR = """You will be given some content, short ideas or thoughts about the content.
-
-Your task is to expand the content into a detailed and comprehensive explanation, with enough details and examples.
-
-Here is the content
-
-{{text}}
-                                   
-The topic will be around
-                                   
-{{topic}}
-                                   
-Dive deep into each topic, come up with an outline with topics and subtopics to help fully understand the content.
-Expand the topics, don't add any other topics. Allocate time budget for each topic. Total time budget should be {{ duration }} minutes.
-Focus on the most important topics and ideas, and allocate more time budget to them.
-Avoid introduction and conclusion in the outline, focus on expanding into subtopics."""
-
-TRANSCRIPT_PROMPT_STR = """Given the transcript of different segments,combine and optimize the transcript to make the flow more natural.
-The content should be strictly following the transcript, and only optimize the flow. Keep all the details, and storytelling contents.
-
-{% for segment, duration in segments %}
-
-Time budget: {{ duration }} minutes, approximately {{ (duration * 180) | int }} words.
-{{ segment }}                                    
-
-{% endfor %}
-                                    
-Only return the full transcript, no need to include any other information like time budget or segment name."""
-
-RAW_PODCAST_DIALOGUE_PROMPT_V2_STR = """Your task is to transform the provided input transcript into a lively, engaging, and informative podcast dialogue. 
-
-There are two speakers, speaker-1 and speaker-2.
-speaker-1's name is {{ speaker_1_name }}, and speaker-2's name is {{ speaker_2_name }}.
-
-Given the following conversation, make introductions for both speakers at beginning of the conversation.
-During the conversation, occasionally mention the speaker's name to refer to them, to make the conversation more natural.
-Incorporate natural speech patterns, including occasional verbal fillers (e.g., "um," "well," "you know")
-Use casual language and ensure the dialogue flows smoothly, reflecting a real-life conversation
-The fillers should be used naturally, not in every sentence, and not in a robotic way but related to topic and conversation context.
-                                          
-Maintain a lively pace with a mix of serious discussion and lighter moments
-Use rhetorical questions or hypotheticals to involve the listener
-Create natural moments of reflection or emphasis
-     
-Allow for natural interruptions and back-and-forth between host and guest
-Ensure the guest's responses are substantiated by the input text, avoiding unsupported claims                                   
-Avoid long sentences from either speaker, break them into conversations between two speakers.
-Throughout the script, strive for authenticity in the conversation. Include:
-   - Moments of genuine curiosity or surprise from the host
-   - Instances where the guest might briefly struggle to articulate a complex idea
-   - Light-hearted moments or humor when appropriate
-   - Brief personal anecdotes or examples that relate to the topic (within the bounds of the input text)
-                 
-Don't lose any information or details in the transcript. It is only format conversion, so strictly follow the transcript.
-                                                 
-This segment is about {{ duration }} minutes, approximately {{ (duration * 180) | int }} words.
-The topic is {{ descriptions }}
-                                          
-You should keep all analogies, stories, examples, and quotes from the transcript.
-
-Here is the transcript:
-{{text}}
-                                          
-Only return the full dialogue transcript, no need to include any other information like time budget or segment name.
-Don't add introduction and ending to the dialogue unless it is provided in the transcript."""
-
-FUSE_OUTLINE_PROMPT_STR = """You are given two outlines, one is overall outline, another is sub-outline for one section in the overall outline.
-You need to fuse the two outlines into a new outline, to represent the whole podcast without losing any descriptions in sub sections.
-Ignore the time budget in the sub-outline, and use the time budget in the overall outline.
-Overall outline:
-{{ overall_outline }}
-
-Sub-outline:
-{{ sub_outline }}
-
-Output the new outline with the tree structure."""
-
-REVISE_PROMPT_STR = """You are given a podcast dialogue transcript, and a raw transcript of the podcast.
-You are only allowed to copy information from the raw dialogue transcript to make the conversation more natural and engaging, but exactly follow the outline.
-                                
-Outline:
-{{ outline}}
-
-Here is the dialogue transcript:
-{{ dialogue_transcript }}
-
-You need also to break long sentences from either speaker into conversations between two speakers, by inserting more dialogue entries and verbal fillers (e.g., "um")
-Don't let a single speaker talk more than 2 sentences, and break the conversation into multiple exchanges between two speakers.
-                                
-Don't make any explict transition between sections, this is one podcast, and the sections are connected.
-Don't use words like "Welcome back" or "Now we are going to talk about" etc.
-Don't make introductions in the middle of the conversation.
-Merge related topics according to outline and don't repeat same things in different place.
-                                
-Don't lose any information or details from the raw transcript, only make the conversation flow more natural."""
-
-PODCAST_DIALOGUE_PROMPT_STR = """Given a podcast transcript between two speakers, convert it into a structured JSON format.
-- Only do conversion
-- Don't miss any information in the transcript
-
-There are two speakers, speaker-1 and speaker-2.
-speaker-1's name is {{ speaker_1_name }}, and speaker-2's name is {{ speaker_2_name }}.
-                                          
-Here is the original transcript:
-{{ text }}
-                                          
-The result must conform to the following JSON schema:\n{{ schema }}\n\n"""
-
-# Wrap raw strings in Jinja templates
-RAW_OUTLINE_PROMPT = jinja2.Template(RAW_OUTLINE_PROMPT_STR)
-OUTLINE_PROMPT = jinja2.Template(OUTLINE_PROMPT_STR)
-SEGMENT_TRANSCRIPT_PROMPT = jinja2.Template(SEGMENT_TRANSCRIPT_PROMPT_STR)
-DEEP_DIVE_PROMPT = jinja2.Template(DEEP_DIVE_PROMPT_STR)
-TRANSCRIPT_PROMPT = jinja2.Template(TRANSCRIPT_PROMPT_STR)
-RAW_PODCAST_DIALOGUE_PROMPT_v2 = jinja2.Template(RAW_PODCAST_DIALOGUE_PROMPT_V2_STR)
-FUSE_OUTLINE_PROMPT = jinja2.Template(FUSE_OUTLINE_PROMPT_STR)
-REVISE_PROMPT = jinja2.Template(REVISE_PROMPT_STR)
-PODCAST_DIALOGUE_PROMPT = jinja2.Template(PODCAST_DIALOGUE_PROMPT_STR)
-
-
-# Class to hold all prompts
-class PodcastPrompts:
-    def raw_outline_prompt(self):
-        return RAW_OUTLINE_PROMPT_STR
-
-    def outline_prompt(self):
-        return OUTLINE_PROMPT_STR
-
-    def segment_transcript_prompt(self):
-        return SEGMENT_TRANSCRIPT_PROMPT_STR
-
-    def deep_dive_prompt(self):
-        return DEEP_DIVE_PROMPT_STR
-
-    def transcript_prompt(self):
-        return TRANSCRIPT_PROMPT_STR
-
-    def raw_podcast_dialogue_prompt_v2(self):
-        return RAW_PODCAST_DIALOGUE_PROMPT_V2_STR
-
-    def fuse_outline_prompt(self):
-        return FUSE_OUTLINE_PROMPT_STR
-
-    def revise_prompt(self):
-        return REVISE_PROMPT_STR
-
-    def podcast_dialogue_prompt(self):
-        return PODCAST_DIALOGUE_PROMPT_STR

From 2c885e6ac1b07e8dd38dde70c2fb2102704db82d Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sun, 10 Nov 2024 13:11:40 -0800
Subject: [PATCH 09/10] ruff

---
 services/AgentService/main.py                 |  3 +-
 .../prompts/providers/nim-405b.py             | 49 +++++++--------
 .../AgentService/prompts/providers/nim-70b.py | 49 +++++++--------
 .../AgentService/prompts/providers/nim-8b.py  | 59 ++++++++-----------
 .../prompts/scripts/generate_schemas.py       | 11 ++--
 .../prompts/scripts/get-output.py             | 26 ++++----
 .../prompts/scripts/nim-provider.py           | 49 +++++++--------
 .../AgentService/prompts/scripts/run_tests.py | 37 ++++++------
 shared/shared/shared_types.py                 |  2 +-
 9 files changed, 127 insertions(+), 158 deletions(-)

diff --git a/services/AgentService/main.py b/services/AgentService/main.py
index 2aaea94..289c33a 100644
--- a/services/AgentService/main.py
+++ b/services/AgentService/main.py
@@ -1,5 +1,5 @@
 from fastapi import FastAPI, BackgroundTasks, HTTPException
-from shared.shared_types import ServiceType, JobStatus, Conversation, PodcastSegment, PodcastOutline
+from shared.shared_types import ServiceType, JobStatus, Conversation, PodcastOutline
 from shared.storage import StorageManager
 from shared.job import JobStatusManager
 from shared.otel import OpenTelemetryInstrumentation, OpenTelemetryConfig
@@ -32,6 +32,7 @@
 
 # Data Models
 
+
 class TranscriptionRequest(BaseModel):
     markdown: str
     duration: int = 20
diff --git a/services/AgentService/prompts/providers/nim-405b.py b/services/AgentService/prompts/providers/nim-405b.py
index 8b1e8d1..c34b4b9 100644
--- a/services/AgentService/prompts/providers/nim-405b.py
+++ b/services/AgentService/prompts/providers/nim-405b.py
@@ -1,33 +1,34 @@
 # chat_provider.py
 import requests
 import json
-from typing import Dict, Any, Optional
+from typing import Dict, Any
 
-def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+
+def call_api(
+    prompt: str, options: Dict[str, Any], context: Dict[str, Any]
+) -> Dict[str, Any]:
     """
     Custom provider for chat completions using your existing infrastructure.
-    
+
     Args:
         prompt: The prompt text or JSON string of messages
         options: Configuration options from the YAML file
         context: Test context including variables used
-    
+
     Returns:
         Dict containing output or error
     """
     try:
         # Get configuration from options
-        config = options.get('config', {})
-        api_base = config.get('api_base', "https://youngthug.demoz.io")
-        api_key = config.get('api_key')
-        model = config.get('model', "meta/llama-3.1-405b-instruct")
-        temperature = config.get('temperature', 0.7)
-        max_tokens = config.get('max_tokens', 1000)
+        config = options.get("config", {})
+        api_base = config.get("api_base", "https://youngthug.demoz.io")
+        api_key = config.get("api_key")
+        model = config.get("model", "meta/llama-3.1-405b-instruct")
+        temperature = config.get("temperature", 0.7)
+        max_tokens = config.get("max_tokens", 1000)
 
         # Setup headers
-        headers = {
-            "Content-Type": "application/json"
-        }
+        headers = {"Content-Type": "application/json"}
         if api_key:
             headers["Authorization"] = f"Bearer {api_key}"
 
@@ -49,14 +50,12 @@ def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> D
             "messages": chat_messages,
             "temperature": temperature,
             "max_tokens": max_tokens,
-            "stream": False  
+            "stream": False,
         }
 
         # Make request
         response = requests.post(
-            f"{api_base}/v1/chat/completions",
-            headers=headers,
-            json=payload
+            f"{api_base}/v1/chat/completions", headers=headers, json=payload
         )
         response.raise_for_status()
         result = response.json()
@@ -65,22 +64,16 @@ def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> D
             output = result["choices"][0]["message"]["content"]
             return {
                 "output": output,
-                "tokenUsage": result.get("usage", {
-                    "total": 0,
-                    "prompt": 0,
-                    "completion": 0
-                })
+                "tokenUsage": result.get(
+                    "usage", {"total": 0, "prompt": 0, "completion": 0}
+                ),
             }
         else:
-            return {
-                "error": "No choices in response"
-            }
+            return {"error": "No choices in response"}
 
     except requests.exceptions.RequestException as e:
         return {
             "error": f"Request error: {str(e)}\nResponse: {response.text if 'response' in locals() else 'No response'}"
         }
     except Exception as e:
-        return {
-            "error": f"Unexpected error: {str(e)}"
-        }
\ No newline at end of file
+        return {"error": f"Unexpected error: {str(e)}"}
diff --git a/services/AgentService/prompts/providers/nim-70b.py b/services/AgentService/prompts/providers/nim-70b.py
index 042dbea..b0b70bd 100644
--- a/services/AgentService/prompts/providers/nim-70b.py
+++ b/services/AgentService/prompts/providers/nim-70b.py
@@ -1,33 +1,34 @@
 # chat_provider.py
 import requests
 import json
-from typing import Dict, Any, Optional
+from typing import Dict, Any
 
-def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+
+def call_api(
+    prompt: str, options: Dict[str, Any], context: Dict[str, Any]
+) -> Dict[str, Any]:
     """
     Custom provider for chat completions using your existing infrastructure.
-    
+
     Args:
         prompt: The prompt text or JSON string of messages
         options: Configuration options from the YAML file
         context: Test context including variables used
-    
+
     Returns:
         Dict containing output or error
     """
     try:
         # Get configuration from options
-        config = options.get('config', {})
-        api_base = config.get('api_base', "https://small-nim-pc8kmx5ae.brevlab.com/") 
-        api_key = config.get('api_key')
-        model = config.get('model', "meta/llama-3.1-70b-instruct")
-        temperature = config.get('temperature', 0.7)
-        max_tokens = config.get('max_tokens', 1000)
+        config = options.get("config", {})
+        api_base = config.get("api_base", "https://small-nim-pc8kmx5ae.brevlab.com/")
+        api_key = config.get("api_key")
+        model = config.get("model", "meta/llama-3.1-70b-instruct")
+        temperature = config.get("temperature", 0.7)
+        max_tokens = config.get("max_tokens", 1000)
 
         # Setup headers
-        headers = {
-            "Content-Type": "application/json"
-        }
+        headers = {"Content-Type": "application/json"}
         if api_key:
             headers["Authorization"] = f"Bearer {api_key}"
 
@@ -49,14 +50,12 @@ def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> D
             "messages": chat_messages,
             "temperature": temperature,
             "max_tokens": max_tokens,
-            "stream": False  
+            "stream": False,
         }
 
         # Make request
         response = requests.post(
-            f"{api_base}/v1/chat/completions",
-            headers=headers,
-            json=payload
+            f"{api_base}/v1/chat/completions", headers=headers, json=payload
         )
         response.raise_for_status()
         result = response.json()
@@ -65,22 +64,16 @@ def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> D
             output = result["choices"][0]["message"]["content"]
             return {
                 "output": output,
-                "tokenUsage": result.get("usage", {
-                    "total": 0,
-                    "prompt": 0,
-                    "completion": 0
-                })
+                "tokenUsage": result.get(
+                    "usage", {"total": 0, "prompt": 0, "completion": 0}
+                ),
             }
         else:
-            return {
-                "error": "No choices in response"
-            }
+            return {"error": "No choices in response"}
 
     except requests.exceptions.RequestException as e:
         return {
             "error": f"Request error: {str(e)}\nResponse: {response.text if 'response' in locals() else 'No response'}"
         }
     except Exception as e:
-        return {
-            "error": f"Unexpected error: {str(e)}"
-        }
\ No newline at end of file
+        return {"error": f"Unexpected error: {str(e)}"}
diff --git a/services/AgentService/prompts/providers/nim-8b.py b/services/AgentService/prompts/providers/nim-8b.py
index 3e39106..97538c4 100644
--- a/services/AgentService/prompts/providers/nim-8b.py
+++ b/services/AgentService/prompts/providers/nim-8b.py
@@ -1,35 +1,36 @@
-# Note we use 8b in json mode 
+# Note we use 8b in json mode
 
 import requests
 import json
-from typing import Dict, Any, Optional
+from typing import Dict, Any
 
-def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+
+def call_api(
+    prompt: str, options: Dict[str, Any], context: Dict[str, Any]
+) -> Dict[str, Any]:
     """
     Custom provider for chat completions using your existing infrastructure.
-    
+
     Args:
         prompt: The prompt text or JSON string of messages
         options: Configuration options from the YAML file
         context: Test context including variables used
-    
+
     Returns:
         Dict containing output or error
     """
     try:
         # Get configuration from options
-        config = options.get('config', {})
-        api_base = config.get('api_base', "https://nim-pc8kmx5ae.brevlab.com")
-        api_key = config.get('api_key')
-        model = config.get('model', "meta/llama-3.1-8b-instruct")
-        temperature = config.get('temperature', 0.7)
-        max_tokens = config.get('max_tokens', 1000)
-        json_schema = config.get('json_schema', "")
+        config = options.get("config", {})
+        api_base = config.get("api_base", "https://nim-pc8kmx5ae.brevlab.com")
+        api_key = config.get("api_key")
+        model = config.get("model", "meta/llama-3.1-8b-instruct")
+        temperature = config.get("temperature", 0.7)
+        max_tokens = config.get("max_tokens", 1000)
+        json_schema = config.get("json_schema", "")
 
         # Setup headers
-        headers = {
-            "Content-Type": "application/json"
-        }
+        headers = {"Content-Type": "application/json"}
         if api_key:
             headers["Authorization"] = f"Bearer {api_key}"
 
@@ -51,24 +52,20 @@ def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> D
             "messages": chat_messages,
             "temperature": temperature,
             "max_tokens": max_tokens,
-            "stream": False  
+            "stream": False,
         }
 
         if json_schema != "":
             # Remove 'file://' prefix if present
             print(json_schema)
             json_schema_path = "/Users/idhanani/Desktop/notebooklm/backend/services/AgentService/prompts/tests/schemas/podcast_outline.json"
-            with open(json_schema_path, 'r') as file:
+            with open(json_schema_path, "r") as file:
                 json_schema_content = file.read()
-                payload["nvext"] = {
-                    "guided_json": json_schema_content
-                }
+                payload["nvext"] = {"guided_json": json_schema_content}
 
         # Make request
         response = requests.post(
-            f"{api_base}/v1/chat/completions",
-            headers=headers,
-            json=payload
+            f"{api_base}/v1/chat/completions", headers=headers, json=payload
         )
         response.raise_for_status()
         result = response.json()
@@ -77,22 +74,16 @@ def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> D
             output = result["choices"][0]["message"]["content"]
             return {
                 "output": output,
-                "tokenUsage": result.get("usage", {
-                    "total": 0,
-                    "prompt": 0,
-                    "completion": 0
-                })
+                "tokenUsage": result.get(
+                    "usage", {"total": 0, "prompt": 0, "completion": 0}
+                ),
             }
         else:
-            return {
-                "error": "No choices in response"
-            }
+            return {"error": "No choices in response"}
 
     except requests.exceptions.RequestException as e:
         return {
             "error": f"Request error: {str(e)}\nResponse: {response.text if 'response' in locals() else 'No response'}"
         }
     except Exception as e:
-        return {
-            "error": f"Unexpected error: {str(e)}"
-        }
\ No newline at end of file
+        return {"error": f"Unexpected error: {str(e)}"}
diff --git a/services/AgentService/prompts/scripts/generate_schemas.py b/services/AgentService/prompts/scripts/generate_schemas.py
index 1fe8d7f..ab6784e 100644
--- a/services/AgentService/prompts/scripts/generate_schemas.py
+++ b/services/AgentService/prompts/scripts/generate_schemas.py
@@ -2,34 +2,35 @@
 import json
 import sys
 from pathlib import Path
+from shared.shared.shared_types import Conversation, PodcastOutline
 
 # Get the absolute path to the root directory
 root_dir = Path(__file__).resolve().parents[4]
 sys.path.append(str(root_dir))
 
-from shared.shared.shared_types import Conversation, PodcastOutline
 
 def generate_schemas(output_dir: Path):
     """Generate JSON schemas from Pydantic models."""
     # Ensure output directory exists
     output_dir.mkdir(parents=True, exist_ok=True)
-    
+
     # Generate and save PodcastOutline schema
     podcast_schema = PodcastOutline.model_json_schema()
     with open(output_dir / "podcast_outline.json", "w") as f:
         json.dump(podcast_schema, f, indent=2)
     print("Generated podcast_outline.json")
-    
+
     # Generate and save Conversation schema
     conversation_schema = Conversation.model_json_schema()
     with open(output_dir / "conversation.json", "w") as f:
         json.dump(conversation_schema, f, indent=2)
     print("Generated conversation.json")
 
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         output_dir = Path(sys.argv[1])
     else:
         output_dir = Path(__file__).parent / "schemas"
-    
-    generate_schemas(output_dir)
\ No newline at end of file
+
+    generate_schemas(output_dir)
diff --git a/services/AgentService/prompts/scripts/get-output.py b/services/AgentService/prompts/scripts/get-output.py
index d68f7f1..a9bc483 100644
--- a/services/AgentService/prompts/scripts/get-output.py
+++ b/services/AgentService/prompts/scripts/get-output.py
@@ -1,35 +1,29 @@
 import json
-import os
+
 
 def get_transform(vars, context):
     """
     Transform function for promptfoo that extracts the output field from evaluation results JSON
     while preserving other variables.
-    
+
     Args:
         vars (dict): Variables passed from promptfoo config
         context (dict): Additional context from promptfoo
-        
+
     Returns:
         dict: Transformed variables including the extracted output
     """
     try:
         # Remove 'file://' prefix if present and get absolute path
-        file_path = vars['text'].replace('file://', '')
-        
+        file_path = vars["text"].replace("file://", "")
+
         # Read and parse the JSON file directly without joining paths
-        with open(file_path, 'r') as f:
+        with open(file_path, "r") as f:
             data = json.load(f)
-            
+
         # Extract the output and return all vars with transformed text
-        return {
-            **vars,
-            'text': data['results']['results'][0]['response']['output']
-        }
-        
+        return {**vars, "text": data["results"]["results"][0]["response"]["output"]}
+
     except Exception as e:
         print(f"Error transforming variables: {e}")
-        return {
-            **vars,
-            'error': f'Failed to transform variables: {str(e)}'
-        }
\ No newline at end of file
+        return {**vars, "error": f"Failed to transform variables: {str(e)}"}
diff --git a/services/AgentService/prompts/scripts/nim-provider.py b/services/AgentService/prompts/scripts/nim-provider.py
index 8b1e8d1..c34b4b9 100644
--- a/services/AgentService/prompts/scripts/nim-provider.py
+++ b/services/AgentService/prompts/scripts/nim-provider.py
@@ -1,33 +1,34 @@
 # chat_provider.py
 import requests
 import json
-from typing import Dict, Any, Optional
+from typing import Dict, Any
 
-def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
+
+def call_api(
+    prompt: str, options: Dict[str, Any], context: Dict[str, Any]
+) -> Dict[str, Any]:
     """
     Custom provider for chat completions using your existing infrastructure.
-    
+
     Args:
         prompt: The prompt text or JSON string of messages
         options: Configuration options from the YAML file
         context: Test context including variables used
-    
+
     Returns:
         Dict containing output or error
     """
     try:
         # Get configuration from options
-        config = options.get('config', {})
-        api_base = config.get('api_base', "https://youngthug.demoz.io")
-        api_key = config.get('api_key')
-        model = config.get('model', "meta/llama-3.1-405b-instruct")
-        temperature = config.get('temperature', 0.7)
-        max_tokens = config.get('max_tokens', 1000)
+        config = options.get("config", {})
+        api_base = config.get("api_base", "https://youngthug.demoz.io")
+        api_key = config.get("api_key")
+        model = config.get("model", "meta/llama-3.1-405b-instruct")
+        temperature = config.get("temperature", 0.7)
+        max_tokens = config.get("max_tokens", 1000)
 
         # Setup headers
-        headers = {
-            "Content-Type": "application/json"
-        }
+        headers = {"Content-Type": "application/json"}
         if api_key:
             headers["Authorization"] = f"Bearer {api_key}"
 
@@ -49,14 +50,12 @@ def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> D
             "messages": chat_messages,
             "temperature": temperature,
             "max_tokens": max_tokens,
-            "stream": False  
+            "stream": False,
         }
 
         # Make request
         response = requests.post(
-            f"{api_base}/v1/chat/completions",
-            headers=headers,
-            json=payload
+            f"{api_base}/v1/chat/completions", headers=headers, json=payload
         )
         response.raise_for_status()
         result = response.json()
@@ -65,22 +64,16 @@ def call_api(prompt: str, options: Dict[str, Any], context: Dict[str, Any]) -> D
             output = result["choices"][0]["message"]["content"]
             return {
                 "output": output,
-                "tokenUsage": result.get("usage", {
-                    "total": 0,
-                    "prompt": 0,
-                    "completion": 0
-                })
+                "tokenUsage": result.get(
+                    "usage", {"total": 0, "prompt": 0, "completion": 0}
+                ),
             }
         else:
-            return {
-                "error": "No choices in response"
-            }
+            return {"error": "No choices in response"}
 
     except requests.exceptions.RequestException as e:
         return {
             "error": f"Request error: {str(e)}\nResponse: {response.text if 'response' in locals() else 'No response'}"
         }
     except Exception as e:
-        return {
-            "error": f"Unexpected error: {str(e)}"
-        }
\ No newline at end of file
+        return {"error": f"Unexpected error: {str(e)}"}
diff --git a/services/AgentService/prompts/scripts/run_tests.py b/services/AgentService/prompts/scripts/run_tests.py
index 077bc84..c728349 100644
--- a/services/AgentService/prompts/scripts/run_tests.py
+++ b/services/AgentService/prompts/scripts/run_tests.py
@@ -2,48 +2,47 @@
 import argparse
 import subprocess
 from pathlib import Path
-from typing import List, Optional
+from typing import List
+
 
 class PromptTestRunner:
     def __init__(self, config_dir: str = "configs"):
         self.base_dir = Path(__file__).parent.parent  # prompts directory
         self.config_dir = self.base_dir / config_dir
         self.outputs_dir = self.base_dir / "tests/outputs"
-        
+
     def get_stage_configs(self) -> List[Path]:
         """Get all numbered configuration files in order."""
         return sorted(self.config_dir.glob("[0-9][0-9]_*.yaml"))
-    
+
     def run_stage(self, config_path: Path) -> bool:
         """Run a single test stage using promptfoo."""
         print(f"\n=== Running stage: {config_path.stem} ===")
-        
+
         # Create output path for this stage
         output_path = self.outputs_dir / f"{config_path.stem}_results.json"
-        
+
         result = subprocess.run(
-            ["promptfoo", "eval", 
-             "-c", str(config_path),
-             "--output", str(output_path)],
+            ["promptfoo", "eval", "-c", str(config_path), "--output", str(output_path)],
             capture_output=True,
-            text=True
+            text=True,
         )
-        
+
         # Print the output regardless of success/failure
         if result.stdout:
             print(result.stdout)
         if result.returncode != 0:
             print(f"Error: {result.stderr}")
-            
+
         return result.returncode == 0
-    
+
     def run_all_stages(self) -> None:
         """Run all test stages in order."""
         for config in self.get_stage_configs():
             if not self.run_stage(config):
                 print(f"\nStage {config.stem} failed. Stopping pipeline.")
                 break
-    
+
     def run_up_to_stage(self, target_stage: int) -> None:
         """Run all stages up to and including the target stage number."""
         for config in self.get_stage_configs():
@@ -54,14 +53,17 @@ def run_up_to_stage(self, target_stage: int) -> None:
                 print(f"\nStage {config.stem} failed. Stopping pipeline.")
                 break
 
+
 def main():
     parser = argparse.ArgumentParser(description="Run prompt tests in stages")
     parser.add_argument("--up-to", type=int, help="Run all stages up to this number")
-    parser.add_argument("--list", action="store_true", help="List all available test stages")
-    
+    parser.add_argument(
+        "--list", action="store_true", help="List all available test stages"
+    )
+
     args = parser.parse_args()
     runner = PromptTestRunner()
-    
+
     if args.list:
         print("Available test stages:")
         for config in runner.get_stage_configs():
@@ -71,5 +73,6 @@ def main():
     else:
         runner.run_all_stages()
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/shared/shared/shared_types.py b/shared/shared/shared_types.py
index 5501d7a..2f3bce7 100644
--- a/shared/shared/shared_types.py
+++ b/shared/shared/shared_types.py
@@ -92,4 +92,4 @@ class PodcastSegment(BaseModel):
 
 class PodcastOutline(BaseModel):
     title: str
-    segments: List[PodcastSegment]
\ No newline at end of file
+    segments: List[PodcastSegment]

From e8ae615941d0d87643bb11e15caaf19aa28edc34 Mon Sep 17 00:00:00 2001
From: ishandhanani <ishandhanani@gmail.com>
Date: Sun, 10 Nov 2024 17:58:02 -0800
Subject: [PATCH 10/10] prompts -> pkg

---
 services/AgentService/prompts/__init__.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 services/AgentService/prompts/__init__.py

diff --git a/services/AgentService/prompts/__init__.py b/services/AgentService/prompts/__init__.py
new file mode 100644
index 0000000..3865200
--- /dev/null
+++ b/services/AgentService/prompts/__init__.py
@@ -0,0 +1,21 @@
+from .prompts import (
+    RAW_OUTLINE_PROMPT,
+    OUTLINE_PROMPT,
+    SEGMENT_TRANSCRIPT_PROMPT,
+    DEEP_DIVE_PROMPT,
+    RAW_PODCAST_DIALOGUE_PROMPT_v2,
+    FUSE_OUTLINE_PROMPT,
+    REVISE_PROMPT,
+    PODCAST_DIALOGUE_PROMPT,
+)
+
+__all__ = [
+    'RAW_OUTLINE_PROMPT',
+    'OUTLINE_PROMPT',
+    'SEGMENT_TRANSCRIPT_PROMPT',
+    'DEEP_DIVE_PROMPT',
+    'RAW_PODCAST_DIALOGUE_PROMPT_v2',
+    'FUSE_OUTLINE_PROMPT',
+    'REVISE_PROMPT',
+    'PODCAST_DIALOGUE_PROMPT',
+]
\ No newline at end of file