Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ get_helm.sh
.env
.ruff_cache
data/minio
eval.txt
eval.txt
services/AgentService/prompts/tests
.DS_Store
15 changes: 0 additions & 15 deletions services/AgentService/eval-harness/Makefile

This file was deleted.

Empty file.
27 changes: 0 additions & 27 deletions services/AgentService/eval-harness/promptfooconfig.yaml

This file was deleted.

45 changes: 0 additions & 45 deletions services/AgentService/eval-harness/tests/schemas/conversation.json

This file was deleted.

This file was deleted.

13 changes: 1 addition & 12 deletions services/AgentService/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from fastapi import FastAPI, BackgroundTasks, HTTPException
from shared.shared_types import ServiceType, JobStatus, Conversation
from shared.shared_types import ServiceType, JobStatus, Conversation, PodcastOutline
from shared.storage import StorageManager
from shared.job import JobStatusManager
from shared.otel import OpenTelemetryInstrumentation, OpenTelemetryConfig
Expand Down Expand Up @@ -33,17 +33,6 @@
# Data Models


class PodcastSegment(BaseModel):
section: str
descriptions: List[str]
duration: int


class PodcastOutline(BaseModel):
title: str
segments: List[PodcastSegment]


class TranscriptionRequest(BaseModel):
markdown: str
duration: int = 20
Expand Down
56 changes: 56 additions & 0 deletions services/AgentService/prompts/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Colors for better visibility in terminal output
GREEN:=\033[0;32m
RED:=\033[0;31m
NC:=\033[0m

# Directory structure (tests/ is fully generated — see .gitignore)
CONFIG_DIR:=configs
TEST_DIR:=tests
OUTPUTS_DIR:=$(TEST_DIR)/outputs
SCHEMAS_DIR:=$(TEST_DIR)/schemas
INPUTS_DIR:=$(TEST_DIR)/inputs

# Delete a half-written target when its recipe fails, so it never
# masquerades as up to date on the next run.
.DELETE_ON_ERROR:

# Ensure promptfoo is installed (attempts a Homebrew install if missing)
ensure-promptfoo:
	@echo "$(GREEN)Checking promptfoo installation...$(NC)"
	@if ! command -v promptfoo >/dev/null 2>&1; then \
		echo "$(RED)Error: promptfoo is not installed. Installing via brew...$(NC)"; \
		brew install promptfoo || { echo "$(RED)Failed to install promptfoo$(NC)"; exit 1; }; \
	fi

# Generate JSON schemas from the Pydantic models into $(SCHEMAS_DIR)
generate-schemas:
	@echo "$(GREEN)Generating JSON schemas from Pydantic models...$(NC)"
	@python scripts/generate_schemas.py $(SCHEMAS_DIR)

# Set up the test environment: create directories, then generate schemas.
# Uses $(MAKE) (not a literal `make`) so flags and the jobserver propagate.
setup-test: ensure-promptfoo
	@echo "$(GREEN)Setting up test environment...$(NC)"
	@mkdir -p $(INPUTS_DIR) $(OUTPUTS_DIR) $(SCHEMAS_DIR)
	@$(MAKE) generate-schemas

# Run all prompt tests
test-prompts: setup-test
	@echo "$(GREEN)Running all prompt tests...$(NC)"
	@cd $(CONFIG_DIR) && python ../scripts/run_tests.py

# Run tests up to a specific stage, e.g. `make test-upto stage=3`
test-upto: setup-test
	@if [ -z "$(stage)" ]; then \
		echo "$(RED)Error: Please specify a stage number with 'stage=N'$(NC)"; \
		exit 1; \
	fi
	@echo "$(GREEN)Running prompt tests up to stage $(stage)...$(NC)"
	@cd $(CONFIG_DIR) && python ../scripts/run_tests.py --up-to $(stage)

# List all available test stages
test-list:
	@echo "$(GREEN)Listing available test stages...$(NC)"
	@python scripts/run_tests.py --list

# Clean generated test artifacts (the whole tests/ tree is regenerated by
# setup-test). Guard against an empty TEST_DIR before the destructive rm.
clean:
	@echo "$(GREEN)Cleaning test outputs...$(NC)"
	$(if $(strip $(TEST_DIR)),,$(error TEST_DIR is empty; refusing to rm -rf))
	rm -rf $(TEST_DIR)

.PHONY: ensure-promptfoo generate-schemas setup-test test-prompts test-upto clean test-list
21 changes: 21 additions & 0 deletions services/AgentService/prompts/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Public re-exports for the AgentService prompt templates.

All prompt constants live in `prompts.py`; this package initializer simply
surfaces them so callers can `from prompts import OUTLINE_PROMPT` etc.
"""

from .prompts import (
    DEEP_DIVE_PROMPT,
    FUSE_OUTLINE_PROMPT,
    OUTLINE_PROMPT,
    PODCAST_DIALOGUE_PROMPT,
    RAW_OUTLINE_PROMPT,
    RAW_PODCAST_DIALOGUE_PROMPT_v2,
    REVISE_PROMPT,
    SEGMENT_TRANSCRIPT_PROMPT,
)

# Names exported on `from prompts import *`.
__all__ = [
    "DEEP_DIVE_PROMPT",
    "FUSE_OUTLINE_PROMPT",
    "OUTLINE_PROMPT",
    "PODCAST_DIALOGUE_PROMPT",
    "RAW_OUTLINE_PROMPT",
    "RAW_PODCAST_DIALOGUE_PROMPT_v2",
    "REVISE_PROMPT",
    "SEGMENT_TRANSCRIPT_PROMPT",
]
28 changes: 28 additions & 0 deletions services/AgentService/prompts/configs/01_raw_outline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Stage 01: generate a free-form podcast outline from raw source text.
# The result is stored as `raw_outline_output` for the next stage
# (02_outline_json.yaml) to consume.
description: "01 Raw Outline Generation"
evaluateOptions:
  # Run serially with a progress bar.
  maxConcurrency: 1
  showProgressBar: true

prompts:
  - "file://../prompts.py:PodcastPrompts.raw_outline_prompt"

providers:
  - id: "file://../providers/nim-405b.py"
    label: "405b"

tests:
  - description: "Raw Outline Generation"
    vars:
      text: file://../data/eval.txt
      duration: 15
    storeOutputAs: raw_outline_output
    assert:
      # Graded by an LLM rubric rather than an exact-match assertion.
      - type: llm-rubric
        value: |
          Evaluate if the outline:
          1. Has clear sections for background, innovation, impact, and future work
          2. Makes innovation the focus
          3. Allocates time appropriately for {{duration}} minutes
          4. Maintains technical accuracy while being accessible
          Pass if all scores >= 0.95
        # NOTE(review): grading provider lives under scripts/ while the test
        # provider lives under providers/ — confirm this path is intentional.
        provider: "file://../scripts/nim-provider.py"
25 changes: 25 additions & 0 deletions services/AgentService/prompts/configs/02_outline_json.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Stage 02: convert the stage-01 raw outline into structured JSON that
# must validate against the generated podcast_outline schema.
description: "02 Outline JSON Generation"
evaluateOptions:
  # Run serially with a progress bar.
  maxConcurrency: 1
  showProgressBar: true

prompts:
  - "file://../prompts.py:PodcastPrompts.outline_prompt"

providers:
  - id: "file://../providers/nim-8b.py"
    label: "8b"
    config:
      # Constrains provider output to the generated Pydantic schema.
      json_schema: file://../tests/schemas/podcast_outline.json

tests:
  - description: "Outline JSON Generation"
    vars:
      # Input is the stored output of stage 01 — run stages in order.
      text: file://../tests/outputs/01_raw_outline_results.json
      schema: file://../tests/schemas/podcast_outline.json
    options:
      transformVars: file://../scripts/get-output.py:get_transform
    storeOutputAs: outline_json_output
    assert:
      # Output must be valid JSON conforming to the outline schema.
      - type: is-json
        value: file://../tests/schemas/podcast_outline.json
79 changes: 79 additions & 0 deletions services/AgentService/prompts/providers/nim-405b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# chat_provider.py
import requests
import json
from typing import Dict, Any


def call_api(
    prompt: str, options: Dict[str, Any], context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Custom promptfoo provider: OpenAI-style chat completion over HTTP.

    Args:
        prompt: Plain prompt text, or a JSON string encoding a list of chat
            messages ([{"role": ..., "content": ...}, ...]).
        options: Provider options from the YAML config. Recognized ``config``
            keys: api_base, api_key, model, temperature, max_tokens, timeout.
        context: Test context including variables used (not read here).

    Returns:
        Dict with "output" and "tokenUsage" on success, or "error" on failure
        (promptfoo's expected provider result shape).
    """
    # Initialized up front so the except block can safely report the
    # response body (replaces the fragile `'response' in locals()` check).
    response = None
    try:
        # Get configuration from options, with defaults for the NIM endpoint
        config = options.get("config", {})
        api_base = config.get("api_base", "https://youngthug.demoz.io")
        api_key = config.get("api_key")
        model = config.get("model", "meta/llama-3.1-405b-instruct")
        temperature = config.get("temperature", 0.7)
        max_tokens = config.get("max_tokens", 1000)
        # Bound the request so a hung endpoint cannot stall the eval run.
        timeout = config.get("timeout", 120)

        # Setup headers; only attach auth when a key is configured
        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        # Accept either a plain string prompt or a JSON-encoded message list
        try:
            messages = json.loads(prompt)
            if isinstance(messages, list):
                chat_messages = messages
            else:
                # Valid JSON but not a message list — treat as plain text
                chat_messages = [{"role": "user", "content": prompt}]
        except json.JSONDecodeError:
            # Not JSON at all — wrap as a single user message
            chat_messages = [{"role": "user", "content": prompt}]

        # Prepare OpenAI-compatible chat completion payload
        payload = {
            "model": model,
            "messages": chat_messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": False,
        }

        # Make request (timeout added; raise_for_status surfaces HTTP errors)
        response = requests.post(
            f"{api_base}/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()

        if "choices" in result and len(result["choices"]) > 0:
            output = result["choices"][0]["message"]["content"]
            return {
                "output": output,
                "tokenUsage": result.get(
                    "usage", {"total": 0, "prompt": 0, "completion": 0}
                ),
            }
        else:
            return {"error": "No choices in response"}

    except requests.exceptions.RequestException as e:
        # Include the response body when the server answered before failing
        body = response.text if response is not None else "No response"
        return {"error": f"Request error: {str(e)}\nResponse: {body}"}
    except Exception as e:
        return {"error": f"Unexpected error: {str(e)}"}
Loading