Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ SMALLEST_API_KEY=your-smallest-api-key

# ElevenLabs ConvAI (if provider: elevenlabs)
ELEVENLABS_API_KEY=your-elevenlabs-api-key

# LiveKit (if provider: livekit)
LIVEKIT_API_KEY=your-livekit-api-key
LIVEKIT_API_SECRET=your-livekit-api-secret
# LIVEKIT_URL can also go here instead of config.yaml
# LIVEKIT_URL=wss://your-project.livekit.cloud
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.env
venv/
.venv/
__pycache__/
*.pyc
results/
Expand Down
17 changes: 14 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ A self-improving loop for voice AI agents. Inspired by the keep/revert pattern f

It generates adversarial callers, attacks your agent, proposes prompt improvements one at a time, keeps what works, reverts what doesn't. Run it overnight, wake up to a better agent.

Works with [Vapi](https://vapi.ai), [Smallest AI](https://smallest.ai), and [ElevenLabs ConvAI](https://elevenlabs.io/conversational-ai).
Works with [Vapi](https://vapi.ai), [Smallest AI](https://smallest.ai), [ElevenLabs ConvAI](https://elevenlabs.io/conversational-ai), and [LiveKit](https://livekit.io).

```
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Expand Down Expand Up @@ -53,6 +53,11 @@ SMALLEST_API_KEY=your-smallest-api-key

# If using ElevenLabs
ELEVENLABS_API_KEY=your-elevenlabs-api-key

# If using LiveKit
LIVEKIT_URL=wss://your-project.livekit.cloud
LIVEKIT_API_KEY=your-livekit-api-key
LIVEKIT_API_SECRET=your-livekit-api-secret
```

You need the Anthropic key (for Claude, which generates scenarios and judges conversations) plus the credentials for whichever voice platform your agent runs on — a single API key for most platforms, or the URL, API key, and API secret for LiveKit.
Expand All @@ -70,14 +75,17 @@ cp examples/smallest.config.yaml config.yaml

# For ElevenLabs
cp examples/elevenlabs.config.yaml config.yaml

# For LiveKit
cp examples/livekit.config.yaml config.yaml
```

Then open `config.yaml` and replace the example with your agent's details.

The config has three required fields:

```yaml
provider: vapi # "vapi", "smallest", or "elevenlabs"
provider: vapi # "vapi", "smallest", "elevenlabs", or "livekit"

assistant:
id: "your-agent-id" # from your platform dashboard
Expand Down Expand Up @@ -297,6 +305,7 @@ Weights and threshold are configurable in `config.yaml` under `scoring:`.
| **[Vapi](https://vapi.ai)** | Live multi-turn conversations via Vapi Chat API | Read/write via assistant PATCH endpoint |
| **[Smallest AI](https://smallest.ai)** | Simulated — Claude plays the agent using the system prompt from the platform | Read/write via Atoms workflow API |
| **[ElevenLabs ConvAI](https://elevenlabs.io/conversational-ai)** | Native `simulate-conversation` endpoint — ElevenLabs runs the real deployed agent (with its tools and knowledge base) and plays the user via a persona prompt | Read/write via agent PATCH endpoint |
| **[LiveKit](https://livekit.io)** | Text-based evals via LiveKit data channel messages — Phase 1 (no audio). Caller bot joins a room and exchanges turns as JSON. | Delegated to `agent_backend` (e.g. `"smallest"`) or managed externally |

**Why simulated for Smallest AI?** Atoms agents only accept audio input through LiveKit rooms — there's no text chat API. Since the system optimizes the *prompt* (not the voice pipeline), simulating conversations with Claude using the actual prompt from the platform is effective and fast.

Expand Down Expand Up @@ -325,7 +334,8 @@ autovoiceevals/
├── examples/
│ ├── vapi.config.yaml Salon booking agent on Vapi
│ ├── smallest.config.yaml Pizza delivery agent on Smallest AI
│ └── elevenlabs.config.yaml Medical clinic scheduling agent on ElevenLabs
│ ├── elevenlabs.config.yaml Medical clinic scheduling agent on ElevenLabs
│ └── livekit.config.yaml LiveKit data-channel agent (Phase 1)
└── autovoiceevals/ Core package
├── cli.py CLI (research | pipeline subcommands)
├── config.py Config loading + validation
Expand All @@ -335,6 +345,7 @@ autovoiceevals/
├── vapi.py Vapi client
├── smallest.py Smallest AI client
├── elevenlabs.py ElevenLabs ConvAI client
├── livekit_provider.py LiveKit data channel client
├── llm.py Claude client
├── evaluator.py Scenario generation, judging, prompt proposals
├── results.py Post-run results viewer
Expand Down
54 changes: 51 additions & 3 deletions autovoiceevals/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ class OutputConfig:
graphs: bool = True


@dataclass
class LiveKitConfig:
    """Settings for the ``livekit`` provider (the ``livekit:`` config section).

    Every field has a default, so an instance can be built even when the
    section is absent from the config file.
    """

    # LiveKit server URL, e.g. "wss://your-project.livekit.cloud".
    url: str = ""
    # Prefix for evaluation room names (presumably a unique suffix is appended
    # per conversation -- confirm against the provider client).
    room_prefix: str = "eval"
    # Data-channel topic used for the exchanged messages.
    data_topic: str = "text"
    # How long to wait for an agent reply (presumably seconds -- confirm).
    response_timeout: float = 30.0
    # How long to wait for the agent to join the room (presumably seconds -- confirm).
    agent_join_timeout: float = 30.0
    # Where the agent's prompt is managed: "smallest" | "local" | "none".
    agent_backend: str = "none"
    # Initial prompt when agent_backend="local".
    system_prompt: str = ""
    # Path to a prompt file; overrides system_prompt if it exists.
    system_prompt_file: str = ""
    # Send the prompt as the first data message of each conversation.
    inject_system_prompt: bool = False


# ---------------------------------------------------------------------------
# Top-level config
# ---------------------------------------------------------------------------
Expand All @@ -96,11 +109,14 @@ class Config:
conversation: ConversationConfig
llm: LLMConfig
output: OutputConfig
provider: str = "vapi" # "vapi", "smallest", or "elevenlabs"
livekit: LiveKitConfig = None
provider: str = "vapi" # "vapi", "smallest", "elevenlabs", or "livekit"
anthropic_api_key: str = ""
vapi_api_key: str = ""
smallest_api_key: str = ""
elevenlabs_api_key: str = ""
livekit_api_key: str = ""
livekit_api_secret: str = ""


# ---------------------------------------------------------------------------
Expand All @@ -123,14 +139,18 @@ def load_config(path: str | None = None) -> Config:

# --- Provider ---
provider = raw.get("provider", "vapi")
if provider not in ("vapi", "smallest", "elevenlabs"):
raise ValueError(f"Unknown provider: {provider}. Must be 'vapi', 'smallest', or 'elevenlabs'.")
if provider not in ("vapi", "smallest", "elevenlabs", "livekit"):
raise ValueError(
f"Unknown provider: {provider}. Must be 'vapi', 'smallest', 'elevenlabs', or 'livekit'."
)

# --- API keys (from env only, never from YAML) ---
anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
vapi_key = os.environ.get("VAPI_API_KEY", "")
smallest_key = os.environ.get("SMALLEST_API_KEY", "")
elevenlabs_key = os.environ.get("ELEVENLABS_API_KEY", "")
livekit_api_key = os.environ.get("LIVEKIT_API_KEY", "")
livekit_api_secret = os.environ.get("LIVEKIT_API_SECRET", "")

if not anthropic_key:
raise ValueError("ANTHROPIC_API_KEY not set in .env or environment")
Expand All @@ -140,6 +160,11 @@ def load_config(path: str | None = None) -> Config:
raise ValueError("SMALLEST_API_KEY not set in .env or environment")
if provider == "elevenlabs" and not elevenlabs_key:
raise ValueError("ELEVENLABS_API_KEY not set in .env or environment")
if provider == "livekit":
if not livekit_api_key:
raise ValueError("LIVEKIT_API_KEY not set in .env or environment")
if not livekit_api_secret:
raise ValueError("LIVEKIT_API_SECRET not set in .env or environment")

# --- Assistant (required) ---
ast = raw.get("assistant", {})
Expand Down Expand Up @@ -169,6 +194,26 @@ def load_config(path: str | None = None) -> Config:
cv = raw.get("conversation", {})
lm = raw.get("llm", {})
out = raw.get("output", {})
lk = raw.get("livekit", {})

# --- LiveKit section (required if provider == "livekit") ---
livekit_url = lk.get("url", os.environ.get("LIVEKIT_URL", ""))
if provider == "livekit" and not livekit_url:
raise ValueError(
"livekit.url is required when provider is 'livekit'. "
"Set it in config.yaml or LIVEKIT_URL in .env."
)
livekit_cfg = LiveKitConfig(
url=livekit_url,
room_prefix=lk.get("room_prefix", "eval"),
data_topic=lk.get("data_topic", "text"),
response_timeout=float(lk.get("response_timeout", 30.0)),
agent_join_timeout=float(lk.get("agent_join_timeout", 30.0)),
agent_backend=lk.get("agent_backend", "none"),
system_prompt=lk.get("system_prompt", ""),
system_prompt_file=lk.get("system_prompt_file", ""),
inject_system_prompt=bool(lk.get("inject_system_prompt", False)),
)

return Config(
assistant=AssistantConfig(
Expand Down Expand Up @@ -203,9 +248,12 @@ def load_config(path: str | None = None) -> Config:
save_transcripts=out.get("save_transcripts", True),
graphs=out.get("graphs", True),
),
livekit=livekit_cfg,
provider=provider,
anthropic_api_key=anthropic_key,
vapi_api_key=vapi_key,
smallest_api_key=smallest_key,
elevenlabs_api_key=elevenlabs_key,
livekit_api_key=livekit_api_key,
livekit_api_secret=livekit_api_secret,
)
Loading