ArchishmanSengupta · Mohith1612 · Mar 25, 2026 · Mar 26, 2026 · Apr 12, 2026
diff --git a/.env.example b/.env.example
@@ -8,3 +8,9 @@ SMALLEST_API_KEY=your-smallest-api-key
 
 # ElevenLabs ConvAI (if provider: elevenlabs)
 ELEVENLABS_API_KEY=your-elevenlabs-api-key
+
+# LiveKit (if provider: livekit)
+LIVEKIT_API_KEY=your-livekit-api-key
+LIVEKIT_API_SECRET=your-livekit-api-secret
+# LIVEKIT_URL can be set here instead of livekit.url in config.yaml
+# LIVEKIT_URL=wss://your-project.livekit.cloud
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,5 @@
 .env
-venv/
+.venv/
 __pycache__/
 *.pyc
 results/

diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@ A self-improving loop for voice AI agents. Inspired by the keep/revert pattern f
 
 It generates adversarial callers, attacks your agent, proposes prompt improvements one at a time, keeps what works, reverts what doesn't. Run it overnight, wake up to a better agent.
 
-Works with [Vapi](https://vapi.ai), [Smallest AI](https://smallest.ai), and [ElevenLabs ConvAI](https://elevenlabs.io/conversational-ai).
+Works with [Vapi](https://vapi.ai), [Smallest AI](https://smallest.ai), [ElevenLabs ConvAI](https://elevenlabs.io/conversational-ai), and [LiveKit](https://livekit.io).
 
 ```
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@@ -53,6 +53,11 @@ SMALLEST_API_KEY=your-smallest-api-key
 
 # If using ElevenLabs
 ELEVENLABS_API_KEY=your-elevenlabs-api-key
+
+# If using LiveKit
+LIVEKIT_URL=wss://your-project.livekit.cloud
+LIVEKIT_API_KEY=your-livekit-api-key
+LIVEKIT_API_SECRET=your-livekit-api-secret
 ```
 
 You need the Anthropic key (for Claude, which generates scenarios and judges conversations) plus the key for whichever voice platform your agent runs on.
@@ -70,14 +75,17 @@ cp examples/smallest.config.yaml config.yaml
 
 # For ElevenLabs
 cp examples/elevenlabs.config.yaml config.yaml
+
+# For LiveKit
+cp examples/livekit.config.yaml config.yaml
 ```
 
 Then open `config.yaml` and replace the example with your agent's details.
 
 The config has three required fields:
 
 ```yaml
-provider: vapi                  # "vapi", "smallest", or "elevenlabs"
+provider: vapi                  # "vapi", "smallest", "elevenlabs", or "livekit"
 
 assistant:
   id: "your-agent-id"           # from your platform dashboard
@@ -297,6 +305,7 @@ Weights and threshold are configurable in `config.yaml` under `scoring:`.
 | **[Vapi](https://vapi.ai)** | Live multi-turn conversations via Vapi Chat API | Read/write via assistant PATCH endpoint |
 | **[Smallest AI](https://smallest.ai)** | Simulated — Claude plays the agent using the system prompt from the platform | Read/write via Atoms workflow API |
 | **[ElevenLabs ConvAI](https://elevenlabs.io/conversational-ai)** | Native `simulate-conversation` endpoint — ElevenLabs runs the real deployed agent (with its tools and knowledge base) and plays the user via a persona prompt | Read/write via agent PATCH endpoint |
+| **[LiveKit](https://livekit.io)** | Text-only evals over LiveKit data-channel messages (Phase 1 — no audio). The caller bot joins a room and exchanges turns with the agent as JSON. | Delegated to `agent_backend` (`"smallest"` or `"local"`), or managed externally when it is `"none"` |
 
 **Why simulated for Smallest AI?** Atoms agents only accept audio input through LiveKit rooms — there's no text chat API. Since the system optimizes the *prompt* (not the voice pipeline), simulating conversations with Claude using the actual prompt from the platform is effective and fast.
 
@@ -325,7 +334,8 @@ autovoiceevals/
 ├── examples/
 │   ├── vapi.config.yaml          Salon booking agent on Vapi
 │   ├── smallest.config.yaml      Pizza delivery agent on Smallest AI
-│   └── elevenlabs.config.yaml    Medical clinic scheduling agent on ElevenLabs
+│   ├── elevenlabs.config.yaml    Medical clinic scheduling agent on ElevenLabs
+│   └── livekit.config.yaml       LiveKit data-channel agent (Phase 1)
 └── autovoiceevals/               Core package
     ├── cli.py                    CLI (research | pipeline subcommands)
     ├── config.py                 Config loading + validation
@@ -335,6 +345,7 @@ autovoiceevals/
     ├── vapi.py                   Vapi client
     ├── smallest.py               Smallest AI client
     ├── elevenlabs.py             ElevenLabs ConvAI client
+    ├── livekit_provider.py       LiveKit data channel client
     ├── llm.py                    Claude client
     ├── evaluator.py              Scenario generation, judging, prompt proposals
     ├── results.py                Post-run results viewer

diff --git a/autovoiceevals/config.py b/autovoiceevals/config.py
@@ -83,6 +83,19 @@ class OutputConfig:
     graphs: bool = True
 
 
+@dataclass
+class LiveKitConfig:  # Settings for provider "livekit"; filled by load_config from the `livekit:` config section.
+    url: str = ""                  # LiveKit server URL; load_config falls back to the LIVEKIT_URL env var
+    room_prefix: str = "eval"      # presumably a name prefix for eval rooms — confirm in the provider
+    data_topic: str = "text"       # presumably the data-channel topic used for turns — confirm
+    response_timeout: float = 30.0     # presumably seconds to wait for an agent reply — confirm
+    agent_join_timeout: float = 30.0   # presumably seconds to wait for the agent to join — confirm
+    agent_backend: str = "none"    # "smallest" | "local" | "none" (NOTE(review): not validated in load_config as shown)
+    system_prompt: str = ""        # initial prompt when agent_backend="local"
+    system_prompt_file: str = ""   # path to prompt file; overrides system_prompt if it exists
+    inject_system_prompt: bool = False  # send prompt as first data msg each conversation
+
+
 # ---------------------------------------------------------------------------
 # Top-level config
 # ---------------------------------------------------------------------------
@@ -96,11 +109,14 @@ class Config:
     conversation: ConversationConfig
     llm: LLMConfig
     output: OutputConfig
-    provider: str = "vapi"         # "vapi", "smallest", or "elevenlabs"
+    livekit: LiveKitConfig = None
+    provider: str = "vapi"         # "vapi", "smallest", "elevenlabs", or "livekit"
     anthropic_api_key: str = ""
     vapi_api_key: str = ""
     smallest_api_key: str = ""
     elevenlabs_api_key: str = ""
+    livekit_api_key: str = ""
+    livekit_api_secret: str = ""
 
 
 # ---------------------------------------------------------------------------
@@ -123,14 +139,18 @@ def load_config(path: str | None = None) -> Config:
 
     # --- Provider ---
     provider = raw.get("provider", "vapi")
-    if provider not in ("vapi", "smallest", "elevenlabs"):
-        raise ValueError(f"Unknown provider: {provider}. Must be 'vapi', 'smallest', or 'elevenlabs'.")
+    if provider not in ("vapi", "smallest", "elevenlabs", "livekit"):
+        raise ValueError(
+            f"Unknown provider: {provider}. Must be 'vapi', 'smallest', 'elevenlabs', or 'livekit'."
+        )
 
     # --- API keys (from env only, never from YAML) ---
     anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
     vapi_key = os.environ.get("VAPI_API_KEY", "")
     smallest_key = os.environ.get("SMALLEST_API_KEY", "")
     elevenlabs_key = os.environ.get("ELEVENLABS_API_KEY", "")
+    livekit_api_key = os.environ.get("LIVEKIT_API_KEY", "")
+    livekit_api_secret = os.environ.get("LIVEKIT_API_SECRET", "")
 
     if not anthropic_key:
         raise ValueError("ANTHROPIC_API_KEY not set in .env or environment")
@@ -140,6 +160,11 @@ def load_config(path: str | None = None) -> Config:
         raise ValueError("SMALLEST_API_KEY not set in .env or environment")
     if provider == "elevenlabs" and not elevenlabs_key:
         raise ValueError("ELEVENLABS_API_KEY not set in .env or environment")
+    if provider == "livekit":
+        if not livekit_api_key:
+            raise ValueError("LIVEKIT_API_KEY not set in .env or environment")
+        if not livekit_api_secret:
+            raise ValueError("LIVEKIT_API_SECRET not set in .env or environment")
 
     # --- Assistant (required) ---
     ast = raw.get("assistant", {})
@@ -169,6 +194,26 @@ def load_config(path: str | None = None) -> Config:
     cv = raw.get("conversation", {})
     lm = raw.get("llm", {})
     out = raw.get("output", {})
+    lk = raw.get("livekit") or {}  # tolerate a bare/null `livekit:` key (loads as None, not {})
+
+    # --- LiveKit section (required if provider == "livekit") ---
+    livekit_url = lk.get("url") or os.environ.get("LIVEKIT_URL", "")  # blank/null url -> env fallback
+    if provider == "livekit" and not livekit_url:
+        raise ValueError(
+            "livekit.url is required when provider is 'livekit'. "
+            "Set it in config.yaml or LIVEKIT_URL in .env."
+        )
+    livekit_cfg = LiveKitConfig(
+        url=livekit_url,
+        room_prefix=lk.get("room_prefix", "eval"),
+        data_topic=lk.get("data_topic", "text"),
+        response_timeout=float(lk.get("response_timeout", 30.0)),
+        agent_join_timeout=float(lk.get("agent_join_timeout", 30.0)),
+        agent_backend=lk.get("agent_backend", "none"),
+        system_prompt=lk.get("system_prompt", ""),
+        system_prompt_file=lk.get("system_prompt_file", ""),
+        inject_system_prompt=bool(lk.get("inject_system_prompt", False)),
+    )
 
     return Config(
         assistant=AssistantConfig(
@@ -203,9 +248,12 @@ def load_config(path: str | None = None) -> Config:
             save_transcripts=out.get("save_transcripts", True),
             graphs=out.get("graphs", True),
         ),
+        livekit=livekit_cfg,
         provider=provider,
         anthropic_api_key=anthropic_key,
         vapi_api_key=vapi_key,
         smallest_api_key=smallest_key,
         elevenlabs_api_key=elevenlabs_key,
+        livekit_api_key=livekit_api_key,
+        livekit_api_secret=livekit_api_secret,
     )