From 79113c9906c1b33ede2122c0fac2976e0ac31ec3 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 13 Apr 2026 21:52:21 +0800
Subject: [PATCH 1/2] feat(web): add LiveVoiceSession for real-time voice chat
 (#1345)

Add a continuous voice conversation mode using the Web Speech API:
- New LiveVoiceSession component with speech recognition, waveform
  visualizer (AudioContext + AnalyserNode), and control bar
- Integrate into PiChat with liveMode toggle replacing VoiceRecorder
- Three states: IDLE (listening), SENDING (thinking), SPEAKING (placeholder)
- Auto-restart recognition on silence, mute/unmute support

Closes #1345

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 web/src/components/LiveVoiceSession.tsx | 450 ++++++++++++++++++++++++
 web/src/pages/PiChat.tsx                |  34 +-
 2 files changed, 476 insertions(+), 8 deletions(-)
 create mode 100644 web/src/components/LiveVoiceSession.tsx
diff --git a/web/src/components/LiveVoiceSession.tsx b/web/src/components/LiveVoiceSession.tsx
new file mode 100644
index 00000000..a08776ea
--- /dev/null
+++ b/web/src/components/LiveVoiceSession.tsx
@@ -0,0 +1,450 @@
+/*
+ * Copyright 2025 Rararulab
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+import { useState, useEffect, useRef, useCallback } from "react";
+import { Mic, MicOff, PhoneOff } from "lucide-react";
+import { buildWsUrl } from "@/adapters/rara-stream";
+
+// ---------------------------------------------------------------------------
+// Web Speech API type declarations
+// The Web Speech API is not fully standardized and TypeScript's lib.dom does
+// not include SpeechRecognition / SpeechRecognitionEvent. We declare the
+// subset we use here to avoid pulling in @types/dom-speech-recognition.
+// ---------------------------------------------------------------------------
+
+interface SpeechRecognitionEvent extends Event {
+  readonly resultIndex: number;
+  readonly results: SpeechRecognitionResultList;
+}
+
+interface SpeechRecognitionErrorEvent extends Event {
+  readonly error: string;
+  readonly message: string;
+}
+
+interface SpeechRecognitionInstance extends EventTarget {
+  continuous: boolean;
+  interimResults: boolean;
+  lang: string;
+  start(): void;
+  stop(): void;
+  abort(): void;
+  onresult: ((event: SpeechRecognitionEvent) => void) | null;
+  onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
+  onend: (() => void) | null;
+}
+
+interface SpeechRecognitionConstructor {
+  new (): SpeechRecognitionInstance;
+}
+
+declare global {
+  interface Window {
+    SpeechRecognition?: SpeechRecognitionConstructor;
+    webkitSpeechRecognition?: SpeechRecognitionConstructor;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+type VoiceState = "idle" | "sending" | "speaking";
+
+type LiveVoiceSessionProps = {
+  /** Returns the current session key for WebSocket connections. */
+  getSessionKey: () => string | undefined;
+  /** Called when the backend finishes processing one turn. */
+  onTurnComplete: () => void;
+  /** Called when the user ends the live voice session. */
+  onClose: () => void;
+};
+
+// ---------------------------------------------------------------------------
+// Audio Visualizer (inline — replaces LiveKit Agents UI dependency)
+// ---------------------------------------------------------------------------
+
+function AudioVisualizer({
+  analyser,
+  state,
+}: {
+  analyser: AnalyserNode | null;
+  state: VoiceState;
+}) {
+  const canvasRef = useRef<HTMLCanvasElement>(null);
+  const animFrameRef = useRef<number>(0);
+
+  useEffect(() => {
+    const canvas = canvasRef.current;
+    if (!canvas) return;
+    const ctx = canvas.getContext("2d");
+    if (!ctx) return;
+
+    const BAR_COUNT = 32;
+    const dataArray = analyser ? new Uint8Array(analyser.frequencyBinCount) : null;
+
+    function draw() {
+      if (!ctx || !canvas) return;
+      const w = canvas.width;
+      const h = canvas.height;
+      ctx.clearRect(0, 0, w, h);
+
+      // Determine color based on state
+      const color =
+        state === "sending"
+          ? "rgba(156, 163, 175, 0.5)" // gray — thinking
+          : state === "speaking"
+            ? "rgba(34, 197, 94, 0.7)" // green — speaking
+            : "rgba(59, 130, 246, 0.6)"; // blue — listening
+
+      if (analyser && dataArray) {
+        analyser.getByteFrequencyData(dataArray);
+      }
+
+      const barWidth = w / BAR_COUNT - 2;
+      const centerY = h / 2;
+
+      for (let i = 0; i < BAR_COUNT; i++) {
+        // Map frequency bin to bar height
+        const binIndex = dataArray
+          ? Math.floor((i / BAR_COUNT) * dataArray.length)
+          : 0;
+        const value = dataArray ? dataArray[binIndex] : 0;
+        // Minimum bar height for idle state
+        const barHeight = Math.max(2, (value / 255) * (h * 0.8));
+
+        const x = i * (barWidth + 2) + 1;
+        ctx.fillStyle = color;
+        ctx.roundRect(x, centerY - barHeight / 2, barWidth, barHeight, 2);
+        ctx.fill();
+      }
+
+      animFrameRef.current = requestAnimationFrame(draw);
+    }
+
+    draw();
+
+    return () => {
+      cancelAnimationFrame(animFrameRef.current);
+    };
+  }, [analyser, state]);
+
+  return (
+    <canvas
+      ref={canvasRef}
+      width={320}
+      height={48}
+      className="mx-auto block"
+    />
+  );
+}
+
+// ---------------------------------------------------------------------------
+// LiveVoiceSession — main component
+// ---------------------------------------------------------------------------
+
+/**
+ * Bottom voice panel for real-time voice conversation.
+ * Uses Web Speech API for continuous speech-to-text, sends transcribed text
+ * through the existing WebSocket chat API, and displays a waveform visualizer.
+ */
+export function LiveVoiceSession({
+  getSessionKey,
+  onTurnComplete,
+  onClose,
+}: LiveVoiceSessionProps) {
+  const [state, setState] = useState<VoiceState>("idle");
+  const [muted, setMuted] = useState(false);
+  const [interimText, setInterimText] = useState("");
+  const [finalText, setFinalText] = useState("");
+  const [error, setError] = useState<string | null>(null);
+  const [analyser, setAnalyser] = useState<AnalyserNode | null>(null);
+
+  // Refs for cleanup-safe access
+  const recognitionRef = useRef<SpeechRecognitionInstance | null>(null);
+  const wsRef = useRef<WebSocket | null>(null);
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  const streamRef = useRef<MediaStream | null>(null);
+  const liveModeRef = useRef(true);
+  const mutedRef = useRef(false);
+
+  // Keep mutedRef in sync with muted state
+  useEffect(() => {
+    mutedRef.current = muted;
+  }, [muted]);
+
+  // ---------------------------------------------------------------------------
+  // Speech recognition management
+  // ---------------------------------------------------------------------------
+
+  const resumeRecognition = useCallback(() => {
+    if (!liveModeRef.current || mutedRef.current) return;
+    try {
+      recognitionRef.current?.start();
+    } catch {
+      // May already be running
+    }
+  }, []);
+
+  // ---------------------------------------------------------------------------
+  // Send transcribed text to backend via WebSocket
+  // ---------------------------------------------------------------------------
+
+  const sendText = useCallback(
+    (text: string) => {
+      const sessionKey = getSessionKey();
+      if (!sessionKey || !text.trim()) return;
+
+      setState("sending");
+      setFinalText(text);
+      setInterimText("");
+
+      // Pause recognition while waiting for response
+      try {
+        recognitionRef.current?.stop();
+      } catch {
+        // May already be stopped
+      }
+
+      const ws = new WebSocket(buildWsUrl(sessionKey));
+      wsRef.current = ws;
+
+      ws.onopen = () => {
+        ws.send(text);
+      };
+
+      ws.onmessage = (ev: MessageEvent) => {
+        try {
+          const event = JSON.parse(ev.data as string);
+          if (event.type === "done" || event.type === "message") {
+            ws.close();
+          } else if (event.type === "error") {
+            console.error("Voice WS error:", event.message);
+            ws.close();
+          }
+        } catch {
+          // Ignore non-JSON frames
+        }
+      };
+
+      ws.onerror = () => {
+        // `close` always follows `error`; onclose does the reset and resume
+        console.error("Voice WebSocket connection error");
+      };
+
+      ws.onclose = () => {
+        onTurnComplete();
+        // Superseded by a newer turn or by cleanup — leave the UI state alone
+        if (wsRef.current !== ws) return;
+        wsRef.current = null;
+        setState("idle");
+        setFinalText("");
+        resumeRecognition();
+      };
+    },
+    [getSessionKey, onTurnComplete, resumeRecognition],
+  );
+
+  // Initialize speech recognition and microphone audio once per mount;
+  // `sendText` is read via a ref so its changing identity never rebuilds it.
+  const sendTextRef = useRef(sendText);
+  useEffect(() => {
+    sendTextRef.current = sendText;
+  }, [sendText]);
+
+  useEffect(() => {
+    const SpeechRecognitionCtor =
+      window.SpeechRecognition ?? window.webkitSpeechRecognition;
+    if (!SpeechRecognitionCtor) {
+      setError("Speech recognition is not supported in this browser.");
+      return;
+    }
+
+    liveModeRef.current = true; // cleanup clears it; re-arm on a re-run
+    let disposed = false;
+
+    const recognition = new SpeechRecognitionCtor();
+    recognition.continuous = true;
+    recognition.interimResults = true;
+    recognition.lang = "zh-CN";
+    recognitionRef.current = recognition;
+
+    recognition.onresult = (event) => {
+      const result = event.results[event.resultIndex];
+      if (result.isFinal) {
+        const transcript = result[0].transcript.trim();
+        if (transcript) sendTextRef.current(transcript);
+      } else {
+        setInterimText(result[0].transcript);
+      }
+    };
+
+    recognition.onerror = (event) => {
+      // "no-speech" and "aborted" are expected during normal operation
+      if (event.error === "no-speech" || event.error === "aborted") return;
+      console.error("Speech recognition error:", event.error);
+      if (event.error === "not-allowed") {
+        // Retrying cannot succeed — stop `onend` from restarting in a loop
+        liveModeRef.current = false;
+        setError("Microphone access denied. Please allow microphone access.");
+      }
+    };
+
+    // Auto-restart on end, unless a turn is in flight (sendText resumes it)
+    recognition.onend = () => !wsRef.current && resumeRecognition();
+
+    // Start listening
+    try {
+      recognition.start();
+    } catch (err) {
+      console.error("Failed to start speech recognition:", err);
+      setError("Failed to start speech recognition.");
+    }
+
+    // Set up AudioContext for visualizer
+    navigator.mediaDevices
+      .getUserMedia({ audio: true })
+      .then((stream) => {
+        // Cleaned up while the request was pending — just release the mic
+        if (disposed) return stream.getTracks().forEach((t) => t.stop());
+        streamRef.current = stream;
+        const audioCtx = new AudioContext();
+        audioCtxRef.current = audioCtx;
+        const source = audioCtx.createMediaStreamSource(stream);
+        const analyserNode = audioCtx.createAnalyser();
+        analyserNode.fftSize = 256;
+        source.connect(analyserNode);
+        // Do NOT connect to destination — we don't want to hear our own mic
+        setAnalyser(analyserNode);
+      })
+      .catch((err) => {
+        console.error("Microphone access for visualizer failed:", err);
+        // Non-fatal — visualizer just won't work
+      });
+
+    // Cleanup on unmount
+    return () => {
+      disposed = true;
+      liveModeRef.current = false;
+      recognition.abort(); // unlike stop(), delivers no late final result
+      recognitionRef.current = null;
+      wsRef.current?.close();
+      wsRef.current = null;
+      streamRef.current?.getTracks().forEach((t) => t.stop());
+      audioCtxRef.current?.close();
+    };
+  }, [resumeRecognition]);
+
+  // ---------------------------------------------------------------------------
+  // Mute / unmute
+  // ---------------------------------------------------------------------------
+
+  const toggleMute = useCallback(() => {
+    // Compute the next value from the ref rather than inside a setState
+    // updater: updaters must be pure (StrictMode invokes them twice), and
+    // stop()/start() are side effects. Updating the ref right away also lets
+    // the recognizer's `onend` handler see the new value without waiting
+    // for the post-render sync effect.
+    const next = !mutedRef.current;
+    mutedRef.current = next;
+    setMuted(next);
+    try {
+      if (next) {
+        // Muting — stop recognition
+        recognitionRef.current?.stop();
+      } else {
+        // Unmuting — restart recognition
+        recognitionRef.current?.start();
+      }
+    } catch {
+      // start() throws if recognition is already running — safe to ignore
+    }
+  }, []);
+
+  // ---------------------------------------------------------------------------
+  // Close session
+  // ---------------------------------------------------------------------------
+
+  const handleClose = useCallback(() => {
+    liveModeRef.current = false;
+    try {
+      recognitionRef.current?.stop();
+    } catch {
+      // ignore
+    }
+    wsRef.current?.close();
+    onClose();
+  }, [onClose]);
+
+  // ---------------------------------------------------------------------------
+  // Status text
+  // ---------------------------------------------------------------------------
+
+  const statusText =
+    error ??
+    (state === "sending"
+      ? "Thinking..."
+      : muted
+        ? "Muted"
+        : "Listening...");
+
+  // ---------------------------------------------------------------------------
+  // Render
+  // ---------------------------------------------------------------------------
+
+  return (
+    <div className="absolute inset-x-0 bottom-0 z-50 flex h-48 flex-col items-center justify-center gap-3 border-t bg-background/95 px-4 backdrop-blur">
+      {/* Waveform visualizer */}
+      <AudioVisualizer analyser={analyser} state={state} />
+
+      {/* Interim transcription or confirmed text */}
+      <div className="h-5 w-full max-w-md text-center">
+        {state === "sending" && finalText ? (
+          <span className="text-sm text-foreground truncate block">
+            {finalText}
+          </span>
+        ) : interimText ? (
+          <span className="text-sm italic text-muted-foreground truncate block">
+            {interimText}
+          </span>
+        ) : null}
+      </div>
+
+      {/* Status text */}
+      <div className="text-xs text-muted-foreground">{statusText}</div>
+
+      {/* Control bar */}
+      <div className="flex items-center gap-4">
+        {/* Mute button */}
+        <button
+          onClick={toggleMute}
+          disabled={state === "sending"}
+          className={`flex h-10 w-10 cursor-pointer items-center justify-center rounded-full transition-colors ${
+            muted
+              ? "bg-destructive text-destructive-foreground hover:bg-destructive/90"
+              : "bg-secondary text-secondary-foreground hover:bg-secondary/80"
+          }`}
+          title={muted ? "Unmute" : "Mute"}
+        >
+          {muted ? <MicOff size={18} /> : <Mic size={18} />}
+        </button>
+
+        {/* End session button */}
+        <button
+          onClick={handleClose}
+          className="flex h-10 w-10 cursor-pointer items-center justify-center rounded-full bg-destructive text-destructive-foreground transition-colors hover:bg-destructive/90"
+          title="End voice session"
+        >
+          <PhoneOff size={18} />
+        </button>
+      </div>
+    </div>
+  );
+}
diff --git a/web/src/pages/PiChat.tsx b/web/src/pages/PiChat.tsx
index 2220b31a..63312284 100644
--- a/web/src/pages/PiChat.tsx
+++ b/web/src/pages/PiChat.tsx
@@ -34,7 +34,7 @@ import { createRaraStreamFn } from "@/adapters/rara-stream";
 import { api } from "@/api/client";
 import type { ChatSession, ChatMessageData } from "@/api/types";
 import { useNavigate } from "react-router";
-import { VoiceRecorder } from "@/components/VoiceRecorder";
+import { LiveVoiceSession } from "@/components/LiveVoiceSession";
 
 /** Strip `<think>...</think>` blocks from assistant text. */
 function stripThinkTags(text: string): string {
@@ -259,6 +259,7 @@ export default function PiChat() {
   const agentRef = useRef<Agent | null>(null);
   const chatPanelRef = useRef<import("@mariozechner/pi-web-ui").ChatPanel | null>(null);
   const [showSessionList, setShowSessionList] = useState(false);
+  const [liveMode, setLiveMode] = useState(false);
   const navigate = useNavigate();
 
   /** Switch the agent to a different session, loading its history. */
@@ -405,15 +406,32 @@ export default function PiChat() {
           <path d="M3 12h18M3 6h18M3 18h18" />
         </svg>
       </button>
-      {/* Voice recorder button — fixed top-right */}
-      <div className="absolute right-2 top-2 z-50">
-        <VoiceRecorder
-          getSessionKey={() => agentRef.current?.sessionId}
-          onComplete={reloadMessages}
-        />
-      </div>
+      {/* Voice button — fixed top-right */}
+      {!liveMode && (
+        <div className="absolute right-2 top-2 z-50">
+          <button
+            onClick={() => setLiveMode(true)}
+            className="flex h-11 w-11 cursor-pointer items-center justify-center rounded-full bg-background/80 text-muted-foreground shadow-md backdrop-blur transition-all hover:bg-secondary hover:text-foreground"
+            title="Start live voice chat"
+          >
+            <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
+              <rect x="9" y="2" width="6" height="12" rx="3" />
+              <path d="M5 10a7 7 0 0 0 14 0" />
+              <line x1="12" y1="19" x2="12" y2="22" />
+            </svg>
+          </button>
+        </div>
+      )}
       {/* Chat panel container */}
       <div ref={containerRef} className="h-full w-full" />
+      {/* Live voice panel — bottom overlay */}
+      {liveMode && (
+        <LiveVoiceSession
+          getSessionKey={() => agentRef.current?.sessionId}
+          onTurnComplete={reloadMessages}
+          onClose={() => setLiveMode(false)}
+        />
+      )}
       {/* Session list slide-over */}
       {showSessionList && (
         <SessionListPanel

From 4b28d4a4a7897d8291c33a62e96b8b83abca6721 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 13 Apr 2026 22:31:32 +0800
Subject: [PATCH 2/2] style(web): apply ElevenLabs design language to
 LiveVoiceSession (#1345)

Rewrite the voice panel UI with ElevenLabs-inspired dark theme:
- AudioVisualizer: larger canvas (400x80), 3px round-cap bars, state-based
  colors (zinc idle, blue hearing, emerald speaking), idle breathing animation
- Voice panel: backdrop-blur-xl, multi-layer dark shadow, h-52 with centered
  flex layout and generous spacing
- Pill-shaped controls: translucent mute (bg-white/5) and red-tinted end button
- Status label: uppercase 11px tracked text
- Interim text: italic muted with min-height to prevent layout jank
- PiChat toggle: active state shows red pulsing dot + "LIVE" pill badge

Closes #1345
---
 web/src/components/LiveVoiceSession.tsx | 159 ++++++++++++++++++------
 web/src/pages/PiChat.tsx                |  19 ++-
 2 files changed, 133 insertions(+), 45 deletions(-)

diff --git a/web/src/components/LiveVoiceSession.tsx b/web/src/components/LiveVoiceSession.tsx
index a08776ea..c247d2b1 100644
--- a/web/src/components/LiveVoiceSession.tsx
+++ b/web/src/components/LiveVoiceSession.tsx
@@ -68,9 +68,19 @@ type LiveVoiceSessionProps = {
 };
 
 // ---------------------------------------------------------------------------
-// Audio Visualizer (inline — replaces LiveKit Agents UI dependency)
+// Audio Visualizer — ElevenLabs-inspired waveform bars
 // ---------------------------------------------------------------------------
 
+/** Color palette keyed by voice state, adapted for dark theme. */
+const STATE_COLORS: Record<VoiceState, string> = {
+  idle: "#525252", // neutral-600 (zinc-600 would be #52525b) — muted, ambient
+  sending: "#525252", // same gray, pulsing animation signals "thinking"
+  speaking: "#10b981", // emerald-500 — Rara is speaking
+};
+
+/** Blue highlight when user is actively speaking (mic input detected). */
+const HEARING_COLOR = "#3b82f6"; // blue-500
+
 function AudioVisualizer({
   analyser,
   state,
@@ -80,6 +90,8 @@ function AudioVisualizer({
 }) {
   const canvasRef = useRef<HTMLCanvasElement>(null);
   const animFrameRef = useRef<number>(0);
+  // Persist per-bar phase offsets for idle breathing animation
+  const phaseOffsetsRef = useRef<number[] | null>(null);
 
   useEffect(() => {
     const canvas = canvasRef.current;
@@ -87,8 +99,24 @@ function AudioVisualizer({
     const ctx = canvas.getContext("2d");
     if (!ctx) return;
 
-    const BAR_COUNT = 32;
-    const dataArray = analyser ? new Uint8Array(analyser.frequencyBinCount) : null;
+    const BAR_COUNT = 40;
+    const BAR_WIDTH = 3;
+    const BAR_GAP = 2;
+    const dataArray = analyser
+      ? new Uint8Array(analyser.frequencyBinCount)
+      : null;
+
+    // Initialize stable random phase offsets (once)
+    if (!phaseOffsetsRef.current) {
+      phaseOffsetsRef.current = Array.from(
+        { length: BAR_COUNT },
+        () => Math.random() * Math.PI * 2,
+      );
+    }
+    const phaseOffsets = phaseOffsetsRef.current;
+
+    // Sending state: soft pulse via opacity oscillation
+    let sendingPhase = 0;
 
     function draw() {
       if (!ctx || !canvas) return;
@@ -96,36 +124,75 @@ function AudioVisualizer({
       const h = canvas.height;
       ctx.clearRect(0, 0, w, h);
 
-      // Determine color based on state
-      const color =
-        state === "sending"
-          ? "rgba(156, 163, 175, 0.5)" // gray — thinking
-          : state === "speaking"
-            ? "rgba(34, 197, 94, 0.7)" // green — speaking
-            : "rgba(59, 130, 246, 0.6)"; // blue — listening
-
       if (analyser && dataArray) {
         analyser.getByteFrequencyData(dataArray);
       }
 
-      const barWidth = w / BAR_COUNT - 2;
+      // Detect whether user is producing audio (hearing state)
+      let avgLevel = 0;
+      if (dataArray) {
+        let sum = 0;
+        for (let i = 0; i < dataArray.length; i++) sum += dataArray[i];
+        avgLevel = sum / dataArray.length;
+      }
+      const isHearing = state === "idle" && avgLevel > 12;
+
+      // Pick bar color
+      const color = isHearing ? HEARING_COLOR : STATE_COLORS[state];
+
+      // Sending pulse: oscillate global opacity
+      let globalAlpha = 1;
+      if (state === "sending") {
+        sendingPhase += 0.03;
+        globalAlpha = 0.4 + 0.3 * Math.sin(sendingPhase);
+      }
+
+      const totalBarsWidth = BAR_COUNT * (BAR_WIDTH + BAR_GAP) - BAR_GAP;
+      const offsetX = (w - totalBarsWidth) / 2;
       const centerY = h / 2;
+      const now = performance.now() / 1000; // seconds
+
+      ctx.lineCap = "round";
 
       for (let i = 0; i < BAR_COUNT; i++) {
         // Map frequency bin to bar height
         const binIndex = dataArray
           ? Math.floor((i / BAR_COUNT) * dataArray.length)
           : 0;
-        const value = dataArray ? dataArray[binIndex] : 0;
-        // Minimum bar height for idle state
-        const barHeight = Math.max(2, (value / 255) * (h * 0.8));
+        const rawValue = dataArray ? dataArray[binIndex] : 0;
+
+        let barHeight: number;
+        if (state === "idle" && !isHearing) {
+          // Idle breathing: gentle sinusoidal per-bar undulation
+          const breath =
+            Math.sin(now * 1.2 + phaseOffsets[i]) * 0.5 + 0.5; // 0..1
+          barHeight = 3 + breath * 6; // 3..9px — subtle
+        } else if (state === "sending") {
+          // Thinking: slow wave with moderate height
+          const wave =
+            Math.sin(now * 2 + (i / BAR_COUNT) * Math.PI * 2) * 0.5 + 0.5;
+          barHeight = 4 + wave * 14;
+        } else {
+          // Hearing or speaking: driven by audio data
+          barHeight = Math.max(3, (rawValue / 255) * (h * 0.85));
+        }
+
+        const x = offsetX + i * (BAR_WIDTH + BAR_GAP);
 
-        const x = i * (barWidth + 2) + 1;
+        ctx.globalAlpha = globalAlpha;
         ctx.fillStyle = color;
-        ctx.roundRect(x, centerY - barHeight / 2, barWidth, barHeight, 2);
+        ctx.beginPath();
+        ctx.roundRect(
+          x,
+          centerY - barHeight / 2,
+          BAR_WIDTH,
+          barHeight,
+          BAR_WIDTH / 2,
+        );
         ctx.fill();
       }
 
+      ctx.globalAlpha = 1;
       animFrameRef.current = requestAnimationFrame(draw);
     }
 
@@ -139,9 +206,9 @@ function AudioVisualizer({
   return (
     <canvas
       ref={canvasRef}
-      width={320}
-      height={48}
-      className="mx-auto block"
+      width={400}
+      height={80}
+      className="mx-auto block w-[60%] max-w-[400px]"
     />
   );
 }
@@ -154,6 +221,8 @@ function AudioVisualizer({
  * Bottom voice panel for real-time voice conversation.
  * Uses Web Speech API for continuous speech-to-text, sends transcribed text
  * through the existing WebSocket chat API, and displays a waveform visualizer.
+ *
+ * UI styled after ElevenLabs design language, adapted for dark theme.
  */
 export function LiveVoiceSession({
   getSessionKey,
@@ -387,62 +456,72 @@ export function LiveVoiceSession({
   // Status text
   // ---------------------------------------------------------------------------
 
-  const statusText =
+  const statusLabel =
     error ??
     (state === "sending"
-      ? "Thinking..."
+      ? "THINKING"
       : muted
-        ? "Muted"
-        : "Listening...");
+        ? "MUTED"
+        : "LISTENING");
 
   // ---------------------------------------------------------------------------
-  // Render
+  // Render — ElevenLabs-inspired dark voice panel
   // ---------------------------------------------------------------------------
 
   return (
-    <div className="absolute inset-x-0 bottom-0 z-50 flex h-48 flex-col items-center justify-center gap-3 border-t bg-background/95 px-4 backdrop-blur">
-      {/* Waveform visualizer */}
+    <div
+      className="absolute inset-x-0 bottom-0 z-50 flex h-52 flex-col items-center justify-center gap-4 border-t border-white/5 bg-background/95 px-4 backdrop-blur-xl"
+      style={{
+        boxShadow:
+          "rgba(255,255,255,0.03) 0px 0px 0px 1px inset, rgba(0,0,0,0.3) 0px -4px 16px",
+      }}
+    >
+      {/* Waveform visualizer — the hero element */}
       <AudioVisualizer analyser={analyser} state={state} />
 
       {/* Interim transcription or confirmed text */}
-      <div className="h-5 w-full max-w-md text-center">
+      <div className="min-h-[1.5em] w-full max-w-md text-center">
         {state === "sending" && finalText ? (
-          <span className="text-sm text-foreground truncate block">
+          <span className="block truncate text-sm tracking-wide text-muted-foreground/70">
             {finalText}
           </span>
         ) : interimText ? (
-          <span className="text-sm italic text-muted-foreground truncate block">
+          <span className="block truncate text-sm italic text-muted-foreground/50">
             {interimText}
           </span>
         ) : null}
       </div>
 
-      {/* Status text */}
-      <div className="text-xs text-muted-foreground">{statusText}</div>
+      {/* Status label — uppercase, tracked, small */}
+      <div className="text-[11px] font-medium uppercase tracking-[0.15em] text-muted-foreground/70">
+        {statusLabel}
+      </div>
 
-      {/* Control bar */}
-      <div className="flex items-center gap-4">
+      {/* Control bar — pill buttons */}
+      <div className="flex items-center gap-8">
         {/* Mute button */}
         <button
           onClick={toggleMute}
           disabled={state === "sending"}
-          className={`flex h-10 w-10 cursor-pointer items-center justify-center rounded-full transition-colors ${
+          className={`flex cursor-pointer items-center gap-2 rounded-full px-5 py-2 text-sm transition-all ${
             muted
-              ? "bg-destructive text-destructive-foreground hover:bg-destructive/90"
-              : "bg-secondary text-secondary-foreground hover:bg-secondary/80"
+              ? "bg-red-500/10 text-red-400 hover:bg-red-500/20"
+              : "bg-white/5 text-muted-foreground hover:bg-white/10"
           }`}
           title={muted ? "Unmute" : "Mute"}
         >
-          {muted ? <MicOff size={18} /> : <Mic size={18} />}
+          {muted ? <MicOff size={16} /> : <Mic size={16} />}
+          <span>{muted ? "Unmute" : "Mute"}</span>
         </button>
 
         {/* End session button */}
         <button
           onClick={handleClose}
-          className="flex h-10 w-10 cursor-pointer items-center justify-center rounded-full bg-destructive text-destructive-foreground transition-colors hover:bg-destructive/90"
+          className="flex cursor-pointer items-center gap-2 rounded-full px-5 py-2 text-sm text-red-400 transition-all hover:bg-red-500/10"
           title="End voice session"
         >
-          <PhoneOff size={18} />
+          <PhoneOff size={16} />
+          <span>End</span>
         </button>
       </div>
     </div>
diff --git a/web/src/pages/PiChat.tsx b/web/src/pages/PiChat.tsx
index 63312284..eb91b5ff 100644
--- a/web/src/pages/PiChat.tsx
+++ b/web/src/pages/PiChat.tsx
@@ -406,9 +406,18 @@ export default function PiChat() {
           <path d="M3 12h18M3 6h18M3 18h18" />
         </svg>
       </button>
-      {/* Voice button — fixed top-right */}
-      {!liveMode && (
-        <div className="absolute right-2 top-2 z-50">
+      {/* Voice toggle — fixed top-right */}
+      <div className="absolute right-2 top-2 z-50">
+        {liveMode ? (
+          <button
+            onClick={() => setLiveMode(false)}
+            className="flex cursor-pointer items-center gap-1.5 rounded-full bg-red-500/10 px-3 py-1.5 text-xs font-medium text-red-400 transition-all hover:bg-red-500/20"
+            title="End live voice chat"
+          >
+            <span className="h-2 w-2 animate-pulse rounded-full bg-red-500" />
+            LIVE
+          </button>
+        ) : (
           <button
             onClick={() => setLiveMode(true)}
             className="flex h-11 w-11 cursor-pointer items-center justify-center rounded-full bg-background/80 text-muted-foreground shadow-md backdrop-blur transition-all hover:bg-secondary hover:text-foreground"
@@ -420,8 +429,8 @@ export default function PiChat() {
               <line x1="12" y1="19" x2="12" y2="22" />
             </svg>
           </button>
-        </div>
-      )}
+        )}
+      </div>
       {/* Chat panel container */}
       <div ref={containerRef} className="h-full w-full" />
       {/* Live voice panel — bottom overlay */}