diff --git a/.env.example b/.env.example index 42bdcf2c91..e2bd56e05a 100644 --- a/.env.example +++ b/.env.example @@ -21,3 +21,39 @@ POSTGRES_URL=**** # Instructions to create a Redis store here: # https://vercel.com/docs/redis REDIS_URL=**** + +# ================================ +# WEB SEARCH (OPTIONAL) +# ================================ +# Tavily API enables the AI to search the web for real-time information. +# Get your API key from https://tavily.com +# If not configured, the AI will indicate that web search is unavailable +# when users request current information. The chat will continue to work +# normally for all other queries. +# TAVILY_API_KEY="..." + + +# ================================ +# VOICE FEATURES (OPTIONAL) +# ================================ +# The voice agent feature requires several external services. +# If these are not configured, voice features will be disabled +# but the rest of the app will work normally. + +# Deepgram API (Speech-to-Text) +# Get your API key from https://deepgram.com +# DEEPGRAM_API_KEY="..." + +# Cartesia API (Text-to-Speech) +# Get your API key from https://cartesia.ai +# Cartesia is used to synthesize the text response into speech. +# https://play.cartesia.ai/console +# CARTESIA_API_KEY="..." + + +# NLP Worker Service (End-of-Turn Detection) +# This is a custom service for detecting when users finish speaking. +# If not configured, the app will fall back to simple heuristics. +# See VOICE_FEATURES.md for setup instructions. +# NLP_WORKER_URL=http://localhost:8097 +# NLP_WORKER_API_KEY=your-nlp-worker-api-key diff --git a/.gitignore b/.gitignore index 864e97de91..72ab8b6819 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,11 @@ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. +# Generated AI artifacts +.ai-docs/ +.playwright-mcp/ +.cursor/plans/ +.commit.msg.md + # dependencies node_modules .pnp @@ -42,3 +48,12 @@ yarn-error.log* /playwright-report/ /blob-report/ /playwright/* + +# VAD model files (large binaries, served from public/) +/public/*.onnx +/public/*.wasm +/public/*.mjs +/public/vad.worklet.bundle.min.js + +*.disabled* +**/*.disabled* diff --git a/README.md b/README.md index 4df063825a..a4f292a47e 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,11 @@ - [Vercel Blob](https://vercel.com/storage/blob) for efficient file storage - [Auth.js](https://authjs.dev) - Simple and secure authentication +- Voice Agent (Optional) + - Real-time speech-to-text with Deepgram + - Natural text-to-speech with Cartesia + - Smart end-of-turn detection with graceful fallbacks + - See [VOICE_FEATURES.md](VOICE_FEATURES.md) for setup ## Model Providers @@ -54,9 +59,9 @@ You can deploy your own version of the Next.js AI Chatbot to Vercel with one cli ## Running locally -You will need to use the environment variables [defined in `.env.example`](.env.example) to run Next.js AI Chatbot. It's recommended you use [Vercel Environment Variables](https://vercel.com/docs/projects/environment-variables) for this, but a `.env` file is all that is necessary. +You will need to use the environment variables [defined in `.env.example`](.env.example) to run Next.js AI Chatbot. It's recommended you use [Vercel Environment Variables](https://vercel.com/docs/projects/environment-variables) for this, but a `.env.local` file is all that is necessary. -> Note: You should not commit your `.env` file or it will expose secrets that will allow others to control access to your various AI and authentication provider accounts. 
+> Note: You should not commit your `.env.local` file or it will expose secrets that will allow others to control access to your various AI and authentication provider accounts. 1. Install Vercel CLI: `npm i -g vercel` 2. Link local instance with Vercel and GitHub accounts (creates `.vercel` directory): `vercel link` @@ -69,3 +74,19 @@ pnpm dev ``` Your app template should now be running on [localhost:3000](http://localhost:3000). + +### Optional: Voice Features + +Voice features require additional API keys. See [VOICE_FEATURES.md](VOICE_FEATURES.md) for detailed setup instructions. + +```bash +# Add to your .env.local file +NEXT_PUBLIC_DEEPGRAM_API_KEY=your-deepgram-key +NEXT_PUBLIC_CARTESIA_API_KEY=your-cartesia-key + +# Optional: Enhanced end-of-turn detection +NLP_WORKER_URL=http://localhost:8097 +NLP_WORKER_API_KEY=your-api-key +``` + +The app works perfectly fine without these - voice features are completely optional. diff --git a/app/(auth)/login/page.tsx b/app/(auth)/login/page.tsx index c9fb3f1d28..0d3940386f 100644 --- a/app/(auth)/login/page.tsx +++ b/app/(auth)/login/page.tsx @@ -41,8 +41,7 @@ export default function Page() { updateSession(); router.refresh(); } - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [state.status]); + }, [state.status, updateSession, router]); const handleSubmit = (formData: FormData) => { setEmail(formData.get("email") as string); diff --git a/app/(auth)/register/page.tsx b/app/(auth)/register/page.tsx index 7af8161833..f9ae6789a0 100644 --- a/app/(auth)/register/page.tsx +++ b/app/(auth)/register/page.tsx @@ -35,14 +35,16 @@ export default function Page() { description: "Failed validating your submission!", }); } else if (state.status === "success") { - toast({ type: "success", description: "Account created successfully!" 
}); + toast({ + type: "success", + description: "Account created successfully!", + }); setIsSuccessful(true); updateSession(); router.refresh(); } - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [state.status]); + }, [state.status, updateSession, router]); const handleSubmit = (formData: FormData) => { setEmail(formData.get("email") as string); diff --git a/app/(chat)/actions.ts b/app/(chat)/actions.ts index 5bc9b4c216..dde22902f4 100644 --- a/app/(chat)/actions.ts +++ b/app/(chat)/actions.ts @@ -3,8 +3,8 @@ import { generateText, type UIMessage } from "ai"; import { cookies } from "next/headers"; import type { VisibilityType } from "@/components/visibility-selector"; -import { myProvider } from "@/lib/ai/providers"; import { titlePrompt } from "@/lib/ai/prompts"; +import { myProvider } from "@/lib/ai/providers"; import { deleteMessagesByChatIdAfterTimestamp, getMessageById, diff --git a/app/(chat)/api/chat/route.ts b/app/(chat)/api/chat/route.ts index 52d41fb8aa..537c6fd13b 100644 --- a/app/(chat)/api/chat/route.ts +++ b/app/(chat)/api/chat/route.ts @@ -1,3 +1,4 @@ +/** biome-ignore-all lint/correctness/noUnusedImports: refactor testing */ import { geolocation } from "@vercel/functions"; import { convertToModelMessages, @@ -25,6 +26,7 @@ import { myProvider } from "@/lib/ai/providers"; import { createDocument } from "@/lib/ai/tools/create-document"; import { getWeather } from "@/lib/ai/tools/get-weather"; import { requestSuggestions } from "@/lib/ai/tools/request-suggestions"; +import { searchWeb } from "@/lib/ai/tools/search-web"; import { updateDocument } from "@/lib/ai/tools/update-document"; import { isProductionEnvironment } from "@/lib/constants"; import { @@ -47,6 +49,8 @@ import { type PostRequestBody, postRequestBodySchema } from "./schema"; export const maxDuration = 60; +const ENABLE_RATE_LIMITING = true; + let globalStreamContext: ResumableStreamContext | null = null; const getTokenlensCatalog = cache( @@ -116,13 +120,15 @@ export async function POST(request: Request) { const userType: UserType = session.user.type; - const messageCount = await getMessageCountByUserId({ - id: session.user.id, - differenceInHours: 24, - }); + if (ENABLE_RATE_LIMITING) { + const messageCount = await getMessageCountByUserId({ + id: session.user.id, + differenceInHours: 24, + }); - if (messageCount > entitlementsByUserType[userType].maxMessagesPerDay) { - return new ChatSDKError("rate_limit:chat").toResponse(); + if (messageCount > entitlementsByUserType[userType].maxMessagesPerDay) { + return new ChatSDKError("rate_limit:chat").toResponse(); + } } const chat = await getChatById({ id }); @@ -189,6 +195,7 @@ export async function POST(request: Request) { ? 
[] : [ "getWeather", + "searchWeb", "createDocument", "updateDocument", "requestSuggestions", @@ -196,6 +203,7 @@ export async function POST(request: Request) { experimental_transform: smoothStream({ chunking: "word" }), tools: { getWeather, + searchWeb, createDocument: createDocument({ session, dataStream }), updateDocument: updateDocument({ session, dataStream }), requestSuggestions: requestSuggestions({ @@ -230,13 +238,27 @@ export async function POST(request: Request) { return; } - const summary = getUsage({ modelId, usage, providers }); - finalMergedUsage = { ...usage, ...summary, modelId } as AppUsage; - dataStream.write({ type: "data-usage", data: finalMergedUsage }); + const summary = getUsage({ + modelId, + usage, + providers, + }); + finalMergedUsage = { + ...usage, + ...summary, + modelId, + } as AppUsage; + dataStream.write({ + type: "data-usage", + data: finalMergedUsage, + }); } catch (err) { console.warn("TokenLens enrichment failed", err); finalMergedUsage = usage; - dataStream.write({ type: "data-usage", data: finalMergedUsage }); + dataStream.write({ + type: "data-usage", + data: finalMergedUsage, + }); } }, }); diff --git a/app/(chat)/api/history/route.ts b/app/(chat)/api/history/route.ts index 2525a9a1f0..23615e305a 100644 --- a/app/(chat)/api/history/route.ts +++ b/app/(chat)/api/history/route.ts @@ -1,6 +1,6 @@ import type { NextRequest } from "next/server"; import { auth } from "@/app/(auth)/auth"; -import { getChatsByUserId, deleteAllChatsByUserId } from "@/lib/db/queries"; +import { deleteAllChatsByUserId, getChatsByUserId } from "@/lib/db/queries"; import { ChatSDKError } from "@/lib/errors"; export async function GET(request: NextRequest) { diff --git a/app/(chat)/api/voice/deepgram-token/route.ts b/app/(chat)/api/voice/deepgram-token/route.ts new file mode 100644 index 0000000000..47c0931c12 --- /dev/null +++ b/app/(chat)/api/voice/deepgram-token/route.ts @@ -0,0 +1,86 @@ +/** + * Deepgram Temporary Token Generation API Route + * + * **Endpoint**: POST /api/voice/deepgram-token + * + * **Purpose**: Generates short-lived JWT tokens for secure client-side + * Deepgram API access without exposing permanent API keys. + * + * **Security**: + * - Token TTL: 30 seconds (Deepgram default) + * - Scopes: usage:write only (can't create keys, access billing, etc.) + * - WebSocket connections stay open beyond token expiry + * - Never exposes main API key to client + * + * **Response**: + * ```json + * { + * "token": "eyJ0eXAiOiJKV1QiLCJhbGc...", + * "expires_in": 30 + * } + * ``` + * + * **Usage in Frontend**: + * ```typescript + * const { token } = await fetch('/api/voice/deepgram-token', { method: 'POST' }); + * const deepgram = createClient(token); + * const connection = deepgram.listen.live({ ... 
}); + * ``` + * + * **Environment Variables**: + * - DEEPGRAM_API_KEY: Main Deepgram API key (server-side only) + * + * @see hooks/use-deepgram-stream.ts - Frontend Deepgram connection + * @see https://developers.deepgram.com/docs/token-based-authentication + */ + +import { NextResponse } from "next/server"; + +export async function POST() { + try { + const apiKey = process.env.DEEPGRAM_API_KEY; + + if (!apiKey) { + console.error("Missing DEEPGRAM_API_KEY environment variable"); + return NextResponse.json( + { error: "Deepgram API key not configured" }, + { status: 500 } + ); + } + + // Generate temporary JWT token via /v1/auth/grant endpoint + // This creates a 30-second token that allows usage:write access + const response = await fetch("https://api.deepgram.com/v1/auth/grant", { + method: "POST", + headers: { + Authorization: `Token ${apiKey}`, + "Content-Type": "application/json", + }, + }); + + if (!response.ok) { + const error = await response.text(); + console.error("Deepgram token generation failed:", error); + return NextResponse.json( + { error: "Failed to generate token" }, + { status: response.status } + ); + } + + const data = await response.json(); + + return NextResponse.json({ + token: data.access_token, + expires_in: data.expires_in, + }); + } catch (error) { + console.error("Deepgram token generation error:", error); + return NextResponse.json( + { + error: + error instanceof Error ? error.message : "Failed to generate token", + }, + { status: 500 } + ); + } +} diff --git a/app/(chat)/api/voice/detect-eot/route.ts b/app/(chat)/api/voice/detect-eot/route.ts new file mode 100644 index 0000000000..923d3dcacd --- /dev/null +++ b/app/(chat)/api/voice/detect-eot/route.ts @@ -0,0 +1,173 @@ +/** + * End of Turn (EOT) Detection API Route + * + * **Endpoint**: POST /api/voice/detect-eot + * + * **Purpose**: Determines if a user has finished speaking or is just pausing. + * Uses the Python nlp-worker's EOT endpoint which runs the livekit/turn-detector model. + * + * **Optional Service**: This endpoint gracefully falls back to simple heuristics + * when the NLP Worker service is not available. The app will continue to function + * normally without the service. + * + * **Request Body**: + * ```json + * { + * "chatHistory": [ + * { "role": "user", "content": "Hello" }, + * { "role": "assistant", "content": "Hi there!" }, + * { "role": "user", "content": "Can you help me with" } + * ] + * } + * ``` + * + * **Response**: + * ```json + * { + * "eou_probability": 0.042, + * "unlikely_threshold": 0.15, + * "is_end_of_utterance": false, + * "_fallback": false // true if using heuristics instead of ML model + * } + * ``` + * + * **Fallback Behavior**: + * When NLP Worker is unavailable, uses simple heuristics: + * - Checks for ending punctuation (., !, ?) + * - Requires minimum word count (3+ words) + * - Works well for complete sentences + * + * **Integration with VAD**: + * Call this after VAD detects silence and transcription completes. + * Use `is_end_of_utterance` to decide whether to send message or keep listening. 
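+ *
+ * **Example client call** (an illustrative sketch; it mirrors the fetch made in
+ * components/chat.tsx and assumes `transcript` holds the user's current utterance):
+ * ```typescript
+ * const res = await fetch("/api/voice/detect-eot", {
+ *   method: "POST",
+ *   headers: { "Content-Type": "application/json" },
+ *   body: JSON.stringify({
+ *     chatHistory: [{ role: "user", content: transcript }],
+ *   }),
+ * });
+ * const { is_end_of_utterance, _fallback } = await res.json();
+ * if (is_end_of_utterance) {
+ *   // Turn is complete: submit the accumulated transcript as a chat message.
+ * }
+ * ```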
+ * + * **Environment Variables (Optional)**: + * - NLP_WORKER_URL: URL of nlp-worker (default: http://localhost:8097) + * - NLP_WORKER_API_KEY: API key for authentication + * + * @see lib/nlp-worker-client.ts - Client implementation + * @see hooks/use-voice-input.ts - VAD integration example + * @see VOICE_FEATURES.md - Setup and architecture documentation + */ + +import { NextResponse } from "next/server"; +import type { EOTChatMessage } from "@/lib/nlp-worker-client"; +import { nlpWorkerClient } from "@/lib/nlp-worker-client"; + +const DEBUG = process.env.DEBUG === "true"; +const END_PUNCTUATION_REGEX = /[.!?]$/; +const WORD_COUNT_REGEX = /\s+/; + +/** + * Fallback EOT detection using simple heuristics + * + * This provides basic but effective end-of-turn detection when the + * NLP Worker service is unavailable. + * + * @param transcript - The user's current utterance + * @returns EOT response with heuristic-based decision + */ +function fallbackEOTDetection(transcript: string) { + const trimmed = transcript.trim(); + + // Check for ending punctuation + const hasEndPunctuation = END_PUNCTUATION_REGEX.test(trimmed); + + // Count words + const wordCount = trimmed + .split(WORD_COUNT_REGEX) + .filter((word) => word.length > 0).length; + + // Simple but effective rules: + // 1. Must end with punctuation + // 2. Must have at least 3 words (avoid "Yes." or "Ok.") + const isComplete = hasEndPunctuation && wordCount >= 3; + + // Calculate a simple "probability" based on heuristics + let probability = 0.0; + + if (hasEndPunctuation) { + probability += 0.5; + } + if (wordCount >= 3) { + probability += 0.3; + } + if (wordCount >= 5) { + probability += 0.2; + } + + return { + eou_probability: probability, + unlikely_threshold: 0.15, + is_end_of_utterance: isComplete, + _fallback: true, + _method: "heuristic", + }; +} + +export async function POST(request: Request) { + try { + const body = await request.json(); + const { chatHistory } = body as { chatHistory: EOTChatMessage[] }; + + // Validate input + if (!chatHistory || !Array.isArray(chatHistory)) { + return NextResponse.json( + { error: "chatHistory must be an array" }, + { status: 400 } + ); + } + + if (chatHistory.length === 0) { + return NextResponse.json( + { error: "chatHistory must contain at least one message" }, + { status: 400 } + ); + } + + // Extract the current utterance for fallback + const lastMessage = chatHistory.at(-1); + const currentUtterance = lastMessage?.content || ""; + + // Try NLP Worker first if configured + if (nlpWorkerClient.isConfigured()) { + const result = await nlpWorkerClient.detectEOT(chatHistory); + + // If successful, return the result + if (!("error" in result)) { + return NextResponse.json({ + ...result, + _fallback: false, + _method: "ml_model", + }); + } + + if (DEBUG) { + console.log( + "[EOT] NLP Worker unavailable, using fallback:", + result.error + ); + } + } + + // Use fallback heuristics + const fallbackResult = fallbackEOTDetection(currentUtterance); + + return NextResponse.json(fallbackResult, { status: 200 }); + } catch (error) { + console.error("[EOT] Unexpected API error:", error); + + // Even in case of unexpected errors, try to provide a reasonable fallback + return NextResponse.json( + { + eou_probability: 0.5, + unlikely_threshold: 0.15, + is_end_of_utterance: true, + _fallback: true, + _method: "error_fallback", + _error: error instanceof Error ? 
error.message : "Unknown error", + }, + { status: 200 } + ); + } +} diff --git a/app/(chat)/api/voice/synthesize/route.ts b/app/(chat)/api/voice/synthesize/route.ts new file mode 100644 index 0000000000..aa2004a0d5 --- /dev/null +++ b/app/(chat)/api/voice/synthesize/route.ts @@ -0,0 +1,143 @@ +/** + * Text-to-Speech (TTS) API Route + * + * **Endpoint**: POST /api/voice/synthesize + * + * **Purpose**: Converts text to natural-sounding speech audio using Cartesia's Sonic TTS model. + * Returns streaming PCM audio data that can be played directly via the Web Audio API. + * + * **Used By**: + * - `components/message-actions.tsx` - Speaker button on assistant messages + * - `hooks/use-player.ts` - Audio playback hook consumes the stream + * + * **Integration**: + * This endpoint is called when a user clicks the speaker button on an assistant message + * (when TTS is enabled). The returned audio stream is passed to the usePlayer hook which + * uses the Web Audio API to play the synthesized speech in real-time. + * + * **Request Body**: + * ```json + * { + * "text": "Hello world", // Required: Text to synthesize + * "voiceId": "79a125e8-cd45-4c13-8a67-188112f4dd22" // Optional: Cartesia voice ID + * } + * ``` + * + * **Response**: + * - Success: Streaming audio/raw response with PCM_F32LE audio data + * - Error 400: No text provided + * - Error 500: CARTESIA_API_KEY not configured or synthesis failed + * + * **Audio Format**: + * - Container: Raw (no container, pure audio data) + * - Encoding: PCM_F32LE (32-bit float, little-endian) + * - Sample Rate: 24,000 Hz + * - Channels: 1 (mono) + * + * **Cartesia Sonic Model**: + * - Model: sonic-english + * - Provider: Cartesia AI (https://cartesia.ai) + * - Features: Low-latency streaming, natural prosody, emotional expression + * - Latency: ~200-500ms to first audio byte + * + * **Environment Variables Required**: + * - CARTESIA_API_KEY: Get from https://cartesia.ai + * + * **Flow**: + * 1. Client sends POST with text to synthesize + * 2. Server validates text and API key + * 3. Calls Cartesia TTS API with text and voice settings + * 4. Streams raw PCM audio back to client + * 5. Client's usePlayer hook decodes and plays via AudioContext + * + * **Voice ID**: Default voice is a natural-sounding English voice. + * Browse available voices at: https://docs.cartesia.ai/api-reference/tts/voices + * + * @param request - Next.js request object containing JSON body with text and optional voiceId + * @returns Streaming Response with raw PCM audio data or JSON error + * + * @example + * ```typescript + * // Client-side usage (from message-actions.tsx): + * const response = await fetch("/api/voice/synthesize", { + * method: "POST", + * headers: { "Content-Type": "application/json" }, + * body: JSON.stringify({ text: "Hello world" }), + * }); + * + * if (response.ok && response.body) { + * player.play(response.body, () => console.log("Playback complete")); + * } + * ``` + * + * @see hooks/use-player.ts - Audio playback consumer + * @see components/message-actions.tsx - UI trigger for TTS + * @see https://docs.cartesia.ai/api-reference/tts/bytes - Cartesia API docs + */ +import { NextResponse } from "next/server"; + +/** + * POST handler for text-to-speech synthesis. + * + * Accepts text input and returns streaming PCM audio from Cartesia Sonic. 
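+ *
+ * A decoding sketch of how the returned stream could be played with the Web Audio API.
+ * The real playback logic lives in hooks/use-player.ts, which is not part of this diff,
+ * so treat this as an assumption rather than the actual implementation
+ * (24 kHz mono PCM_F32LE, per the output_format below):
+ * ```typescript
+ * const ctx = new AudioContext({ sampleRate: 24_000 });
+ * const reader = response.body!.getReader();
+ * let playAt = ctx.currentTime;
+ * for (;;) {
+ *   const { value, done } = await reader.read();
+ *   if (done || !value) break;
+ *   // Assumes chunks arrive 4-byte aligned; a robust player buffers leftovers.
+ *   const samples = new Float32Array(value.buffer, value.byteOffset, value.byteLength / 4);
+ *   const chunk = ctx.createBuffer(1, samples.length, 24_000);
+ *   chunk.copyToChannel(samples, 0);
+ *   const source = ctx.createBufferSource();
+ *   source.buffer = chunk;
+ *   source.connect(ctx.destination);
+ *   source.start(playAt);
+ *   playAt += chunk.duration;
+ * }
+ * ```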
+ */ +export async function POST(request: Request) { + try { + const { text, voiceId } = await request.json(); + + if (!text) { + return NextResponse.json({ error: "No text provided" }, { status: 400 }); + } + + if (!process.env.CARTESIA_API_KEY) { + return NextResponse.json( + { error: "CARTESIA_API_KEY is not configured" }, + { status: 500 } + ); + } + + const response = await fetch("https://api.cartesia.ai/tts/bytes", { + method: "POST", + headers: { + "Cartesia-Version": "2024-06-30", + "Content-Type": "application/json", + "X-API-Key": process.env.CARTESIA_API_KEY, + }, + body: JSON.stringify({ + model_id: "sonic-english", + transcript: text, + voice: { + mode: "id", + id: voiceId || "79a125e8-cd45-4c13-8a67-188112f4dd22", // Default voice + }, + output_format: { + container: "raw", + encoding: "pcm_f32le", + sample_rate: 24_000, + }, + }), + }); + + if (!response.ok) { + const error = await response.text(); + console.error("Cartesia API error:", error); + return NextResponse.json( + { error: "Voice synthesis failed" }, + { status: 500 } + ); + } + + // Return the streaming audio response + return new Response(response.body, { + headers: { + "Content-Type": "audio/raw", + }, + }); + } catch (error) { + console.error("TTS error:", error); + return NextResponse.json( + { error: "Voice synthesis failed" }, + { status: 500 } + ); + } +} diff --git a/app/(chat)/api/voice/token/route.ts b/app/(chat)/api/voice/token/route.ts new file mode 100644 index 0000000000..b233f38321 --- /dev/null +++ b/app/(chat)/api/voice/token/route.ts @@ -0,0 +1,89 @@ +/** + * Cartesia Access Token Generation API Route + * + * **Endpoint**: POST /api/voice/token + * + * **Purpose**: Generates short-lived Cartesia access tokens for client-side TTS. + * Allows secure client-side streaming without exposing the API key. + * + * **Used By**: + * - Voice Agent mode in `components/chat.tsx` + * - Client-side TTS streaming for low-latency audio playback + * + * **Security**: + * - Tokens expire in 60 seconds + * - Only TTS grant is provided + * - API key remains secure on server + * + * **Response**: + * ```json + * { + * "token": "temporary_access_token_here", + * "expiresIn": 60 + * } + * ``` + * + * **Error Responses**: + * - 500: CARTESIA_API_KEY not configured or token generation failed + * + * @see https://docs.cartesia.ai/get-started/authenticate-your-client-applications + */ +import { NextResponse } from "next/server"; + +export async function POST() { + try { + if (!process.env.CARTESIA_API_KEY) { + return NextResponse.json( + { error: "CARTESIA_API_KEY is not configured" }, + { status: 500 } + ); + } + + const response = await fetch("https://api.cartesia.ai/access-token", { + method: "POST", + headers: { + "Cartesia-Version": "2024-06-30", + "Content-Type": "application/json", + Authorization: `Bearer ${process.env.CARTESIA_API_KEY}`, + }, + body: JSON.stringify({ + grants: { tts: true }, + expires_in: 60, // 60 seconds + }), + }); + + if (!response.ok) { + const error = await response.text(); + console.error("Cartesia token generation error:", error); + return NextResponse.json( + { error: "Token generation failed" }, + { status: 500 } + ); + } + + const data = await response.json(); + + console.log("Cartesia token API response received"); + console.log("- Token field:", data.token ? 
"present" : "MISSING"); + console.log("- Token length:", data.token?.length || 0); + + if (!data.token) { + console.error("Cartesia API returned no token:", data); + return NextResponse.json( + { error: "Token generation failed - no token in response" }, + { status: 500 } + ); + } + + return NextResponse.json({ + token: data.token, // Cartesia returns "token" not "access_token" + expiresIn: 60, + }); + } catch (error) { + console.error("Token generation error:", error); + return NextResponse.json( + { error: "Token generation failed" }, + { status: 500 } + ); + } +} diff --git a/app/(chat)/api/voice/transcribe/route.ts b/app/(chat)/api/voice/transcribe/route.ts new file mode 100644 index 0000000000..cea8b28bd5 --- /dev/null +++ b/app/(chat)/api/voice/transcribe/route.ts @@ -0,0 +1,128 @@ +/** + * Speech-to-Text (STT) Transcription API Route + * + * **Endpoint**: POST /api/voice/transcribe + * + * **Purpose**: Transcribes audio input to text using Groq's Whisper large-v3 model. + * Accepts audio files in various formats (WAV, WebM, MP3, etc.) and returns + * the transcribed text as JSON. + * + * **Used By**: + * - `hooks/use-voice-input.ts` - Both VAD and PTT modes send audio here + * - Called after user stops speaking (VAD) or stops recording (PTT) + * + * **Request Format**: + * - Method: POST + * - Content-Type: multipart/form-data + * - Body: FormData with "audio" field containing audio file + * + * **Audio File**: + * - Field name: "audio" + * - Supported formats: WAV, WebM, MP3, M4A, etc. (Whisper accepts most formats) + * - VAD sends: WAV format (encoded from Float32Array) + * - PTT sends: WebM format with opus codec (browser MediaRecorder output) + * + * **Response**: + * - Success (200): `{ transcript: "transcribed text here" }` + * - Error (400): No audio file, or empty transcription + * - Error (500): GROQ_API_KEY not configured, or API failure + * + * **Groq Whisper Model**: + * - Model: whisper-large-v3 + * - Provider: Groq (https://groq.com) + * - Features: Multi-language support, high accuracy, fast inference + * - Speed: ~500-1000ms typical transcription time + * - Languages: 99+ languages supported (auto-detected) + * + * **Environment Variables Required**: + * - GROQ_API_KEY: Get from https://console.groq.com/keys + * + * **Error Handling**: + * - Empty audio returns 400 "No transcript generated" + * - API failures return 500 "Transcription failed" + * - All errors logged to console for debugging + * + * **Flow**: + * 1. Client captures audio (VAD or PTT) + * 2. Audio sent as FormData multipart upload + * 3. Server extracts File from FormData + * 4. Validates file exists and API key configured + * 5. Calls Groq Whisper API with audio file + * 6. Returns trimmed transcript text + * 7. 
Client displays text in chat input field + * + * @param request - Next.js request with FormData containing audio file + * @returns JSON response with transcript or error message + * + * @example + * ```typescript + * // Client-side usage (from use-voice-input.ts): + * const formData = new FormData(); + * formData.append("audio", audioBlob, "audio.wav"); + * + * const response = await fetch("/api/voice/transcribe", { + * method: "POST", + * body: formData, + * }); + * + * const { transcript } = await response.json(); + * setInput(transcript); // Display in chat input + * ``` + * + * @see hooks/use-voice-input.ts - Audio capture and API caller + * @see https://console.groq.com/docs/speech-text - Groq Whisper API docs + */ +import Groq from "groq-sdk"; +import { NextResponse } from "next/server"; + +const groq = new Groq({ + apiKey: process.env.GROQ_API_KEY, +}); + +/** + * POST handler for audio transcription. + * + * Accepts audio file via FormData and returns transcribed text via Groq Whisper. + */ +export async function POST(request: Request) { + try { + const formData = await request.formData(); + const audioFile = formData.get("audio") as File; + + if (!audioFile) { + return NextResponse.json( + { error: "No audio file provided" }, + { status: 400 } + ); + } + + if (!process.env.GROQ_API_KEY) { + return NextResponse.json( + { error: "GROQ_API_KEY is not configured" }, + { status: 500 } + ); + } + + const transcription = await groq.audio.transcriptions.create({ + file: audioFile, + model: "whisper-large-v3", + }); + + const transcript = transcription.text.trim(); + + if (!transcript) { + return NextResponse.json( + { error: "No transcript generated" }, + { status: 400 } + ); + } + + return NextResponse.json({ transcript }); + } catch (error) { + console.error("Transcription error:", error); + return NextResponse.json( + { error: "Transcription failed" }, + { status: 500 } + ); + } +} diff --git a/app/context/deepgram-context-provider.tsx b/app/context/deepgram-context-provider.tsx new file mode 100644 index 0000000000..55a3cb1810 --- /dev/null +++ b/app/context/deepgram-context-provider.tsx @@ -0,0 +1,127 @@ +"use client"; + +import { + createClient, + type LiveClient, + type LiveSchema, + LiveTranscriptionEvents, + SOCKET_STATES, +} from "@deepgram/sdk"; + +import { + createContext, + type FunctionComponent, + type ReactNode, + useContext, + useState, +} from "react"; + +export type { LiveTranscriptionEvent } from "@deepgram/sdk"; +// biome-ignore lint/performance/noBarrelFile: Re-exporting Deepgram enums for convenience +export { + LiveTranscriptionEvents, + SOCKET_STATES as LiveConnectionState, +} from "@deepgram/sdk"; + +type DeepgramContextType = { + connection: LiveClient | null; + connectToDeepgram: (options: LiveSchema, endpoint?: string) => Promise; + disconnectFromDeepgram: () => void; + connectionState: SOCKET_STATES; +}; + +const DeepgramContext = createContext( + undefined +); + +type DeepgramContextProviderProps = { + children: ReactNode; +}; + +const getToken = async (): Promise => { + const response = await fetch("/api/voice/deepgram-token", { + method: "POST", + cache: "no-store", + }); + const result = await response.json(); + return result.token; // Our endpoint returns {token, expires_in} +}; + +const DeepgramContextProvider: FunctionComponent< + DeepgramContextProviderProps +> = ({ children }) => { + const [connection, setConnection] = useState(null); + const [connectionState, setConnectionState] = useState( + SOCKET_STATES.closed + ); + + /** + * Connects to the 
Deepgram speech recognition service and sets up a live transcription session. + * + * @param options - The configuration options for the live transcription session. + * @param endpoint - The optional endpoint URL for the Deepgram service. + * @returns A Promise that resolves when the connection is established. + */ + const connectToDeepgram = async (options: LiveSchema, endpoint?: string) => { + console.log("DeepgramContext: Getting token..."); + const token = await getToken(); + console.log("DeepgramContext: Token received, creating client..."); + const deepgram = createClient({ accessToken: token }); // Use token directly + + console.log("DeepgramContext: Creating live connection..."); + const conn = deepgram.listen.live(options, endpoint); + + conn.addListener(LiveTranscriptionEvents.Open, () => { + console.log("DeepgramContext: WebSocket OPEN event received"); + setConnectionState(SOCKET_STATES.open); + }); + + conn.addListener(LiveTranscriptionEvents.Close, () => { + console.log("DeepgramContext: WebSocket CLOSE event received"); + setConnectionState(SOCKET_STATES.closed); + }); + + conn.addListener(LiveTranscriptionEvents.Metadata, (data: any) => { + console.log("DeepgramContext: Metadata received", data); + }); + + conn.addListener(LiveTranscriptionEvents.Error, (error: any) => { + console.error("DeepgramContext: Error", error); + }); + + console.log("DeepgramContext: Setting connection"); + setConnection(conn); + }; + + const disconnectFromDeepgram = () => { + if (connection) { + connection.finish(); + setConnection(null); + } + }; + + return ( + + {children} + + ); +}; + +function useDeepgram(): DeepgramContextType { + const context = useContext(DeepgramContext); + if (context === undefined) { + throw new Error( + "useDeepgram must be used within a DeepgramContextProvider" + ); + } + return context; +} + +export { DeepgramContextProvider, useDeepgram }; diff --git a/app/context/microphone-context-provider.tsx b/app/context/microphone-context-provider.tsx new file mode 100644 index 0000000000..b838556e4b --- /dev/null +++ b/app/context/microphone-context-provider.tsx @@ -0,0 +1,189 @@ +"use client"; + +import { + createContext, + type ReactNode, + useCallback, + useContext, + useState, +} from "react"; + +type MicrophoneContextType = { + microphone: MediaRecorder | null; + startMicrophone: () => void; + stopMicrophone: () => void; + setupMicrophone: () => void; + microphoneState: MicrophoneState | null; + mediaStream: MediaStream | null; +}; + +export const MicrophoneEvents = { + DataAvailable: "dataavailable", + Error: "error", + Pause: "pause", + Resume: "resume", + Start: "start", + Stop: "stop", +} as const; + +export type MicrophoneEvents = + (typeof MicrophoneEvents)[keyof typeof MicrophoneEvents]; + +export const MicrophoneState = { + NotSetup: -1, + SettingUp: 0, + Ready: 1, + Opening: 2, + Open: 3, + Error: 4, + Pausing: 5, + Paused: 6, +} as const; + +export type MicrophoneState = + (typeof MicrophoneState)[keyof typeof MicrophoneState]; + +const MicrophoneContext = createContext( + undefined +); + +type MicrophoneContextProviderProps = { + children: ReactNode; +}; + +const MicrophoneContextProvider: React.FC = ({ + children, +}) => { + const [microphoneState, setMicrophoneState] = useState( + MicrophoneState.NotSetup + ); + const [microphone, setMicrophone] = useState(null); + const [mediaStream, setMediaStream] = useState(null); + + const setupMicrophone = async () => { + console.log("MicrophoneContext: Setting up microphone..."); + 
setMicrophoneState(MicrophoneState.SettingUp); + + try { + console.log("MicrophoneContext: Requesting user media..."); + const userMedia = await navigator.mediaDevices.getUserMedia({ + audio: { + noiseSuppression: true, + echoCancellation: true, + }, + }); + + console.log( + "MicrophoneContext: User media granted, creating MediaRecorder" + ); + const newMicrophone = new MediaRecorder(userMedia); + + console.log( + "MicrophoneContext: MediaRecorder created, setting state to Ready" + ); + setMicrophoneState(MicrophoneState.Ready); + setMicrophone(newMicrophone); + setMediaStream(userMedia); + } catch (err: any) { + console.error("MicrophoneContext: Setup error:", err); + + throw err; + } + }; + + const stopMicrophone = useCallback(() => { + console.log("MicrophoneContext: Stopping microphone and all tracks..."); + setMicrophoneState(MicrophoneState.Pausing); + + // Stop the MediaRecorder + if (microphone?.state === "recording" || microphone?.state === "paused") { + console.log( + "MicrophoneContext: Stopping MediaRecorder, current state:", + microphone.state + ); + microphone.stop(); + } + + // Stop all tracks on the MediaStream to release the microphone + if (mediaStream) { + console.log("MicrophoneContext: Stopping all MediaStream tracks..."); + for (const track of mediaStream.getTracks()) { + console.log( + "MicrophoneContext: Stopping track:", + track.kind, + "enabled:", + track.enabled + ); + track.stop(); + } + } + + // Clear the microphone and stream references + setMicrophone(null); + setMediaStream(null); + setMicrophoneState(MicrophoneState.NotSetup); + console.log("MicrophoneContext: Microphone fully stopped and cleaned up"); + }, [microphone, mediaStream]); + + const startMicrophone = useCallback(() => { + if (!microphone || !mediaStream) { + console.log( + "MicrophoneContext: Cannot start - microphone or stream not available" + ); + return; + } + + console.log( + "MicrophoneContext: Starting microphone, state:", + microphone.state + ); + setMicrophoneState(MicrophoneState.Opening); + + if (microphone.state === "paused") { + console.log("MicrophoneContext: Resuming paused microphone"); + microphone.resume(); + } else if (microphone.state === "inactive") { + console.log( + "MicrophoneContext: Starting fresh microphone with 250ms chunks" + ); + microphone.start(250); + } else { + console.log( + "MicrophoneContext: Microphone already in state:", + microphone.state + ); + } + + setMicrophoneState(MicrophoneState.Open); + console.log("MicrophoneContext: Microphone state set to Open"); + }, [microphone, mediaStream]); + + return ( + + {children} + + ); +}; + +function useMicrophone(): MicrophoneContextType { + const context = useContext(MicrophoneContext); + + if (context === undefined) { + throw new Error( + "useMicrophone must be used within a MicrophoneContextProvider" + ); + } + + return context; +} + +export { MicrophoneContextProvider, useMicrophone }; diff --git a/app/globals.css b/app/globals.css index 70285a3526..4109a15457 100644 --- a/app/globals.css +++ b/app/globals.css @@ -1,11 +1,11 @@ @import "tailwindcss"; +/* Add KaTeX CSS for math rendering */ +@import "katex/dist/katex.min.css"; + /* include utility classes in streamdown */ @source '../node_modules/streamdown/dist/index.js'; -/* Add KaTeX CSS for math rendering */ -@import 'katex/dist/katex.min.css'; - /* custom variant for setting dark mode programmatically */ @custom-variant dark (&:is(.dark, .dark *)); diff --git a/app/layout.tsx b/app/layout.tsx index 66db5da925..e0cd12b786 100644 --- a/app/layout.tsx +++ 
b/app/layout.tsx @@ -5,6 +5,8 @@ import { ThemeProvider } from "@/components/theme-provider"; import "./globals.css"; import { SessionProvider } from "next-auth/react"; +import { DeepgramContextProvider } from "./context/deepgram-context-provider"; +import { MicrophoneContextProvider } from "./context/microphone-context-provider"; export const metadata: Metadata = { metadataBase: new URL("https://chat.vercel.ai"), @@ -79,7 +81,11 @@ export default function RootLayout({ enableSystem > - {children} + + + {children} + + diff --git a/app/not-found.tsx b/app/not-found.tsx new file mode 100644 index 0000000000..1b22a24e80 --- /dev/null +++ b/app/not-found.tsx @@ -0,0 +1,23 @@ +import Link from "next/link"; + +export default function NotFound() { + return ( +
+    <div>
+      <h1>404</h1>
+      <h2>This page could not be found.</h2>
+      <p>The page you are looking for doesn't exist or has been moved.</p>
+      <Link href="/">Go back home</Link>
+    </div>
+ ); +} diff --git a/artifacts/code/client.tsx b/artifacts/code/client.tsx index 5444a12f55..342995a87b 100644 --- a/artifacts/code/client.tsx +++ b/artifacts/code/client.tsx @@ -157,7 +157,12 @@ export const codeArtifact = new Artifact<"code", Metadata>({ ...metadata.outputs.filter((output) => output.id !== runId), { id: runId, - contents: [{ type: "text", value: message }], + contents: [ + { + type: "text", + value: message, + }, + ], status: "loading_packages", }, ], diff --git a/artifacts/sheet/client.tsx b/artifacts/sheet/client.tsx index 5db4abf438..22a5ebdc52 100644 --- a/artifacts/sheet/client.tsx +++ b/artifacts/sheet/client.tsx @@ -70,7 +70,9 @@ export const sheetArtifact = new Artifact<"sheet", Metadata>({ icon: , description: "Copy as .csv", onClick: ({ content }) => { - const parsed = parse(content, { skipEmptyLines: true }); + const parsed = parse(content, { + skipEmptyLines: true, + }); const nonEmptyRows = parsed.data.filter((row) => row.some((cell) => cell.trim() !== "") @@ -91,7 +93,10 @@ export const sheetArtifact = new Artifact<"sheet", Metadata>({ sendMessage({ role: "user", parts: [ - { type: "text", text: "Can you please format and clean the data?" }, + { + type: "text", + text: "Can you please format and clean the data?", + }, ], }); }, diff --git a/biome.jsonc b/biome.jsonc index ba27d8fd73..29f49e1aba 100644 --- a/biome.jsonc +++ b/biome.jsonc @@ -6,9 +6,15 @@ "**/*", "!components/ui", "!lib/utils.ts", - "!hooks/use-mobile.ts" + "!hooks/use-mobile.ts", + "!**/example*.tsx", + "!**/*.disabled.*" ] }, + /* Enable if you want 4 instead of 2 tabs */ + // "formatter": { + // "indentWidth": 4 + // }, "linter": { "rules": { "suspicious": { diff --git a/components/app-sidebar.tsx b/components/app-sidebar.tsx index 5210804974..60d00e2c39 100644 --- a/components/app-sidebar.tsx +++ b/components/app-sidebar.tsx @@ -8,7 +8,10 @@ import { toast } from "sonner"; import { useSWRConfig } from "swr"; import { unstable_serialize } from "swr/infinite"; import { PlusIcon, TrashIcon } from "@/components/icons"; -import { SidebarHistory, getChatHistoryPaginationKey } from "@/components/sidebar-history"; +import { + getChatHistoryPaginationKey, + SidebarHistory, +} from "@/components/sidebar-history"; import { SidebarUserNav } from "@/components/sidebar-user-nav"; import { Button } from "@/components/ui/button"; import { @@ -19,7 +22,6 @@ import { SidebarMenu, useSidebar, } from "@/components/ui/sidebar"; -import { Tooltip, TooltipContent, TooltipTrigger } from "./ui/tooltip"; import { AlertDialog, AlertDialogAction, @@ -30,6 +32,7 @@ import { AlertDialogHeader, AlertDialogTitle, } from "./ui/alert-dialog"; +import { Tooltip, TooltipContent, TooltipTrigger } from "./ui/tooltip"; export function AppSidebar({ user }: { user: User | undefined }) { const router = useRouter(); @@ -96,7 +99,6 @@ export function AppSidebar({ user }: { user: User | undefined }) { onClick={() => { setOpenMobile(false); router.push("/"); - router.refresh(); }} type="button" variant="ghost" @@ -118,13 +120,16 @@ export function AppSidebar({ user }: { user: User | undefined }) { {user && } - + Delete all chats? - This action cannot be undone. This will permanently delete all your - chats and remove them from our servers. + This action cannot be undone. This will permanently delete all + your chats and remove them from our servers. 
diff --git a/components/artifact.tsx b/components/artifact.tsx index 1d38846666..00244dbdc0 100644 --- a/components/artifact.tsx +++ b/components/artifact.tsx @@ -68,6 +68,8 @@ function PureArtifact({ isReadonly, selectedVisibilityType, selectedModelId, + voiceAgentActive, + voiceAgentOverlay, }: { chatId: string; input: string; @@ -84,6 +86,8 @@ function PureArtifact({ isReadonly: boolean; selectedVisibilityType: VisibilityType; selectedModelId: string; + voiceAgentActive?: boolean; + voiceAgentOverlay?: React.ReactNode; }) { const { artifact, setArtifact, metadata, setMetadata } = useArtifact(); @@ -331,21 +335,24 @@ function PureArtifact({ />
- + {!voiceAgentActive && ( + + )} + {voiceAgentActive && voiceAgentOverlay}
@@ -524,6 +531,9 @@ export const Artifact = memo(PureArtifact, (prevProps, nextProps) => { if (prevProps.selectedVisibilityType !== nextProps.selectedVisibilityType) { return false; } + if (prevProps.voiceAgentActive !== nextProps.voiceAgentActive) { + return false; + } return true; }); diff --git a/components/chat-header.tsx b/components/chat-header.tsx index a330805b5e..0125cd7b28 100644 --- a/components/chat-header.tsx +++ b/components/chat-header.tsx @@ -3,7 +3,6 @@ import Link from "next/link"; import { useRouter } from "next/navigation"; import { memo } from "react"; -import { useWindowSize } from "usehooks-ts"; import { SidebarToggle } from "@/components/sidebar-toggle"; import { Button } from "@/components/ui/button"; import { PlusIcon, VercelIcon } from "./icons"; @@ -22,25 +21,22 @@ function PureChatHeader({ const router = useRouter(); const { open } = useSidebar(); - const { width: windowWidth } = useWindowSize(); - return (
- {(!open || windowWidth < 768) && ( - - )} + {!isReadonly && ( state.isVisible); + + // Voice Agent state + const [voiceAgentActive, setVoiceAgentActive] = useState(false); + const [voiceAgentState, setVoiceAgentState] = + useState("idle"); + const [isMicMuted, setIsMicMuted] = useState(false); + const [partialTranscript, setPartialTranscript] = useState(""); + const [accumulatedTranscript, setAccumulatedTranscript] = useState(""); + const justInterruptedRef = useRef(false); // Flag: just interrupted, ignore next transcripts + + // Deepgram and Microphone contexts (from working demo) + const { + connection, + connectToDeepgram, + disconnectFromDeepgram, + connectionState, + } = useDeepgram(); + const { + setupMicrophone, + microphone, + startMicrophone, + stopMicrophone, + microphoneState, + mediaStream, + } = useMicrophone(); + + // Track if we've already set things up to prevent infinite loops + const hasMicSetupRef = useRef(false); + const hasConnectedRef = useRef(false); + const lastSubmittedRef = useRef(""); + const lastSubmittedTimestampRef = useRef(0); + const eotDetectionInProgressRef = useRef(false); + const lastAutoPlayedMessageIdRef = useRef(null); + + useEffect(() => { + setIsMounted(true); + }, []); + useEffect(() => { currentModelIdRef.current = currentModelId; }, [currentModelId]); @@ -122,6 +188,21 @@ export function Chat({ }, }); + // Track AI response status for voice agent states + useEffect(() => { + if (!voiceAgentActive) { + return; + } + + if (status === "streaming") { + // AI is responding + setVoiceAgentState("speaking"); + } else if (status === "ready" && voiceAgentState === "speaking") { + // AI finished speaking, back to listening + setVoiceAgentState("listening"); + } + }, [status, voiceAgentActive, voiceAgentState]); + const searchParams = useSearchParams(); const query = searchParams.get("query"); @@ -139,13 +220,398 @@ export function Chat({ } }, [query, sendMessage, hasAppendedQuery, id]); + // Setup microphone when component mounts - ONCE + useEffect(() => { + if (voiceAgentEnabled && !hasMicSetupRef.current) { + if (DEBUG) { + console.log("Setting up microphone..."); + } + setupMicrophone(); + hasMicSetupRef.current = true; + } + }, [voiceAgentEnabled, setupMicrophone]); + + // Connect to Deepgram when microphone is ready - ONCE + useEffect(() => { + if ( + voiceAgentEnabled && + microphoneState === MicrophoneState.Ready && + !hasConnectedRef.current + ) { + if (DEBUG) { + console.log("Connecting to Deepgram..."); + } + connectToDeepgram({ + model: "nova-2", + interim_results: true, + smart_format: true, + utterance_end_ms: 1000, + vad_events: true, + }); + setVoiceAgentActive(true); + setVoiceAgentState("listening"); + hasConnectedRef.current = true; + } + }, [voiceAgentEnabled, microphoneState, connectToDeepgram]); + + // Handle transcription events and audio streaming + useEffect(() => { + if ( + !microphone || + !connection || + connectionState !== LiveConnectionState.open + ) { + return; + } + + const onData = (e: BlobEvent) => { + if (e.data.size > 0) { + connection.send(e.data); + } + }; + + const onTranscript = (data: any) => { + const transcript = data.channel?.alternatives?.[0]?.transcript || ""; + const isFinal = data.speech_final || false; + + if (!transcript) { + return; + } + + if (DEBUG) { + console.log(`Deepgram ${isFinal ? 
"FINAL" : "partial"}:`, transcript); + } + + // Ignore transcripts right after an interrupt (prevents loop) + if (justInterruptedRef.current) { + if (DEBUG) { + console.log( + `Voice Agent: Ignoring transcript after interrupt (isFinal=${isFinal}): "${transcript.substring(0, 20)}"` + ); + } + // Clear flag on FINAL transcript (interrupt utterance ended) + if (isFinal) { + if (DEBUG) { + console.log( + "Voice Agent: Interrupt utterance FINAL - clearing flag, re-enabling transcripts" + ); + } + justInterruptedRef.current = false; + } + return; + } + + // INTERRUPT: If user starts speaking while AI audio is playing OR queued, STOP IMMEDIATELY! + const isCurrentlyPlaying = cartesiaTTS.isPlayingRef.current; + const isSynthesizing = cartesiaTTS.isSynthesizingRef?.current || false; + + console.log( + `Voice Agent: Interrupt check - isPlaying=${isCurrentlyPlaying}, isSynthesizing=${isSynthesizing}, transcript="${transcript.substring(0, 20)}"` + ); + + if ( + (isCurrentlyPlaying || isSynthesizing) && + transcript.trim().length > 0 + ) { + justInterruptedRef.current = true; // Set flag to ignore next transcripts + if (DEBUG) { + console.log( + `Voice Agent: ⚠️ USER INTERRUPTED (timestamp=${Date.now()}) - Stopping TTS NOW!` + ); + } + // Stop audio completely + cartesiaTTS.stop(); + // CRITICAL: Clear accumulated transcript so interrupt doesn't get submitted as new message! + setAccumulatedTranscript(""); + setPartialTranscript(""); + // Back to listening + setVoiceAgentState("listening"); + // Also stop the LLM if it's still generating + if (status === "streaming") { + if (DEBUG) { + console.log("User interrupted - stopping LLM"); + } + stop(); + } + // Don't process this transcript - it was an interrupt, not a new message + return; + } + + if (isFinal) { + // Use callback form of setState to get current value + setAccumulatedTranscript((currentAccumulated) => { + const newAccumulated = currentAccumulated + ? `${currentAccumulated} ${transcript}` + : transcript; + + // Prevent duplicate submissions - check if already submitted OR in progress + if ( + lastSubmittedRef.current === newAccumulated || + eotDetectionInProgressRef.current + ) { + if (DEBUG) { + console.log("Skipping duplicate EOT detection:", { + alreadySubmitted: lastSubmittedRef.current === newAccumulated, + inProgress: eotDetectionInProgressRef.current, + }); + } + return newAccumulated; + } + + // Mark EOT detection as in progress BEFORE any async operations + eotDetectionInProgressRef.current = true; + + // Check for EOT using nlp-worker API + // Note: This API gracefully falls back to heuristics if NLP Worker is unavailable + fetch("/api/voice/detect-eot", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + chatHistory: [{ role: "user", content: newAccumulated }], + }), + }) + .then((res) => res.json()) + .then((result) => { + const eotDetected = + result.is_end_of_utterance || result.eot_detected || false; + const probability = + result.eou_probability || result.eot_probability || 0; + const usingFallback = result._fallback || false; + const method = result._method || "unknown"; + + if (DEBUG) { + console.log( + `EOT ${method}: detected=${eotDetected}, prob=${probability.toFixed(3)}${usingFallback ? 
" (fallback)" : ""}` + ); + } + + // Double-check before submission to prevent race conditions + const now = Date.now(); + const timeSinceLastSubmit = + now - lastSubmittedTimestampRef.current; + + // Prevent submitting the same text, or any text within 500ms of the last submission + if ( + eotDetected && + lastSubmittedRef.current !== newAccumulated && + timeSinceLastSubmit > 500 + ) { + // Set the ref BEFORE sending to prevent any race conditions + lastSubmittedRef.current = newAccumulated; + lastSubmittedTimestampRef.current = now; + + if (DEBUG) { + console.log("Submitting message:", newAccumulated); + } + setVoiceAgentState("processing"); + + sendMessage({ + role: "user", + parts: [{ type: "text", text: newAccumulated }], + }); + + setPartialTranscript(""); + setAccumulatedTranscript(""); + } else if (eotDetected && DEBUG) { + console.log("EOT detected but skipping:", { + alreadySubmitted: lastSubmittedRef.current === newAccumulated, + tooSoon: timeSinceLastSubmit <= 500, + timeSinceLastSubmit, + }); + } + + eotDetectionInProgressRef.current = false; + }) + .catch((err) => { + // This should rarely happen since the API returns fallback on errors + if (DEBUG) { + console.warn( + "EOT API unreachable, using client-side heuristic:", + err + ); + } + + // Client-side fallback heuristic (last resort) + const hasEndPunctuation = END_PUNCT_REGEX.test(transcript.trim()); + const wordCount = transcript + .trim() + .split(WHITESPACE_REGEX).length; + + const now = Date.now(); + const timeSinceLastSubmit = + now - lastSubmittedTimestampRef.current; + + // Double-check before submission + if ( + hasEndPunctuation && + wordCount >= 3 && + lastSubmittedRef.current !== newAccumulated && + timeSinceLastSubmit > 500 + ) { + // Set the ref BEFORE sending to prevent any race conditions + lastSubmittedRef.current = newAccumulated; + lastSubmittedTimestampRef.current = now; + + if (DEBUG) { + console.log("EOT fallback, submitting:", newAccumulated); + } + setVoiceAgentState("processing"); + + sendMessage({ + role: "user", + parts: [{ type: "text", text: newAccumulated }], + }); + + setPartialTranscript(""); + setAccumulatedTranscript(""); + } + + eotDetectionInProgressRef.current = false; + }); + + return newAccumulated; + }); + } else { + // Partial transcript - update UI + setPartialTranscript(transcript); + + // First partial = interrupt + if (status === "streaming") { + if (DEBUG) { + console.log("User interrupted - stopping LLM"); + } + stop(); + player?.stop(); + } + } + }; + + // Add event listeners + connection.addListener(LiveTranscriptionEvents.Transcript, onTranscript); + microphone.addEventListener(MicrophoneEvents.DataAvailable, onData); + + // Only start if not already recording or paused + if (microphone.state !== "recording" && microphone.state !== "paused") { + startMicrophone(); + } + + return () => { + connection?.removeListener( + LiveTranscriptionEvents.Transcript, + onTranscript + ); + microphone?.removeEventListener(MicrophoneEvents.DataAvailable, onData); + }; + }, [ + connectionState, + microphone, + connection, + startMicrophone, + sendMessage, + status, + stop, + player, + cartesiaTTS, + ]); + + // Stream text chunks to TTS immediately in voice agent mode + const lastProcessedTextLengthRef = useRef(0); + + useEffect(() => { + if (!voiceAgentEnabled || !ttsEnabled) { + return; + } + + // Find the last assistant message + const lastMessage = messages.at(-1); + if (!lastMessage || lastMessage.role !== "assistant") { + // Reset when no assistant message + 
lastProcessedTextLengthRef.current = 0; + return; + } + + // Get current text content + const textParts = lastMessage.parts?.filter((p) => p.type === "text"); + if (!textParts || textParts.length === 0) { + return; + } + + const fullText = textParts.map((p) => p.text).join(" "); + const currentLength = fullText.length; + + // Check if this is a new message + if (lastAutoPlayedMessageIdRef.current !== lastMessage.id) { + if (DEBUG) { + console.log( + "Voice Agent: New message detected, starting TTS streaming" + ); + } + lastAutoPlayedMessageIdRef.current = lastMessage.id; + lastProcessedTextLengthRef.current = 0; + } + + // Check if we have new text + if (currentLength > lastProcessedTextLengthRef.current) { + const newText = fullText.substring(lastProcessedTextLengthRef.current); + lastProcessedTextLengthRef.current = currentLength; + + if (DEBUG) { + console.log("Voice Agent: New text chunk:", newText); + } + + // Stream text chunk directly to Cartesia + const streamChunk = async () => { + if (newText.trim().length === 0) { + return; + } + + setVoiceAgentState("speaking"); + + try { + // Stream is continuing if status is "streaming" + const shouldContinue = status === "streaming"; + + if (DEBUG) { + console.log( + `Voice Agent: Streaming chunk (continue=${shouldContinue}):`, + newText + ); + } + + // For final chunk, provide callback to reset state + const onComplete = shouldContinue + ? undefined + : () => { + if (DEBUG) { + console.log("Voice Agent: Playback complete"); + } + setVoiceAgentState("listening"); + cartesiaTTS.markComplete(); + }; + + await cartesiaTTS.synthesizeAndPlay( + newText, + shouldContinue, + onComplete + ); + } catch (error) { + console.error("Voice Agent: TTS streaming error:", error); + setVoiceAgentState("listening"); + } + }; + + streamChunk(); + } + }, [messages, status, voiceAgentEnabled, ttsEnabled, cartesiaTTS]); + const { data: votes } = useSWR( messages.length >= 2 ? `/api/vote?chatId=${id}` : null, fetcher ); const [attachments, setAttachments] = useState([]); - const isArtifactVisible = useArtifactSelector((state) => state.isVisible); + + // Note: Voice agent can work alongside artifact panel since artifact has its own layout useAutoResume({ autoResume, @@ -168,15 +634,18 @@ export function Chat({ isArtifactVisible={isArtifactVisible} isReadonly={isReadonly} messages={messages} + player={isMounted ? player : undefined} regenerate={regenerate} selectedModelId={initialChatModel} setMessages={setMessages} status={status} + ttsEnabled={isMounted ? ttsEnabled : false} votes={votes} /> + {/* Composer or Voice Agent - same container, mutually exclusive */}
- {!isReadonly && ( + {!isReadonly && !voiceAgentActive && ( )} + + {/* Voice Agent replaces composer in same container */} + {isMounted && voiceAgentActive && ( + { + if (DEBUG) { + console.log("Voice Agent: Deactivating..."); + } + // Stop any ongoing TTS + cartesiaTTS.stop(); + // Stop microphone and disconnect from Deepgram + if (DEBUG) { + console.log("Voice Agent: Stopping microphone..."); + } + stopMicrophone(); + if (DEBUG) { + console.log("Voice Agent: Disconnecting from Deepgram..."); + } + disconnectFromDeepgram(); + // Reset voice agent + setVoiceAgentActive(false); + setVoiceAgentState("idle"); + setVoiceAgentEnabled(false); // Turn off in settings too + setPartialTranscript(""); + setAccumulatedTranscript(""); + // Reset TTS streaming refs + lastAutoPlayedMessageIdRef.current = null; + lastProcessedTextLengthRef.current = 0; + // Reset setup refs so voice agent can be re-enabled + hasMicSetupRef.current = false; + hasConnectedRef.current = false; + lastSubmittedRef.current = ""; + lastSubmittedTimestampRef.current = 0; + eotDetectionInProgressRef.current = false; + }} + onStateChange={setVoiceAgentState} + onToggleMute={() => setIsMicMuted(!isMicMuted)} + player={player} + state={voiceAgentState} + /> + )}
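
The `useCartesiaTTS` hook driven by the handlers above is not included in this diff; inferred from the calls made here (`synthesizeAndPlay`, `stop`, `markComplete`, and the `isPlayingRef`/`isSynthesizingRef` checks), its surface is roughly the following sketch, not the actual implementation:

```typescript
// Shape inferred from usage in components/chat.tsx; the real hook is defined elsewhere.
type UseCartesiaTTSResult = {
  // Streams one text chunk to Cartesia and plays it. `isContinuing` signals that
  // more chunks of the same assistant message will follow; `onComplete` runs
  // after the final chunk finishes playing.
  synthesizeAndPlay: (
    text: string,
    isContinuing: boolean,
    onComplete?: () => void
  ) => Promise<void>;
  // Immediately stops playback and any queued synthesis (used when the user interrupts).
  stop: () => void;
  // Marks the current assistant message as fully played.
  markComplete: () => void;
  // Refs read synchronously inside the Deepgram transcript handler.
  isPlayingRef: { current: boolean };
  isSynthesizingRef?: { current: boolean };
};
```
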
@@ -212,6 +730,56 @@ export function Chat({ setMessages={setMessages} status={status} stop={stop} + voiceAgentActive={isMounted && voiceAgentActive} + voiceAgentOverlay={ + isMounted && voiceAgentActive ? ( + { + if (DEBUG) { + console.log("Voice Agent: Deactivating..."); + } + // Stop any ongoing TTS + cartesiaTTS.stop(); + // Stop microphone and disconnect from Deepgram + if (DEBUG) { + console.log("Voice Agent: Stopping microphone..."); + } + stopMicrophone(); + if (DEBUG) { + console.log("Voice Agent: Disconnecting from Deepgram..."); + } + disconnectFromDeepgram(); + // Reset voice agent + setVoiceAgentActive(false); + setVoiceAgentState("idle"); + setVoiceAgentEnabled(false); // Turn off in settings too + setPartialTranscript(""); + setAccumulatedTranscript(""); + // Reset TTS streaming refs + lastAutoPlayedMessageIdRef.current = null; + lastProcessedTextLengthRef.current = 0; + // Reset setup refs so voice agent can be re-enabled + hasMicSetupRef.current = false; + hasConnectedRef.current = false; + lastSubmittedRef.current = ""; + lastSubmittedTimestampRef.current = 0; + eotDetectionInProgressRef.current = false; + }} + onStateChange={setVoiceAgentState} + onToggleMute={() => setIsMicMuted(!isMicMuted)} + player={player} + state={voiceAgentState} + /> + ) : undefined + } votes={votes} /> diff --git a/components/data-stream-handler.tsx b/components/data-stream-handler.tsx index 65aa0c414f..0d5eeeb8d0 100644 --- a/components/data-stream-handler.tsx +++ b/components/data-stream-handler.tsx @@ -1,12 +1,12 @@ "use client"; -import { useEffect, useRef } from "react"; +import { useEffect } from "react"; import { initialArtifactData, useArtifact } from "@/hooks/use-artifact"; import { artifactDefinitions } from "./artifact"; import { useDataStream } from "./data-stream-provider"; export function DataStreamHandler() { - const { dataStream,setDataStream } = useDataStream(); + const { dataStream, setDataStream } = useDataStream(); const { artifact, setArtifact, setMetadata } = useArtifact(); @@ -77,7 +77,7 @@ export function DataStreamHandler() { } }); } - }, [dataStream, setArtifact, setMetadata, artifact]); + }, [dataStream, setArtifact, setMetadata, artifact, setDataStream]); return null; } diff --git a/components/document-preview.tsx b/components/document-preview.tsx index 76a7b468a4..a96e448d6c 100644 --- a/components/document-preview.tsx +++ b/components/document-preview.tsx @@ -63,7 +63,11 @@ export function DocumentPreview({ return ( ); diff --git a/components/elements/context.tsx b/components/elements/context.tsx index 96af118367..8d5d6db32f 100644 --- a/components/elements/context.tsx +++ b/components/elements/context.tsx @@ -114,7 +114,7 @@ export const Context = ({ className, usage, ...props }: ContextProps) => { className={cn( "inline-flex select-none items-center gap-1 rounded-md text-sm", "cursor-pointer bg-background text-foreground", - "focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 outline-none ring-offset-background", + "outline-none ring-offset-background focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2", className )} type="button" diff --git a/components/icons.tsx b/components/icons.tsx index 3f9e6e1236..65417ced25 100644 --- a/components/icons.tsx +++ b/components/icons.tsx @@ -1211,3 +1211,54 @@ export const WarningIcon = ({ size = 16 }: { size?: number }) => { ); }; + +export const MicrophoneIcon = ({ size = 16 }: { size?: number }) => ( + + + +); + +export const VolumeIcon = ({ size = 16 }: { size?: number }) => ( + + 
+ +); + +export const VolumeMutedIcon = ({ size = 16 }: { size?: number }) => ( + + + +); diff --git a/components/message-actions.tsx b/components/message-actions.tsx index 155b75f7eb..09015cd798 100644 --- a/components/message-actions.tsx +++ b/components/message-actions.tsx @@ -1,12 +1,21 @@ import equal from "fast-deep-equal"; -import { memo } from "react"; +import { Volume2 } from "lucide-react"; +import { memo, useCallback, useState } from "react"; import { toast } from "sonner"; import { useSWRConfig } from "swr"; import { useCopyToClipboard } from "usehooks-ts"; +import type { usePlayer } from "@/hooks/use-player"; import type { Vote } from "@/lib/db/schema"; import type { ChatMessage } from "@/lib/types"; import { Action, Actions } from "./elements/actions"; -import { CopyIcon, PencilEditIcon, ThumbDownIcon, ThumbUpIcon } from "./icons"; +import { + CopyIcon, + LoaderIcon, + PencilEditIcon, + StopIcon, + ThumbDownIcon, + ThumbUpIcon, +} from "./icons"; export function PureMessageActions({ chatId, @@ -14,19 +23,20 @@ export function PureMessageActions({ vote, isLoading, setMode, + player, + ttsEnabled, }: { chatId: string; message: ChatMessage; vote: Vote | undefined; isLoading: boolean; setMode?: (mode: "view" | "edit") => void; + player?: ReturnType; + ttsEnabled?: boolean; }) { const { mutate } = useSWRConfig(); const [_, copyToClipboard] = useCopyToClipboard(); - - if (isLoading) { - return null; - } + const [isLoadingTTS, setIsLoadingTTS] = useState(false); const textFromParts = message.parts ?.filter((part) => part.type === "text") @@ -34,7 +44,7 @@ export function PureMessageActions({ .join("\n") .trim(); - const handleCopy = async () => { + const handleCopy = useCallback(async () => { if (!textFromParts) { toast.error("There's no text to copy!"); return; @@ -42,7 +52,71 @@ export function PureMessageActions({ await copyToClipboard(textFromParts); toast.success("Copied to clipboard!"); - }; + }, [textFromParts, copyToClipboard]); + + const handlePlayTTS = useCallback(async () => { + if (!textFromParts || !player) { + console.log("TTS: Missing text or player", { + hasText: !!textFromParts, + hasPlayer: !!player, + }); + return; + } + + if (player.isPlaying) { + console.log("TTS: Stopping playback"); + player.stop(); + return; + } + + console.log( + "TTS: Starting synthesis for text:", + `${textFromParts.substring(0, 50)}...` + ); + setIsLoadingTTS(true); + + try { + const response = await fetch("/api/voice/synthesize", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ text: textFromParts }), + }); + + console.log("TTS: Response status:", response.status, response.ok); + + if (!response.ok) { + const errorText = await response.text(); + console.error("TTS: API error response:", errorText); + throw new Error(`TTS failed: ${errorText}`); + } + + if (!response.body) { + console.error("TTS: No response body"); + throw new Error("No response body"); + } + + console.log("TTS: Got audio stream, starting playback"); + setIsLoadingTTS(false); + + await player.play(response.body, () => { + console.log("TTS: Playback ended"); + }); + + console.log("TTS: Playback started successfully"); + } catch (error) { + console.error("TTS error:", error); + toast.error( + `Failed to play audio: ${error instanceof Error ? 
error.message : String(error)}` + ); + setIsLoadingTTS(false); + } + }, [textFromParts, player]); + + if (isLoading) { + return null; + } // User messages get edit (on hover) and copy actions if (message.role === "user") { @@ -73,6 +147,29 @@ export function PureMessageActions({ + {player && ( + + {isLoadingTTS ? ( + + ) : player.isPlaying ? ( + + ) : ( + + )} + + )} + ["regenerate"]; isReadonly: boolean; requiresScrollPadding: boolean; + player?: ReturnType; + ttsEnabled?: boolean; }) => { const [mode, setMode] = useState<"view" | "edit">("view"); @@ -134,7 +139,9 @@ const PurePreviewMessage = ({ data-testid="message-content" style={ message.role === "user" - ? { backgroundColor: "#006cff" } + ? { + backgroundColor: "#006cff", + } : undefined } > @@ -226,7 +233,10 @@ const PurePreviewMessage = ({ return (
@@ -276,7 +286,9 @@ const PurePreviewMessage = ({ isLoading={isLoading} key={`action-${message.id}`} message={message} + player={player} setMode={setMode} + ttsEnabled={ttsEnabled} vote={vote} /> )} @@ -328,12 +340,9 @@ export const ThinkingMessage = () => {
-
- Thinking... -
+
Thinking...
); }; - diff --git a/components/messages.tsx b/components/messages.tsx index 8e0ef089d2..0869cdd029 100644 --- a/components/messages.tsx +++ b/components/messages.tsx @@ -4,6 +4,7 @@ import { AnimatePresence } from "framer-motion"; import { ArrowDownIcon } from "lucide-react"; import { memo, useEffect } from "react"; import { useMessages } from "@/hooks/use-messages"; +import type { usePlayer } from "@/hooks/use-player"; import type { Vote } from "@/lib/db/schema"; import type { ChatMessage } from "@/lib/types"; import { useDataStream } from "./data-stream-provider"; @@ -21,6 +22,8 @@ type MessagesProps = { isReadonly: boolean; isArtifactVisible: boolean; selectedModelId: string; + player?: ReturnType; + ttsEnabled?: boolean; }; function PureMessages({ @@ -31,7 +34,9 @@ function PureMessages({ setMessages, regenerate, isReadonly, - selectedModelId, + selectedModelId: _selectedModelId, + player, + ttsEnabled, }: MessagesProps) { const { containerRef: messagesContainerRef, @@ -46,7 +51,7 @@ function PureMessages({ useDataStream(); useEffect(() => { - if (status === "submitted") { + if (status === "submitted" || status === "streaming") { requestAnimationFrame(() => { const container = messagesContainerRef.current; if (container) { @@ -59,6 +64,21 @@ function PureMessages({ } }, [status, messagesContainerRef]); + // Also scroll when messages change (for voice agent) + useEffect(() => { + if (messages.length > 0) { + requestAnimationFrame(() => { + const container = messagesContainerRef.current; + if (container) { + container.scrollTo({ + top: container.scrollHeight, + behavior: "smooth", + }); + } + }); + } + }, [messages.length, messagesContainerRef]); + return (
vote.messageId === message.id) diff --git a/components/multimodal-input.tsx b/components/multimodal-input.tsx index 299f140bea..0002fd3ff8 100644 --- a/components/multimodal-input.tsx +++ b/components/multimodal-input.tsx @@ -20,6 +20,8 @@ import { toast } from "sonner"; import { useLocalStorage, useWindowSize } from "usehooks-ts"; import { saveChatModelAsCookie } from "@/app/(chat)/actions"; import { SelectItem } from "@/components/ui/select"; +import { useVoiceInput } from "@/hooks/use-voice-input"; +import { useVoiceSettings } from "@/hooks/use-voice-settings"; import { chatModels } from "@/lib/ai/models"; import { myProvider } from "@/lib/ai/providers"; import type { Attachment, ChatMessage } from "@/lib/types"; @@ -45,7 +47,17 @@ import { import { PreviewAttachment } from "./preview-attachment"; import { SuggestedActions } from "./suggested-actions"; import { Button } from "./ui/button"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, +} from "./ui/dialog"; +import { Label } from "./ui/label"; +import { Switch } from "./ui/switch"; import type { VisibilityType } from "./visibility-selector"; +import { VoiceControls } from "./voice-controls"; function PureMultimodalInput({ chatId, @@ -83,6 +95,43 @@ function PureMultimodalInput({ const textareaRef = useRef(null); const { width } = useWindowSize(); + // Voice functionality + const { + vadMode, + setVadMode, + ttsEnabled, + setTtsEnabled, + voiceAgentEnabled, + setVoiceAgentEnabled, + } = useVoiceSettings(); + const [voiceSettingsOpen, setVoiceSettingsOpen] = useState(false); + + const { isRecording, isTranscribing, toggleRecording } = useVoiceInput({ + vadMode, + onTranscript: (transcript) => { + setInput((prev) => (prev ? `${prev} ${transcript}` : transcript)); + }, + onStop: () => { + // Optional: handle when recording stops + }, + }); + + // Update visual indicator based on recording state + useEffect(() => { + const indicator = document.querySelector( + "[data-recording-indicator]" + ) as HTMLElement; + if (indicator) { + if (isRecording) { + indicator.style.opacity = "1"; + indicator.style.transform = "scale(1.1)"; + } else { + indicator.style.opacity = "0.3"; + indicator.style.transform = "scale(1)"; + } + } + }, [isRecording]); + const adjustHeight = useCallback(() => { if (textareaRef.current) { textareaRef.current.style.height = "44px"; @@ -231,36 +280,45 @@ function PureMultimodalInput({ }, [setAttachments, uploadFile] ); - + const handlePaste = useCallback( async (event: ClipboardEvent) => { const items = event.clipboardData?.items; - if (!items) return; + if (!items) { + return; + } const imageItems = Array.from(items).filter((item) => - item.type.startsWith('image/'), + item.type.startsWith("image/") ); - if (imageItems.length === 0) return; + if (imageItems.length === 0) { + return; + } // Prevent default paste behavior for images event.preventDefault(); - setUploadQueue((prev) => [...prev, 'Pasted image']); + setUploadQueue((prev) => [...prev, "Pasted image"]); try { - const uploadPromises = imageItems.map(async (item) => { - const file = item.getAsFile(); - if (!file) return; - return uploadFile(file); - }); + const uploadPromises = imageItems + .map((item) => { + const file = item.getAsFile(); + if (!file) { + return null; + } + return uploadFile(file); + }) + .filter(Boolean); const uploadedAttachments = await Promise.all(uploadPromises); const successfullyUploadedAttachments = uploadedAttachments.filter( (attachment) => + attachment !== null && attachment !== undefined && 
attachment.url !== undefined && - attachment.contentType !== undefined, + attachment.contentType !== undefined ); setAttachments((curr) => [ @@ -268,22 +326,24 @@ function PureMultimodalInput({ ...(successfullyUploadedAttachments as Attachment[]), ]); } catch (error) { - console.error('Error uploading pasted images:', error); - toast.error('Failed to upload pasted image(s)'); + console.error("Error uploading pasted images:", error); + toast.error("Failed to upload pasted image(s)"); } finally { setUploadQueue([]); } }, - [setAttachments], + [setAttachments, uploadFile] ); // Add paste event listener to textarea useEffect(() => { const textarea = textareaRef.current; - if (!textarea) return; + if (!textarea) { + return; + } - textarea.addEventListener('paste', handlePaste); - return () => textarea.removeEventListener('paste', handlePaste); + textarea.addEventListener("paste", handlePaste); + return () => textarea.removeEventListener("paste", handlePaste); }, [handlePaste]); return ( @@ -367,13 +427,24 @@ function PureMultimodalInput({ />{" "}
- + + setVoiceSettingsOpen(true)} + onToggleRecording={toggleRecording} + setTtsEnabled={setTtsEnabled} + setVadMode={setVadMode} + status={status} + ttsEnabled={ttsEnabled} + vadMode={vadMode} + /> 0} status={status} - data-testid="send-button" > )} + + {/* Voice Settings Dialog */} + + + + Voice Settings + + Configure voice input, text-to-speech, and Voice Agent options. + + +
+ {/* TTS Toggle */} +
+
+ +

+ Enable audio playback for assistant messages +

+
+ +
+ + {/* Voice Agent Toggle */} +
+
+ +

+ Continuous conversation with visual feedback +

+
+ +
+ + {voiceAgentEnabled && ( +
+

Voice Agent Active

+

+ The floating orb will appear when you start a chat. Speak + naturally and the AI will respond when you finish. +

+
+ )} +
+
+
); } @@ -482,7 +606,7 @@ function PureModelSelectorCompact({ value={selectedModel?.name} > - + + +

{isMuted ? "Unmute" : "Mute"} microphone

+
+ + )} + + {/* Pause/Resume */} + + + + + +

{isPaused ? "Resume" : "Pause"} voice agent

+
+
+ + {/* End Session */} + + + + + +

End voice agent session

+
+
+ + + + {/* Hint text */} +

+ Speak naturally • AI will respond when you finish +

+ + ); +} diff --git a/components/voice-controls.tsx b/components/voice-controls.tsx new file mode 100644 index 0000000000..692057f252 --- /dev/null +++ b/components/voice-controls.tsx @@ -0,0 +1,185 @@ +/** biome-ignore-all lint/correctness/noUnusedFunctionParameters: Required for React component props interface */ +"use client"; + +import { Mic, Settings } from "lucide-react"; +import { memo, useEffect, useState } from "react"; +import type { VadMode } from "@/hooks/use-voice-settings"; +import { LoaderIcon } from "./icons"; +import { Button } from "./ui/button"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "./ui/tooltip"; + +/** + * Props for VoiceControls component. + * + * @property vadMode - Current voice detection mode ("always-on" | "push-to-talk") + * @property setVadMode - Function to change VAD mode + * @property ttsEnabled - Whether text-to-speech is enabled globally + * @property setTtsEnabled - Function to toggle TTS on/off + * @property isRecording - Whether voice is currently being recorded + * @property isTranscribing - Whether audio is being transcribed (API call in progress) + * @property onToggleRecording - Callback to start/stop recording (mode-aware) + * @property status - Current chat status for enabling/disabling controls + * @property onOpenSettings - Callback to open voice settings modal + */ +type VoiceControlsProps = { + vadMode: VadMode; + setVadMode: (mode: VadMode) => void; + ttsEnabled: boolean; + setTtsEnabled: (enabled: boolean) => void; + isRecording: boolean; + isTranscribing: boolean; + onToggleRecording: () => void; + status?: "ready" | "submitted" | "streaming" | "error"; + onOpenSettings?: () => void; +}; + +/** + * Voice control buttons component for chat composer. + * + * **Purpose**: Provides UI controls for voice input features. + * Renders microphone button for recording and settings button for voice options. + * + * **Used in**: `components/multimodal-input.tsx` - Integrated into chat composer toolbar + * + * **Features**: + * - **Microphone Button**: Toggle voice recording (behavior depends on vadMode) + * - PTT mode: Click to start, click to stop + * - Visual feedback: Pulsing blue highlight when recording + * - Loading spinner during transcription + * + * - **Settings Button**: Opens voice settings modal + * - Configure TTS (text-to-speech) + * - Future: Voice Agent mode configuration + * + * **Visual States**: + * - Recording: Blue pulsing animation on microphone button + * - Transcribing: Loading spinner replaces microphone icon + * - Disabled: Grayed out when chat is not ready + * - Mounted: Prevents hydration mismatch with server-side rendering + * + * **Hydration Protection**: + * Component renders a simplified version (all buttons disabled) until client-side + * JavaScript loads. This prevents React hydration errors when localStorage values + * differ from server-rendered defaults. 
+ * + * @param props - Voice control configuration and callbacks + * @returns Rendered voice control buttons with tooltips + * + * @see hooks/use-voice-input.ts - Voice recording logic + * @see hooks/use-voice-settings.ts - Settings persistence + * @see components/multimodal-input.tsx - Parent component + */ +function PureVoiceControls({ + vadMode, + setVadMode, + ttsEnabled, + setTtsEnabled, + isRecording, + isTranscribing, + onToggleRecording, + status, + onOpenSettings, +}: VoiceControlsProps) { + const [mounted, setMounted] = useState(false); + + useEffect(() => { + setMounted(true); + }, []); + + const isReady = status === "ready" || status === undefined; + const micDisabled = !isReady || isTranscribing; + + // Prevent hydration mismatch by not rendering until mounted + if (!mounted) { + return ( +
+ + +
+ ); + } + + return ( + +
+ + + + + +

Click to record

+

Voice transcription

+
+
+ + + + + + +

Voice settings

+
+
+
+
+ ); +} + +export const VoiceControls = memo(PureVoiceControls); diff --git a/components/weather.tsx b/components/weather.tsx index d4e7234a9a..9cfc291c3f 100644 --- a/components/weather.tsx +++ b/components/weather.tsx @@ -5,28 +5,78 @@ import { format, isWithinInterval } from "date-fns"; import { useEffect, useState } from "react"; const SunIcon = ({ size = 40 }: { size?: number }) => ( - - - - - - - - - - + + + + + + + + + + ); const MoonIcon = ({ size = 40 }: { size?: number }) => ( - - + + ); const CloudIcon = ({ size = 24 }: { size?: number }) => ( - - + + ); @@ -77,9 +127,13 @@ const SAMPLE = { timezone: "GMT", timezone_abbreviation: "GMT", elevation: 18, - current_units: { time: "iso8601", interval: "seconds", temperature_2m: "°C" }, - current: { time: "2024-10-07T19:30", interval: 900, temperature_2m: 29.3 }, - hourly_units: { time: "iso8601", temperature_2m: "°C" }, + current_units: { + time: "iso8601", + interval: "seconds", + temperature_2m: "°F", + }, + current: { time: "2024-10-07T19:30", interval: 900, temperature_2m: 84.7 }, + hourly_units: { time: "iso8601", temperature_2m: "°F" }, hourly: { time: [ "2024-10-07T00:00", @@ -184,14 +238,14 @@ const SAMPLE = { "2024-10-11T03:00", ], temperature_2m: [ - 36.6, 32.8, 29.5, 28.6, 29.2, 28.2, 27.5, 26.6, 26.5, 26, 25, 23.5, 23.9, - 24.2, 22.9, 21, 24, 28.1, 31.4, 33.9, 32.1, 28.9, 26.9, 25.2, 23, 21.1, - 19.6, 18.6, 17.7, 16.8, 16.2, 15.5, 14.9, 14.4, 14.2, 13.7, 13.3, 12.9, - 12.5, 13.5, 15.8, 17.7, 19.6, 21, 21.9, 22.3, 22, 20.7, 18.9, 17.9, 17.3, - 17, 16.7, 16.2, 15.6, 15.2, 15, 15, 15.1, 14.8, 14.8, 14.9, 14.7, 14.8, - 15.3, 16.2, 17.9, 19.6, 20.5, 21.6, 21, 20.7, 19.3, 18.7, 18.4, 17.9, - 17.3, 17, 17, 16.8, 16.4, 16.2, 16, 15.8, 15.7, 15.4, 15.4, 16.1, 16.7, - 17, 18.6, 19, 19.5, 19.4, 18.5, 17.9, 17.5, 16.7, 16.3, 16.1, + 97.9, 91, 85.1, 83.5, 84.6, 82.8, 81.5, 79.9, 79.7, 78.8, 77, 74.3, 75, + 75.6, 73.2, 69.8, 75.2, 82.6, 88.5, 93, 89.8, 84, 80.4, 77.4, 73.4, 70, + 67.3, 65.5, 63.9, 62.2, 61.2, 59.9, 58.8, 57.9, 57.6, 56.7, 55.9, 55.2, + 54.5, 56.3, 60.4, 63.9, 67.3, 69.8, 71.4, 72.1, 71.6, 69.3, 66, 64.2, + 63.1, 62.6, 62.1, 61.2, 60.1, 59.4, 59, 59, 59.2, 58.6, 58.6, 58.8, 58.5, + 58.6, 59.5, 61.2, 64.2, 67.3, 68.9, 70.9, 69.8, 69.3, 66.7, 65.7, 65.1, + 64.2, 63.1, 62.6, 62.6, 62.2, 61.5, 61.2, 60.8, 60.4, 60.3, 59.7, 59.7, + 61, 62.1, 62.6, 65.5, 66.2, 67.1, 66.9, 65.3, 64.2, 63.5, 62.1, 61.3, 61, ], }, daily_units: { @@ -233,16 +287,21 @@ export function Weather({ }: { weatherAtLocation?: WeatherAtLocation; }) { - const currentHigh = Math.max( - ...weatherAtLocation.hourly.temperature_2m.slice(0, 24) - ); - const currentLow = Math.min( - ...weatherAtLocation.hourly.temperature_2m.slice(0, 24) - ); + // Validate that all required data is present, otherwise use SAMPLE data + const data: WeatherAtLocation = + weatherAtLocation?.hourly?.temperature_2m && + weatherAtLocation?.current?.temperature_2m !== undefined && + weatherAtLocation?.daily?.sunrise?.[0] && + weatherAtLocation?.daily?.sunset?.[0] + ? 
weatherAtLocation + : SAMPLE; + + const currentHigh = Math.max(...data.hourly.temperature_2m.slice(0, 24)); + const currentLow = Math.min(...data.hourly.temperature_2m.slice(0, 24)); - const isDay = isWithinInterval(new Date(weatherAtLocation.current.time), { - start: new Date(weatherAtLocation.daily.sunrise[0]), - end: new Date(weatherAtLocation.daily.sunset[0]), + const isDay = isWithinInterval(new Date(data.current.time), { + start: new Date(data.daily.sunrise[0]), + end: new Date(data.daily.sunset[0]), }); const [isMobile, setIsMobile] = useState(false); @@ -260,97 +319,105 @@ export function Weather({ const hoursToShow = isMobile ? 5 : 6; - const currentTimeIndex = weatherAtLocation.hourly.time.findIndex( - (time) => new Date(time) >= new Date(weatherAtLocation.current.time) + const currentTimeIndex = data.hourly.time.findIndex( + (time) => new Date(time) >= new Date(data.current.time) ); - const displayTimes = weatherAtLocation.hourly.time.slice( + const displayTimes = data.hourly.time.slice( currentTimeIndex, currentTimeIndex + hoursToShow ); - const displayTemperatures = weatherAtLocation.hourly.temperature_2m.slice( + const displayTemperatures = data.hourly.temperature_2m.slice( currentTimeIndex, currentTimeIndex + hoursToShow ); - const location = weatherAtLocation.cityName || - `${weatherAtLocation.latitude?.toFixed(1)}°, ${weatherAtLocation.longitude?.toFixed(1)}°`; + const location = + data.cityName || + `${data.latitude?.toFixed(1)}°, ${data.longitude?.toFixed(1)}°`; return (
- +
-
-
- {location} -
+
+
{location}
- {format(new Date(weatherAtLocation.current.time), "MMM d, h:mm a")} + {format(new Date(data.current.time), "MMM d, h:mm a")}
-
+
-
+
{isDay ? : }
-
- {n(weatherAtLocation.current.temperature_2m)} +
+ {n(data.current.temperature_2m)} - {weatherAtLocation.current_units.temperature_2m} + {data.current_units.temperature_2m}
-
+
H: {n(currentHigh)}°
-
- L: {n(currentLow)}° -
+
L: {n(currentLow)}°
-
-
+
+
Hourly Forecast
{displayTimes.map((time, index) => { const hourTime = new Date(time); - const isCurrentHour = hourTime.getHours() === new Date().getHours(); - + const isCurrentHour = + hourTime.getHours() === new Date().getHours(); + return ( -
-
+
{index === 0 ? "Now" : format(hourTime, "ha")}
- -
+ +
- -
+ +
{n(displayTemperatures[index])}°
@@ -359,9 +426,11 @@ export function Weather({
-
-
Sunrise: {format(new Date(weatherAtLocation.daily.sunrise[0]), "h:mm a")}
-
Sunset: {format(new Date(weatherAtLocation.daily.sunset[0]), "h:mm a")}
+
+
+ Sunrise: {format(new Date(data.daily.sunrise[0]), "h:mm a")} +
+
Sunset: {format(new Date(data.daily.sunset[0]), "h:mm a")}
diff --git a/hooks/use-audio-amplitude.ts b/hooks/use-audio-amplitude.ts new file mode 100644 index 0000000000..bcfd282e3f --- /dev/null +++ b/hooks/use-audio-amplitude.ts @@ -0,0 +1,233 @@ +import { useEffect, useRef, useState } from "react"; + +/** + * Hook to extract real-time audio amplitude from a MediaStream (microphone). + * + * **Purpose**: Provides visual feedback for Voice Agent mode by analyzing + * microphone input levels using Web Audio API's AnalyserNode. + * + * **Used in**: `components/voice-agent-overlay.tsx` + * + * **How it works**: + * 1. Creates AudioContext and AnalyserNode + * 2. Connects MediaStream source to analyzer + * 3. Continuously samples audio data via requestAnimationFrame + * 4. Calculates RMS (root mean square) amplitude + * 5. Applies smoothing to prevent jitter + * + * @param stream - MediaStream from microphone (from getUserMedia or VAD) + * @param smoothing - Smoothing factor 0-1 (higher = smoother but slower response) + * @returns Normalized amplitude value 0-1 + * + * @example + * ```tsx + * const { stream } = useVoiceInput(); + * const amplitude = useMicrophoneAmplitude(stream); + * + * ``` + */ +export function useMicrophoneAmplitude( + stream: MediaStream | null, + smoothing = 0.8 +): number { + const [amplitude, setAmplitude] = useState(0); + const audioContextRef = useRef(null); + const analyserRef = useRef(null); + const dataArrayRef = useRef(null); + const rafRef = useRef(null); + const smoothedAmpRef = useRef(0); + + useEffect(() => { + if (!stream) { + // No stream, reset amplitude + setAmplitude(0); + smoothedAmpRef.current = 0; + return; + } + + // Create audio context and analyzer + const audioContext = new AudioContext(); + const analyser = audioContext.createAnalyser(); + analyser.fftSize = 256; + analyser.smoothingTimeConstant = 0.8; + + const source = audioContext.createMediaStreamSource(stream); + source.connect(analyser); + + const bufferLength = analyser.frequencyBinCount; + const dataArray = new Uint8Array(bufferLength); + + audioContextRef.current = audioContext; + analyserRef.current = analyser; + dataArrayRef.current = dataArray; + + // Animation loop to update amplitude + const updateAmplitude = () => { + if (!analyserRef.current || !dataArrayRef.current) { + return; + } + + analyserRef.current.getByteTimeDomainData(dataArrayRef.current); + + // Calculate RMS (root mean square) amplitude + let sum = 0; + // biome-ignore lint/style/useForOf: because im lazy + for (let i = 0; i < dataArrayRef.current.length; i++) { + const normalized = (dataArrayRef.current[i] - 128) / 128; + sum += normalized * normalized; + } + const rms = Math.sqrt(sum / dataArrayRef.current.length); + + // Apply smoothing + smoothedAmpRef.current = + smoothing * smoothedAmpRef.current + (1 - smoothing) * rms; + + // Normalize and clamp to 0-1 + // Boost the amplitude more for better visualization + const normalizedAmp = Math.min(1, smoothedAmpRef.current * 5); + setAmplitude(normalizedAmp); + + rafRef.current = requestAnimationFrame(updateAmplitude); + }; + + updateAmplitude(); + + // Cleanup + return () => { + if (rafRef.current) { + cancelAnimationFrame(rafRef.current); + } + if (audioContextRef.current) { + audioContextRef.current.close(); + } + }; + }, [stream, smoothing]); + + return amplitude; +} + +/** + * Hook to extract real-time audio amplitude from HTMLAudioElement (TTS playback). + * + * **Purpose**: Provides visual feedback during agent speech by analyzing + * TTS audio output levels. 
+ * + * **Used in**: `components/voice-agent-overlay.tsx` + * + * **How it works**: + * 1. Creates AudioContext and AnalyserNode + * 2. Connects audio element as source + * 3. Continuously samples audio data + * 4. Calculates frequency domain amplitude + * 5. Returns smoothed amplitude + * + * @param audioElement - HTMLAudioElement playing TTS audio + * @param smoothing - Smoothing factor 0-1 + * @returns Normalized amplitude value 0-1 + * + * @example + * ```tsx + * const audioRef = useRef(null); + * const amplitude = useAudioElementAmplitude(audioRef.current); + * + * ``` + */ +export function useAudioElementAmplitude( + audioElement: HTMLAudioElement | null, + smoothing = 0.8 +): number { + const [amplitude, setAmplitude] = useState(0); + const audioContextRef = useRef(null); + const analyserRef = useRef(null); + const dataArrayRef = useRef(null); + const rafRef = useRef(null); + const smoothedAmpRef = useRef(0); + const sourceRef = useRef(null); + + useEffect(() => { + if (!audioElement) { + setAmplitude(0); + smoothedAmpRef.current = 0; + return; + } + + // Create audio context and analyzer + const audioContext = new AudioContext(); + const analyser = audioContext.createAnalyser(); + analyser.fftSize = 256; + + // Create source from audio element + const source = audioContext.createMediaElementSource(audioElement); + source.connect(analyser); + analyser.connect(audioContext.destination); // Must connect to destination for audio to play + + const bufferLength = analyser.frequencyBinCount; + const dataArray = new Uint8Array(bufferLength); + + audioContextRef.current = audioContext; + analyserRef.current = analyser; + dataArrayRef.current = dataArray; + sourceRef.current = source; + + // Animation loop + const updateAmplitude = () => { + if (!analyserRef.current || !dataArrayRef.current) { + return; + } + + analyserRef.current.getByteFrequencyData(dataArrayRef.current); + + // Calculate average frequency amplitude + let sum = 0; + for (const value of dataArrayRef.current) { + sum += value; + } + const average = sum / dataArrayRef.current.length / 255; // Normalize to 0-1 + + // Apply smoothing + smoothedAmpRef.current = + smoothing * smoothedAmpRef.current + (1 - smoothing) * average; + + setAmplitude(smoothedAmpRef.current); + + rafRef.current = requestAnimationFrame(updateAmplitude); + }; + + updateAmplitude(); + + // Cleanup + return () => { + if (rafRef.current) { + cancelAnimationFrame(rafRef.current); + } + if (audioContextRef.current) { + audioContextRef.current.close(); + } + }; + }, [audioElement, smoothing]); + + return amplitude; +} + +/** + * Hook to track audio playback state from player. + * + * **Purpose**: Provides playback state for Voice Agent UI to show + * appropriate visual feedback. + * + * @param player - Audio player instance from usePlayer hook + * @returns Boolean indicating if audio is currently playing + */ +export function useIsPlaying( + player: { + isPlaying: boolean; + } | null +): boolean { + const [isPlaying, setIsPlaying] = useState(false); + + useEffect(() => { + setIsPlaying(player?.isPlaying ?? false); + }, [player?.isPlaying]); + + return isPlaying; +} diff --git a/hooks/use-cartesia-tts.ts b/hooks/use-cartesia-tts.ts new file mode 100644 index 0000000000..35e39fc578 --- /dev/null +++ b/hooks/use-cartesia-tts.ts @@ -0,0 +1,484 @@ +import { useCallback, useRef, useState } from "react"; + +/** + * React hook for client-side Cartesia TTS using WebSocket streaming. 
+ * + * **Purpose**: Ultra-low latency TTS by streaming text chunks directly to + * Cartesia's WebSocket API and playing audio immediately as it arrives. + * + * **Used in**: + * - Voice Agent mode in `components/chat.tsx` + * + * **How It Works**: + * 1. Fetches temporary Cartesia access token from server + * 2. Opens WebSocket connection to Cartesia + * 3. Streams text chunks with context_id for prosody continuity + * 4. Receives audio chunks and plays immediately (Web Audio API) + * 5. Maintains connection for entire conversation + * + * **Benefits vs REST API**: + * - Ultra-low latency: WebSocket persistent connection + * - True streaming: Send text chunks, get audio chunks immediately + * - Context preservation: Maintains prosody across sentences + * - Efficient: One connection for entire conversation + * - Multiplexing: Multiple concurrent generations + * + * **Audio Format**: + * - Format: PCM_F32LE (32-bit float, little-endian) + * - Sample Rate: 24,000 Hz + * - Channels: 1 (mono) + * + * @returns Object with TTS controls: + * - isPlaying: Boolean indicating if audio is currently playing + * - synthesizeAndPlay(text, callback): Synthesizes text and plays audio + * - stop(): Stops current playback and closes WebSocket + * - isConnected: Boolean indicating if WebSocket is connected + * + * @example + * ```tsx + * const cartesiaTTS = useCartesiaTTS(); + * + * // In voice agent mode, as text chunks arrive: + * onChunk={(chunk) => { + * cartesiaTTS.synthesizeAndPlay(chunk.text, () => { + * console.log("Chunk playback finished"); + * }); + * }} + * ``` + * + * @see app/(chat)/api/voice/token/route.ts - Token generation endpoint + * @see https://docs.cartesia.ai/api-reference/tts/tts - WebSocket API docs + */ +export function useCartesiaTTS() { + const [isPlaying, setIsPlaying] = useState(false); + const [isConnected, setIsConnected] = useState(false); + const isPlayingRef = useRef(false); // Keep a ref for callbacks + const isPreparingRef = useRef(false); // Track if we're preparing to play (connecting/synthesizing) + + // Helper to update both state and ref + const updateIsPlaying = useCallback((playing: boolean) => { + console.log( + `Cartesia: isPlaying changed: ${isPlayingRef.current} → ${playing} (timestamp=${Date.now()})` + ); + isPlayingRef.current = playing; + setIsPlaying(playing); + }, []); + + const audioContext = useRef(null); + const websocket = useRef(null); + const tokenCache = useRef<{ token: string; expiresAt: number } | null>(null); + const nextStartTime = useRef(0); + const contextId = useRef(null); + const currentCallback = useRef<(() => void) | null>(null); + const isFirstChunk = useRef(true); + const isSynthesizing = useRef(false); + const activeSourceNodes = useRef([]); + + /** + * Get a valid Cartesia access token, fetching a new one if needed. 
+ */ + const getToken = useCallback(async (): Promise => { + const now = Date.now(); + + // Return cached token if still valid (with 5s buffer) + if (tokenCache.current && tokenCache.current.expiresAt > now + 5000) { + return tokenCache.current.token; + } + + // Fetch new token from server + const response = await fetch("/api/voice/token", { + method: "POST", + }); + + if (!response.ok) { + throw new Error("Failed to get Cartesia token"); + } + + const data = await response.json(); + console.log("Cartesia: Token response:", data); + + if (!data.token) { + console.error("Cartesia: No token in response!", data); + throw new Error("Token response missing token field"); + } + + const expiresAt = now + data.expiresIn * 1000; + + tokenCache.current = { + token: data.token, + expiresAt, + }; + + console.log("Cartesia: Token fetched, expires in", data.expiresIn, "s"); + console.log("Cartesia: Token length:", data.token.length); + return data.token; + }, []); + + /** + * Connect to Cartesia WebSocket API. + */ + const connect = useCallback(async () => { + if (websocket.current?.readyState === WebSocket.OPEN) { + return; // Already connected + } + + const token = await getToken(); + + console.log("Cartesia WebSocket: Connecting to Cartesia..."); + console.log( + "Cartesia WebSocket: Token:", + typeof token, + token ? `length=${token.length}` : "UNDEFINED!" + ); + + if (!token) { + console.error("Cartesia WebSocket: Token is undefined or null!"); + throw new Error("Cannot connect: token is undefined"); + } + + console.log("Cartesia WebSocket: Token prefix:", token.substring(0, 20)); + + // Note: Cartesia WebSocket expects 'access_token' for client-side, 'api_key' for server-side + const wsUrl = `wss://api.cartesia.ai/tts/websocket?cartesia_version=2024-06-30&access_token=${encodeURIComponent(token)}`; + console.log( + "Cartesia WebSocket: Connecting to URL (token hidden):", + wsUrl.replace(token, "***TOKEN***") + ); + + const ws = new WebSocket(wsUrl); + + ws.binaryType = "arraybuffer"; + + ws.onopen = () => { + console.log("Cartesia WebSocket: Connected successfully!"); + setIsConnected(true); + // Generate new context ID for this session + contextId.current = `ctx_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`; + isFirstChunk.current = true; + }; + + ws.onmessage = (event) => { + try { + const message = JSON.parse(event.data); + + if (message.type === "chunk" && message.data) { + // Decode base64 audio data + const audioData = Uint8Array.from(atob(message.data), (c) => + c.charCodeAt(0) + ); + + // Initialize AudioContext on first chunk + if (!audioContext.current) { + audioContext.current = new AudioContext({ + sampleRate: 24_000, + }); + nextStartTime.current = audioContext.current.currentTime; + console.log( + "Cartesia WebSocket: AudioContext created, currentTime:", + audioContext.current.currentTime + ); + } + + const context = audioContext.current; + + // Ensure nextStartTime is not in the past + const now = context.currentTime; + if (nextStartTime.current < now) { + console.log( + `Cartesia WebSocket: nextStartTime (${nextStartTime.current.toFixed(3)}) is in the past, resetting to now (${now.toFixed(3)})` + ); + nextStartTime.current = now; + } + + // Convert to Float32Array for PCM_F32LE + const buffer = new Float32Array( + audioData.buffer, + audioData.byteOffset, + audioData.byteLength / 4 + ); + + if (buffer.length > 0) { + const audioBuffer = context.createBuffer( + 1, + buffer.length, + context.sampleRate + ); + audioBuffer.copyToChannel(buffer, 0); + + const sourceNode 
= context.createBufferSource(); + sourceNode.buffer = audioBuffer; + sourceNode.connect(context.destination); + + // Track active nodes so we can stop them on interrupt + activeSourceNodes.current.push(sourceNode); + sourceNode.onended = () => { + // Remove from active list when done + activeSourceNodes.current = activeSourceNodes.current.filter( + (n) => n !== sourceNode + ); + }; + + const scheduleTime = nextStartTime.current; + sourceNode.start(scheduleTime); + + nextStartTime.current += audioBuffer.duration; + + if (isFirstChunk.current) { + console.log( + `Cartesia WebSocket: First audio chunk scheduled at ${scheduleTime.toFixed(3)}s, duration ${audioBuffer.duration.toFixed(3)}s, will end at ${nextStartTime.current.toFixed(3)}s` + ); + isFirstChunk.current = false; + // isPlaying already set to true when we sent the message + } + } + } else if (message.type === "done") { + const context = audioContext.current; + const endTime = nextStartTime.current; + + if (!context) { + console.log( + "Cartesia WebSocket: Generation complete but no AudioContext" + ); + isSynthesizing.current = false; + updateIsPlaying(false); + if (currentCallback.current) { + currentCallback.current(); + currentCallback.current = null; + } + return; + } + + const now = context.currentTime; + console.log( + `Cartesia WebSocket: Generation complete. currentTime=${now.toFixed(3)}s, nextStartTime=${endTime.toFixed(3)}s` + ); + + // Calculate when all audio finishes for THIS generation + if (endTime > now) { + const totalDuration = (endTime - now) * 1000; + + console.log( + `Cartesia WebSocket: Audio will finish in ${totalDuration.toFixed(0)}ms (at ${endTime.toFixed(3)}s)` + ); + + // Wait for audio to FINISH before allowing next synthesis + setTimeout(() => { + console.log( + `Cartesia WebSocket: Audio playback completed at ${context.currentTime.toFixed(3)}s` + ); + isSynthesizing.current = false; // NOW allow next synthesis + + // Call the callback but DON'T automatically set isPlaying=false + // Let the callback decide based on whether more text is coming + if (currentCallback.current) { + currentCallback.current(); + currentCallback.current = null; + } + + // NOTE: isPlaying stays true! 
Chat.tsx will explicitly call + // a method to set it false when ALL sentences are done + }, totalDuration + 100); + } else { + // No audio to wait for + console.log( + `Cartesia WebSocket: No audio to wait for (endTime ${endTime.toFixed(3)} <= now ${now.toFixed(3)})` + ); + isSynthesizing.current = false; + + if (currentCallback.current) { + currentCallback.current(); + currentCallback.current = null; + } + + if (nextStartTime.current <= context.currentTime) { + updateIsPlaying(false); + } + } + } else if (message.type === "error") { + console.error("Cartesia WebSocket error:", message); + updateIsPlaying(false); + } + } catch (parseError) { + console.error("Cartesia WebSocket: Message parsing error:", parseError); + } + }; + + ws.onerror = (error) => { + console.error("Cartesia WebSocket error event:", error); + console.error("WebSocket readyState:", ws.readyState); + console.error( + "Common causes: Invalid token, CORS issue, or Cartesia API down" + ); + setIsConnected(false); + }; + + ws.onclose = () => { + console.log("Cartesia WebSocket: Disconnected"); + setIsConnected(false); + updateIsPlaying(false); + }; + + websocket.current = ws; + + // Wait for connection to open + await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + reject(new Error("WebSocket connection timeout")); + }, 10_000); + + ws.addEventListener("open", () => { + clearTimeout(timeout); + // Add small delay to ensure connection is fully ready + setTimeout(() => resolve(), 100); + }); + + ws.addEventListener("error", () => { + clearTimeout(timeout); + reject(new Error("WebSocket connection failed")); + }); + }); + }, [getToken, updateIsPlaying]); + + /** + * Stop audio only, keep WebSocket alive (for interrupts). + */ + const stopAudio = useCallback(() => { + console.log("Cartesia: Stopping audio (keeping WebSocket alive)"); + + // IMMEDIATELY stop all playing audio nodes + for (const node of activeSourceNodes.current) { + try { + node.stop(); + node.disconnect(); + } catch { + // Node might already be stopped, ignore + } + } + activeSourceNodes.current = []; + + // Close AudioContext + audioContext.current?.close(); + audioContext.current = null; + + // Reset playback state + nextStartTime.current = 0; + currentCallback.current = null; + isFirstChunk.current = true; + isSynthesizing.current = false; + isPreparingRef.current = false; + updateIsPlaying(false); + }, [updateIsPlaying]); + + /** + * Stop audio and close WebSocket (for cleanup/deactivate). + */ + const stop = useCallback(() => { + console.log("Cartesia: Stopping ALL audio and closing connection"); + + // Stop audio first + stopAudio(); + + // Close WebSocket + websocket.current?.close(); + websocket.current = null; + + // Reset connection state + contextId.current = null; + setIsConnected(false); + }, [stopAudio]); + + /** + * Synthesize text to speech using Cartesia WebSocket and play immediately. + * Can be called multiple times for streaming chunks with prosody continuity. 
+ * + * @param text - The text to synthesize + * @param shouldContinue - Set to true for streaming chunks, false for final chunk + * @param onComplete - Optional callback when audio playback completes + */ + const synthesizeAndPlay = useCallback( + async (text: string, shouldContinue = false, onComplete?: () => void) => { + try { + // Ensure WebSocket is connected + try { + await connect(); + } catch (connectError) { + console.error("Cartesia: Connection failed:", connectError); + throw new Error(`Failed to connect to Cartesia: ${connectError}`); + } + + // Double-check connection state after connect() completes + if ( + !websocket.current || + websocket.current.readyState !== WebSocket.OPEN + ) { + console.error("Cartesia: WebSocket state after connect():", { + exists: !!websocket.current, + readyState: websocket.current?.readyState, + OPEN: WebSocket.OPEN, + }); + throw new Error("WebSocket not connected after connection attempt"); + } + + console.log( + `Cartesia WebSocket: Sending text (continue=${shouldContinue}):`, + text.substring(0, 50) + ); + + // Set isPlaying to true IMMEDIATELY so interrupts are detected + updateIsPlaying(true); + + // Set callback for THIS generation (only matters for final chunk) + if (!shouldContinue) { + currentCallback.current = onComplete || null; + } + + // Send text chunk to Cartesia + const message = { + model_id: "sonic-english", + transcript: text, + voice: { + mode: "id", + id: "79a125e8-cd45-4c13-8a67-188112f4dd22", + }, + output_format: { + container: "raw", + encoding: "pcm_f32le", + sample_rate: 24_000, + }, + language: "en", + context_id: contextId.current, + continue: shouldContinue, + }; + + websocket.current.send(JSON.stringify(message)); + } catch (error) { + console.error("Cartesia WebSocket: Synthesis error:", error); + updateIsPlaying(false); + // Don't rethrow - this would crash the voice agent + // Instead, let it continue and try again on next chunk + } + }, + [connect, updateIsPlaying] + ); + + /** + * Explicitly mark playback as complete. + * Called by chat.tsx when all sentences are done. + */ + const markComplete = useCallback(() => { + console.log("Cartesia: Explicitly marking playback as complete"); + updateIsPlaying(false); + }, [updateIsPlaying]); + + return { + isPlaying, + isPlayingRef, // Export the ref so callbacks can access current value + isPreparingRef, // Export preparing state so interrupts can detect it + isSynthesizingRef: isSynthesizing, // Export synthesizing ref too + isConnected, + synthesizeAndPlay, + stop, + stopAudio, // Stop audio but keep WebSocket (fast interrupts) + markComplete, // Explicitly mark playback as complete + }; +} diff --git a/hooks/use-player.ts b/hooks/use-player.ts new file mode 100644 index 0000000000..a64f0f1e57 --- /dev/null +++ b/hooks/use-player.ts @@ -0,0 +1,159 @@ +import { useCallback, useRef, useState } from "react"; + +/** + * React hook for streaming audio playback using Web Audio API. + * + * **Purpose**: Plays streaming PCM audio from Cartesia TTS API with low latency. + * Handles audio buffering, scheduling, and playback state management. + * + * **Used in**: + * - `components/chat.tsx` - Main chat component for voice output + * - `components/message-actions.tsx` - TTS playback button on assistant messages + * + * **Audio Format Requirements**: + * - Format: PCM_F32LE (32-bit float, little-endian) + * - Sample Rate: 24,000 Hz + * - Channels: 1 (mono) + * - Source: Cartesia Sonic TTS API streaming response + * + * **How It Works**: + * 1. 
Creates AudioContext at 24kHz sample rate + * 2. Reads streaming response chunks (ReadableStream) + * 3. Converts raw bytes to Float32Array PCM samples + * 4. Creates AudioBuffers and schedules them for gapless playback + * 5. Handles leftover bytes between chunks (4-byte alignment for Float32) + * 6. Calls callback when playback completes + * + * **Technical Details**: + * - Uses Web Audio API's AudioContext and AudioBufferSourceNode + * - Schedules buffers using precise timing (nextStartTime) for seamless playback + * - Handles partial data between reads (leftover bytes) + * - Automatically cleans up AudioContext on stop + * + * @returns Object with playback controls: + * - isPlaying: Boolean indicating if audio is currently playing + * - play(stream, callback): Starts streaming audio playback + * - stop(): Stops playback and closes AudioContext + * + * @example + * ```tsx + * const player = usePlayer(); + * + * // Play TTS audio + * const response = await fetch("/api/voice/synthesize", { ... }); + * if (response.body) { + * player.play(response.body, () => { + * console.log("Playback finished"); + * }); + * } + * + * // Stop playback + * if (player.isPlaying) { + * player.stop(); + * } + * ``` + * + * @see app/(chat)/api/voice/synthesize/route.ts - Audio source (Cartesia TTS) + * @see components/message-actions.tsx - UI trigger for playback + */ +export function usePlayer() { + const [isPlaying, setIsPlaying] = useState(false); + const audioContext = useRef(null); + const source = useRef(null); + + const stop = useCallback(() => { + audioContext.current?.close(); + audioContext.current = null; + setIsPlaying(false); + }, []); + + const play = useCallback( + async (stream: ReadableStream, callback: () => void) => { + stop(); + const context = new AudioContext({ sampleRate: 24_000 }); + audioContext.current = context; + + let nextStartTime = context.currentTime; + const reader = stream.getReader(); + let leftover = new Uint8Array(); + let result = await reader.read(); + setIsPlaying(true); + + while (!result.done && context) { + const data = new Uint8Array(leftover.length + result.value.length); + data.set(leftover); + data.set(result.value, leftover.length); + + const length = Math.floor(data.length / 4) * 4; + const remainder = data.length % 4; + const buffer = new Float32Array(data.buffer, 0, length / 4); + + leftover = new Uint8Array(data.buffer, length, remainder); + + if (buffer.length > 0) { + const audioBuffer = context.createBuffer( + 1, + buffer.length, + context.sampleRate + ); + audioBuffer.copyToChannel(buffer, 0); + + const sourceNode = context.createBufferSource(); + sourceNode.buffer = audioBuffer; + sourceNode.connect(context.destination); + sourceNode.start(nextStartTime); + + nextStartTime += audioBuffer.duration; + source.current = sourceNode; + } + + result = await reader.read(); + } + + // Process any remaining leftover bytes at the end + if (leftover.length >= 4 && context) { + const length = Math.floor(leftover.length / 4) * 4; + const buffer = new Float32Array(leftover.buffer, 0, length / 4); + + if (buffer.length > 0) { + const audioBuffer = context.createBuffer( + 1, + buffer.length, + context.sampleRate + ); + audioBuffer.copyToChannel(buffer, 0); + + const sourceNode = context.createBufferSource(); + sourceNode.buffer = audioBuffer; + sourceNode.connect(context.destination); + sourceNode.start(nextStartTime); + + nextStartTime += audioBuffer.duration; + source.current = sourceNode; + } + } + + // Wait for ALL scheduled audio to finish playing + // Calculate 
total duration from current time to when last buffer ends + const totalDuration = nextStartTime - context.currentTime; + + // Schedule cleanup after all audio finishes + setTimeout( + () => { + if (audioContext.current === context) { + stop(); + callback(); + } + }, + totalDuration * 1000 + 100 + ); // Add 100ms buffer to be safe + }, + [stop] + ); + + return { + isPlaying, + play, + stop, + }; +} diff --git a/hooks/use-scroll-to-bottom.tsx b/hooks/use-scroll-to-bottom.tsx index c56e3f5c58..3577fd2180 100644 --- a/hooks/use-scroll-to-bottom.tsx +++ b/hooks/use-scroll-to-bottom.tsx @@ -9,7 +9,9 @@ export function useScrollToBottom() { const [isAtBottom, setIsAtBottom] = useState(true); const { data: scrollBehavior = false, mutate: setScrollBehavior } = - useSWR("messages:should-scroll", null, { fallbackData: false }); + useSWR("messages:should-scroll", null, { + fallbackData: false, + }); const handleScroll = useCallback(() => { if (!containerRef.current) { diff --git a/hooks/use-voice-input.ts b/hooks/use-voice-input.ts new file mode 100644 index 0000000000..964bf6caae --- /dev/null +++ b/hooks/use-voice-input.ts @@ -0,0 +1,158 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { toast } from "sonner"; +import type { VadMode } from "./use-voice-settings"; + +/** + * Options for configuring voice input behavior. + * + * @property vadMode - Determines how voice input is triggered (currently only push-to-talk is supported) + * @property onTranscript - Callback invoked when speech is successfully transcribed + * @property onStop - Optional callback invoked when recording stops + */ +export type UseVoiceInputOptions = { + vadMode: VadMode; + onTranscript: (transcript: string) => void; + onStop?: () => void; +}; + +/** + * React hook for push-to-talk voice input functionality. + * + * Uses browser's native MediaRecorder API for reliable audio recording + * and transcription via Groq's Whisper API. + * + * @param options - Configuration object with callbacks + * @returns Object with recording state and controls + */ +export function useVoiceInput({ onTranscript, onStop }: UseVoiceInputOptions) { + const [isTranscribing, setIsTranscribing] = useState(false); + const [isRecording, setIsRecording] = useState(false); + const mediaRecorderRef = useRef(null); + const audioChunksRef = useRef([]); + + const transcribe = useCallback( + async (audioBlob: Blob) => { + setIsTranscribing(true); + + try { + const formData = new FormData(); + formData.append("audio", audioBlob, "audio.wav"); + + const response = await fetch("/api/voice/transcribe", { + method: "POST", + body: formData, + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(error || "Transcription failed"); + } + + const data = await response.json(); + const transcript = data.transcript; + + if (transcript) { + onTranscript(transcript); + } + } catch (error) { + console.error("Transcription error:", error); + toast.error("Failed to transcribe audio. 
Please try again."); + } finally { + setIsTranscribing(false); + } + }, + [onTranscript] + ); + + // Push-to-talk recording + const startPushToTalk = useCallback(async () => { + try { + const stream = await navigator.mediaDevices.getUserMedia({ + audio: true, + }); + const mediaRecorder = new MediaRecorder(stream, { + mimeType: "audio/webm;codecs=opus", + }); + mediaRecorderRef.current = mediaRecorder; + audioChunksRef.current = []; + + mediaRecorder.ondataavailable = (event) => { + if (event.data.size > 0) { + audioChunksRef.current.push(event.data); + } + }; + + mediaRecorder.onstop = () => { + if (audioChunksRef.current.length === 0) { + toast.error("No audio was recorded. Please try again."); + return; + } + + const audioBlob = new Blob(audioChunksRef.current, { + type: "audio/webm", + }); + + // Check if audio blob has content + if (audioBlob.size === 0) { + toast.error("Recording was empty. Please speak and try again."); + return; + } + + // Groq's Whisper accepts various formats including webm + transcribe(audioBlob); + + // Stop all tracks + for (const track of stream.getTracks()) { + track.stop(); + } + }; + + mediaRecorder.start(); + setIsRecording(true); + } catch (error) { + console.error("Failed to start recording:", error); + toast.error("Failed to access microphone. Please check permissions."); + } + }, [transcribe]); + + const stopPushToTalk = useCallback(() => { + if ( + mediaRecorderRef.current && + mediaRecorderRef.current.state !== "inactive" + ) { + mediaRecorderRef.current.stop(); + setIsRecording(false); + onStop?.(); + } + }, [onStop]); + + // Cleanup: Stop any active recording when component unmounts + useEffect(() => { + return () => { + if ( + mediaRecorderRef.current && + mediaRecorderRef.current.state !== "inactive" + ) { + mediaRecorderRef.current.stop(); + setIsRecording(false); + } + }; + }, []); + + const toggleRecording = useCallback(() => { + if (isRecording) { + stopPushToTalk(); + } else { + startPushToTalk(); + } + }, [isRecording, startPushToTalk, stopPushToTalk]); + + return { + isRecording, + isTranscribing, + vadState: null, // Kept for compatibility, but no longer used + startRecording: startPushToTalk, + stopRecording: stopPushToTalk, + toggleRecording, + }; +} diff --git a/hooks/use-voice-settings.ts b/hooks/use-voice-settings.ts new file mode 100644 index 0000000000..49d601dfee --- /dev/null +++ b/hooks/use-voice-settings.ts @@ -0,0 +1,80 @@ +import { useLocalStorage } from "usehooks-ts"; + +/** + * Voice Activity Detection mode. + * + * Determines how voice input is triggered in the chat composer. + * + * @property "always-on" - Automatically detects speech via VAD (Silero model). + * Hands-free operation - just start speaking. + * + * @property "push-to-talk" - Manual recording control via button press. + * Click to start recording, click again to stop. More reliable, lower overhead. + */ +export type VadMode = "always-on" | "push-to-talk"; + +/** + * React hook for managing voice feature settings with localStorage persistence. + * + * **Purpose**: Persists user preferences for voice input mode, TTS, and Voice Agent across sessions. + * Settings are stored in browser localStorage and automatically hydrated on page load. 
+ * + * **Used in**: + * - `components/multimodal-input.tsx` - Chat composer voice controls + * - `components/chat.tsx` - TTS playback state and Voice Agent mode + * + * **Storage Keys**: + * - `voice-tts-enabled`: Boolean for text-to-speech toggle + * - `voice-vad-mode`: "always-on" or "push-to-talk" string + * - `voice-agent-enabled`: Boolean for Voice Agent mode (disabled by default) + * + * **Default Values**: + * - TTS: Disabled (false) - User must opt-in to audio playback + * - VAD Mode: Push-to-talk - More reliable and predictable + * - Voice Agent: Disabled (false) - Feature is opt-in + * + * @returns Object with settings and setters: + * - ttsEnabled: Boolean indicating if TTS is enabled globally + * - setTtsEnabled: Function to toggle TTS on/off + * - vadMode: Current voice input mode ("always-on" | "push-to-talk") + * - setVadMode: Function to switch between VAD modes + * - voiceAgentEnabled: Boolean indicating if Voice Agent mode is enabled + * - setVoiceAgentEnabled: Function to toggle Voice Agent mode + * + * @example + * ```tsx + * const { ttsEnabled, setTtsEnabled, voiceAgentEnabled, setVoiceAgentEnabled } = useVoiceSettings(); + * + * // Toggle Voice Agent + * + * ``` + * + * @see components/voice-controls.tsx - UI controls for these settings + * @see hooks/use-voice-input.ts - Consumes vadMode for recording behavior + * @see components/voice-agent-overlay.tsx - Voice Agent UI + */ +export function useVoiceSettings() { + const [ttsEnabled, setTtsEnabled] = useLocalStorage( + "voice-tts-enabled", + false + ); + const [vadMode, setVadMode] = useLocalStorage( + "voice-vad-mode", + "push-to-talk" + ); + const [voiceAgentEnabled, setVoiceAgentEnabled] = useLocalStorage( + "voice-agent-enabled", + false + ); + + return { + ttsEnabled, + setTtsEnabled, + vadMode, + setVadMode, + voiceAgentEnabled, + setVoiceAgentEnabled, + }; +} diff --git a/lib/ai/models.test.ts b/lib/ai/models.test.ts index 1db7b416d4..284788dad3 100644 --- a/lib/ai/models.test.ts +++ b/lib/ai/models.test.ts @@ -57,7 +57,11 @@ export const titleModel = new MockLanguageModelV2({ { type: "finish", finishReason: "stop", - usage: { inputTokens: 3, outputTokens: 10, totalTokens: 13 }, + usage: { + inputTokens: 3, + outputTokens: 10, + totalTokens: 13, + }, }, ], }), diff --git a/lib/ai/prompts.ts b/lib/ai/prompts.ts index 80b87e623c..823fc7b079 100644 --- a/lib/ai/prompts.ts +++ b/lib/ai/prompts.ts @@ -117,4 +117,4 @@ export const titlePrompt = `\n - you will generate a short title based on the first message a user begins a conversation with - ensure it is not more than 80 characters long - the title should be a summary of the user's message - - do not use quotes or colons` + - do not use quotes or colons`; diff --git a/lib/ai/tools/create-document.ts b/lib/ai/tools/create-document.ts index bc9bc72021..f4b5c52cb5 100644 --- a/lib/ai/tools/create-document.ts +++ b/lib/ai/tools/create-document.ts @@ -64,7 +64,11 @@ export const createDocument = ({ session, dataStream }: CreateDocumentProps) => session, }); - dataStream.write({ type: "data-finish", data: null, transient: true }); + dataStream.write({ + type: "data-finish", + data: null, + transient: true, + }); return { id, diff --git a/lib/ai/tools/get-weather.ts b/lib/ai/tools/get-weather.ts index d83a269472..21a357b1ce 100644 --- a/lib/ai/tools/get-weather.ts +++ b/lib/ai/tools/get-weather.ts @@ -1,20 +1,29 @@ import { tool } from "ai"; import { z } from "zod"; -async function geocodeCity(city: string): Promise<{ latitude: number; longitude: number } | null> { 
+async function geocodeCity(
+  city: string
+): Promise<{ latitude: number; longitude: number } | null> {
   try {
     const response = await fetch(
       `https://geocoding-api.open-meteo.com/v1/search?name=${encodeURIComponent(city)}&count=1&language=en&format=json`
     );
-
-    if (!response.ok) return null;
-
+
+    if (!response.ok) {
+      console.error(
+        `Failed to geocode city: ${city}`,
+        response.statusText,
+        response.status
+      );
+      return null;
+    }
+
     const data = await response.json();
-
+
     if (!data.results || data.results.length === 0) {
       return null;
     }
-
+
     const result = data.results[0];
     return {
       latitude: result.latitude,
@@ -26,11 +35,15 @@ async function geocodeCity(city: string): Promise<{ latitude: number; longitude:
 }
 
 export const getWeather = tool({
-  description: "Get the current weather at a location. You can provide either coordinates or a city name.",
+  description:
+    "Get the current weather at a location. You can provide either coordinates or a city name.",
   inputSchema: z.object({
     latitude: z.number().optional(),
     longitude: z.number().optional(),
-    city: z.string().describe("City name (e.g., 'San Francisco', 'New York', 'London')").optional(),
+    city: z
+      .string()
+      .describe("City name (e.g., 'San Francisco', 'New York', 'London')")
+      .optional(),
   }),
   execute: async (input) => {
     let latitude: number;
@@ -50,20 +63,21 @@ export const getWeather = tool({
       longitude = input.longitude;
     } else {
       return {
-        error: "Please provide either a city name or both latitude and longitude coordinates.",
+        error:
+          "Please provide either a city name or both latitude and longitude coordinates.",
       };
     }
 
     const response = await fetch(
-      `https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}&current=temperature_2m&hourly=temperature_2m&daily=sunrise,sunset&timezone=auto`
+      `https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}&current=temperature_2m&hourly=temperature_2m&daily=sunrise,sunset&timezone=auto&temperature_unit=fahrenheit`
     );
 
     const weatherData = await response.json();
-
+
     if ("city" in input) {
       weatherData.cityName = input.city;
     }
-
+
     return weatherData;
   },
 });
diff --git a/lib/ai/tools/search-web.ts b/lib/ai/tools/search-web.ts
new file mode 100644
index 0000000000..8cd4b1f9e0
--- /dev/null
+++ b/lib/ai/tools/search-web.ts
@@ -0,0 +1,129 @@
+/**
+ * Web Search Tool
+ *
+ * **Optional Service** - This tool uses Tavily API to search the web for current information.
+ * The tool is completely optional and the AI will gracefully handle its absence.
+ *
+ * If TAVILY_API_KEY is not configured:
+ * - The tool will return an error message
+ * - The AI will inform users that web search is unavailable
+ * - All other chat functionality continues to work normally
+ *
+ * To enable web search:
+ * 1. Get an API key from https://tavily.com
+ * 2. Add TAVILY_API_KEY to your .env.local file
+ *
+ * @see https://docs.tavily.com for API documentation
+ */
+
+import { tavily } from "@tavily/core";
+import { tool } from "ai";
+import { z } from "zod";
+
+const DEBUG = process.env.DEBUG === "true";
+const TAVILY_API_KEY = process.env.TAVILY_API_KEY;
+
+if (DEBUG) {
+  if (TAVILY_API_KEY) {
+    console.log("[searchWeb] Tavily web search enabled");
+  } else {
+    console.log(
+      "[searchWeb] TAVILY_API_KEY not configured - web search will be unavailable"
+    );
+  }
+}
+
+const tavilyClient = TAVILY_API_KEY ?
tavily({ apiKey: TAVILY_API_KEY }) : null; + +export const searchWeb = tool({ + description: + "Search the web for current information, facts, news, or any information not in your knowledge base. Use this when you need up-to-date information from the internet. Tavily provides AI-optimized search results with clean, relevant content.", + inputSchema: z.object({ + query: z + .string() + .describe( + "Search query to find information on the web (e.g., 'latest news about AI', 'how to make sourdough bread', 'current weather in Tokyo')" + ), + searchDepth: z + .enum(["basic", "advanced"]) + .optional() + .default("basic") + .describe( + "Search depth: 'basic' for quick results, 'advanced' for more comprehensive search" + ), + includeAnswer: z + .boolean() + .optional() + .default(true) + .describe( + "Whether to include an AI-generated answer summary based on search results" + ), + maxResults: z + .number() + .min(1) + .max(10) + .optional() + .default(5) + .describe("Maximum number of search results to return (1-10)"), + }), + execute: async ({ query, searchDepth, includeAnswer, maxResults }) => { + // Gracefully handle missing API key + if (!tavilyClient) { + if (DEBUG) { + console.log("[searchWeb] Request attempted without API key configured"); + } + return { + query, + error: + "Web search is not available. The administrator has not configured the TAVILY_API_KEY. Please ask me something from my existing knowledge instead.", + results: [], + }; + } + + try { + const response = await tavilyClient.search(query, { + searchDepth: searchDepth || "basic", + maxResults: maxResults || 5, + includeAnswer: includeAnswer !== false, + includeRawContent: false, + }); + + if (!response || !response.results || response.results.length === 0) { + return { + query, + error: "No search results found. Try rephrasing your query.", + }; + } + + return { + query: response.query || query, + answer: response.answer || null, + results: response.results.map((result) => ({ + title: result.title, + url: result.url, + content: result.content, + score: result.score, + })), + images: response.images || [], + responseTime: response.responseTime || null, + }; + } catch (error) { + console.error("[searchWeb] Tavily API error:", error); + + if (error instanceof Error) { + return { + query, + error: `Web search failed: ${error.message}. Please try rephrasing your question or ask me something from my existing knowledge.`, + results: [], + }; + } + + return { + query, + error: + "An unexpected error occurred while searching the web. Please try again or ask me something from my existing knowledge.", + results: [], + }; + } + }, +}); diff --git a/lib/ai/tools/update-document.ts b/lib/ai/tools/update-document.ts index cae5c801dc..3aa74924c8 100644 --- a/lib/ai/tools/update-document.ts +++ b/lib/ai/tools/update-document.ts @@ -50,7 +50,11 @@ export const updateDocument = ({ session, dataStream }: UpdateDocumentProps) => session, }); - dataStream.write({ type: "data-finish", data: null, transient: true }); + dataStream.write({ + type: "data-finish", + data: null, + transient: true, + }); return { id, diff --git a/lib/db/queries.ts b/lib/db/queries.ts index 20195140a8..533c705757 100644 --- a/lib/db/queries.ts +++ b/lib/db/queries.ts @@ -38,8 +38,13 @@ import { generateHashedPassword } from "./utils"; // use the Drizzle adapter for Auth.js / NextAuth // https://authjs.dev/reference/adapter/drizzle -// biome-ignore lint: Forbidden non-null assertion. 
-const client = postgres(process.env.POSTGRES_URL!); +if (!process.env.POSTGRES_URL) { + console.error("❌ POSTGRES_URL environment variable is not set!"); + console.error("Please configure POSTGRES_URL in your .env.local file"); + throw new Error("POSTGRES_URL environment variable is required"); +} + +const client = postgres(process.env.POSTGRES_URL); const db = drizzle(client); export async function getUser(email: string): Promise { @@ -99,7 +104,14 @@ export async function saveChat({ title, visibility, }); - } catch (_error) { + } catch (error: any) { + // Handle duplicate key error - if chat already exists, that's fine + if (error?.code === "23505" && error?.constraint_name === "Chat_pkey") { + console.log("Chat already exists, skipping insert:", id); + return; + } + console.error("Failed to save chat:", error); + console.error("Chat data:", { id, userId, title, visibility }); throw new ChatSDKError("bad_request:database", "Failed to save chat"); } } @@ -134,7 +146,7 @@ export async function deleteAllChatsByUserId({ userId }: { userId: string }) { return { deletedCount: 0 }; } - const chatIds = userChats.map(c => c.id); + const chatIds = userChats.map((c) => c.id); await db.delete(vote).where(inArray(vote.chatId, chatIds)); await db.delete(message).where(inArray(message.chatId, chatIds)); @@ -238,7 +250,9 @@ export async function getChatById({ id }: { id: string }) { } return selectedChat; - } catch (_error) { + } catch (error) { + console.error("Failed to get chat by id:", error); + console.error("Chat id:", id); throw new ChatSDKError("bad_request:database", "Failed to get chat by id"); } } @@ -246,7 +260,9 @@ export async function getChatById({ id }: { id: string }) { export async function saveMessages({ messages }: { messages: DBMessage[] }) { try { return await db.insert(message).values(messages); - } catch (_error) { + } catch (error) { + console.error("Failed to save messages:", error); + console.error("Messages data:", messages); throw new ChatSDKError("bad_request:database", "Failed to save messages"); } } diff --git a/lib/errors.ts b/lib/errors.ts index 090682216e..b7dd284c87 100644 --- a/lib/errors.ts +++ b/lib/errors.ts @@ -66,7 +66,10 @@ export class ChatSDKError extends Error { }); return Response.json( - { code: "", message: "Something went wrong. Please try again later." }, + { + code: "", + message: "Something went wrong. Please try again later.", + }, { status: statusCode } ); } diff --git a/lib/nlp-worker-client.ts b/lib/nlp-worker-client.ts new file mode 100644 index 0000000000..eef03b1e22 --- /dev/null +++ b/lib/nlp-worker-client.ts @@ -0,0 +1,279 @@ +const DEBUG = process.env.DEBUG === "true"; + +/** + * NLP Worker Client for Voice Chat + * + * **Optional Service** - This client connects to an external NLP Worker service + * for advanced End-of-Turn (EOT) detection. The service is completely optional. + * If unavailable, the application will fall back to simple heuristics. + * + * Simplified TypeScript client for communicating with the Python nlp-worker service. 
+ * Based on: /Users/josiahbryan/devel/rubber/backend/src/services/ai/utils/NlpWorkerClient.js + * + * Supports: + * - EOT (End of Turn) detection for voice chat + * - Text embedding (GTE-Base-EN-v1.5) + * - Audio embedding (ECAPA-TDNN speaker embeddings) + * - Text generation (VibeThinker, Granite models) + * + * Environment Variables (Optional): + * - NLP_WORKER_URL: URL of the nlp-worker service (default: http://localhost:8097) + * - NLP_WORKER_API_KEY: API key for authentication + * + * If NLP_WORKER_URL is not set, all methods will return errors gracefully, + * allowing the application to use fallback logic. + */ + +export type EOTChatMessage = { + role: "user" | "assistant" | "system"; + content: string; +}; + +export type EOTRequest = { + chat_history: EOTChatMessage[]; + current_utterance: string; +}; + +export type EOTResponse = { + eou_probability: number; + unlikely_threshold: number; + is_end_of_utterance: boolean; +}; + +export type NlpWorkerError = { + error: string; + status?: number; +}; + +export type NlpWorkerClientOptions = { + nlpWorkerUrl?: string; + apiKey?: string; + maxRetries?: number; + /** Silence warnings when service is unavailable (useful for tests) */ + silent?: boolean; +}; + +/** + * Client for interacting with the NLP Worker service + * + * This client is designed to fail gracefully when the service is unavailable. + * All methods return either the result or an error object, never throwing exceptions. + */ +export class NlpWorkerClient { + private readonly nlpWorkerUrl: string; + private readonly apiKey: string; + private readonly maxRetries: number; + private readonly silent: boolean; + + constructor(options: NlpWorkerClientOptions = {}) { + this.nlpWorkerUrl = + options.nlpWorkerUrl || + process.env.NLP_WORKER_URL || + "http://localhost:8097"; + + this.apiKey = options.apiKey || process.env.NLP_WORKER_API_KEY || ""; + + this.maxRetries = options.maxRetries || 3; + this.silent = options.silent || false; + + // Log configuration status when debugging + if (!this.silent && DEBUG) { + const hasUrl = Boolean( + options.nlpWorkerUrl || process.env.NLP_WORKER_URL + ); + if (hasUrl) { + console.log("[NlpWorkerClient] Configured with NLP Worker service"); + } else { + console.log( + "[NlpWorkerClient] NLP_WORKER_URL not set - will use fallback heuristics" + ); + } + } + } + + /** + * Check if the NLP Worker service is configured and potentially available + */ + isConfigured(): boolean { + return Boolean(process.env.NLP_WORKER_URL); + } + + /** + * Detect End of Turn (EOT) for voice chat + * + * Determines if the user has finished speaking or is just pausing. + * Uses the livekit/turn-detector model. + * + * @param history - Array of chat messages (conversation context) + * @returns EOT response with probability and decision, or error + * + * @example + * ```typescript + * const client = new NlpWorkerClient(); + * const result = await client.detectEOT([ + * { role: 'user', content: 'Hello' }, + * { role: 'assistant', content: 'Hi there!' 
},
+   *   { role: 'user', content: 'Can you help me with' }, // Incomplete
+   * ]);
+   *
+   * if ('error' in result) {
+   *   console.error('EOT failed:', result.error);
+   * } else {
+   *   console.log('Is complete?', result.is_end_of_utterance); // false
+   *   console.log('Probability:', result.eou_probability); // ~0.05
+   * }
+   * ```
+   */
+  async detectEOT(
+    history: EOTChatMessage[]
+  ): Promise<EOTResponse | NlpWorkerError> {
+    if (!history || history.length === 0) {
+      return { error: "No chat history provided" };
+    }
+
+    // Extract last message as current utterance
+    const messages = [...history];
+    const lastMessage = messages.pop();
+
+    if (!lastMessage) {
+      return { error: "Chat history must have at least one message" };
+    }
+
+    // Format for Python API
+    const chatData = [
+      {
+        id: "request",
+        chat_history: messages,
+        current_utterance: lastMessage.content,
+      },
+    ];
+
+    const result = await this._callNlpWorker<{ results: EOTResponse[] }>({
+      apiPath: "/api/eot",
+      method: "POST",
+      body: { chats: chatData },
+    });
+
+    if ("error" in result) {
+      return result;
+    }
+
+    // Return first result
+    return result.results?.[0] || { error: "No result from EOT endpoint" };
+  }
+
+  /**
+   * Get version information from NLP Worker
+   */
+  version(): Promise<{ version: string; models: string[] } | NlpWorkerError> {
+    return this._callNlpWorker({
+      apiPath: "/api/version",
+      method: "GET",
+    });
+  }
+
+  /**
+   * Internal method to call NLP Worker API
+   */
+  private async _callNlpWorker<T>({
+    apiPath,
+    method = "POST",
+    body,
+    retryCount = 0,
+  }: {
+    apiPath: string;
+    method?: "GET" | "POST";
+    body?: unknown;
+    retryCount?: number;
+  }): Promise<T | NlpWorkerError> {
+    // Early return if service not configured
+    if (!this.isConfigured()) {
+      return {
+        error: "NLP Worker service not configured",
+        status: 503,
+      };
+    }
+
+    const url = `${this.nlpWorkerUrl}${apiPath}`;
+
+    try {
+      const response = await fetch(url, {
+        method,
+        headers: {
+          "Content-Type": "application/json",
+          ...(this.apiKey ? { Authorization: this.apiKey } : {}),
+        },
+        body: body ? JSON.stringify(body) : undefined,
+      });
+
+      // Handle error responses
+      if (!response.ok) {
+        const errorText = await response.text();
+        let errorData: unknown;
+
+        try {
+          errorData = JSON.parse(errorText);
+        } catch {
+          errorData = errorText;
+        }
+
+        // Retry on rate limits or service unavailable
+        if (
+          [429, 502, 503].includes(response.status) &&
+          retryCount < this.maxRetries
+        ) {
+          const delay = Math.random() * 1000 + retryCount * 500;
+
+          if (!this.silent && DEBUG) {
+            console.log(
+              `[NlpWorkerClient] ${response.status} error, retrying in ${delay}ms (${retryCount + 1}/${this.maxRetries})`
+            );
+          }
+
+          await new Promise((resolve) => setTimeout(resolve, delay));
+
+          return this._callNlpWorker<T>({
+            apiPath,
+            method,
+            body,
+            retryCount: retryCount + 1,
+          });
+        }
+
+        // Service unavailable, return error for fallback
+        if (!this.silent && DEBUG) {
+          console.log(
+            "[NlpWorkerClient] Service unavailable - will use fallback logic"
+          );
+        }
+
+        return {
+          error:
+            typeof errorData === "object" && errorData && "error" in errorData
+              ? String(errorData.error)
+              : String(errorData),
+          status: response.status,
+        };
+      }
+
+      // Parse successful response
+      const data = (await response.json()) as T;
+      return data;
+    } catch (error) {
+      // Service unreachable, return error for fallback
+      if (!this.silent && DEBUG) {
+        console.log(
+          "[NlpWorkerClient] Service unreachable - will use fallback logic"
+        );
+      }
+
+      return {
+        error: error instanceof Error ? error.message : "Service unavailable",
+        status: 503,
+      };
+    }
+  }
+}
+
+// Export singleton instance for convenience
+export const nlpWorkerClient = new NlpWorkerClient();
diff --git a/lib/types.ts b/lib/types.ts
index 4a9ad92bdc..dcf71a0167 100644
--- a/lib/types.ts
+++ b/lib/types.ts
@@ -4,6 +4,7 @@ import type { ArtifactKind } from "@/components/artifact";
 import type { createDocument } from "./ai/tools/create-document";
 import type { getWeather } from "./ai/tools/get-weather";
 import type { requestSuggestions } from "./ai/tools/request-suggestions";
+import type { searchWeb } from "./ai/tools/search-web";
 import type { updateDocument } from "./ai/tools/update-document";
 import type { Suggestion } from "./db/schema";
 import type { AppUsage } from "./usage";
@@ -17,6 +18,7 @@ export const messageMetadataSchema = z.object({
 });
 export type MessageMetadata = z.infer<typeof messageMetadataSchema>;
 type weatherTool = InferUITool<typeof getWeather>;
+type searchWebTool = InferUITool<typeof searchWeb>;
 type createDocumentTool = InferUITool<ReturnType<typeof createDocument>>;
 type updateDocumentTool = InferUITool<ReturnType<typeof updateDocument>>;
 type requestSuggestionsTool = InferUITool<
@@ -25,6 +27,7 @@ type requestSuggestionsTool = InferUITool<
 
 export type ChatTools = {
   getWeather: weatherTool;
+  searchWeb: searchWebTool;
   createDocument: createDocumentTool;
   updateDocument: updateDocumentTool;
   requestSuggestions: requestSuggestionsTool;
diff --git a/next.config.ts b/next.config.ts
index 41d588e2ad..3be7c8e9fc 100644
--- a/next.config.ts
+++ b/next.config.ts
@@ -16,6 +16,25 @@ const nextConfig: NextConfig = {
       },
     ],
   },
+  // CORS headers required for ONNX/WASM (for VAD voice detection)
+  // biome-ignore lint/suspicious/useAwait: false positive
+  async headers() {
+    return [
+      {
+        source: "/(.*)",
+        headers: [
+          {
+            key: "Cross-Origin-Opener-Policy",
+            value: "same-origin",
+          },
+          {
+            key: "Cross-Origin-Embedder-Policy",
+            value: "require-corp",
+          },
+        ],
+      },
+    ];
+  },
 };
 
 export default nextConfig;
diff --git a/package.json b/package.json
index f85e609051..5b281371af 100644
--- a/package.json
+++ b/package.json
@@ -3,7 +3,7 @@
   "version": "3.1.0",
   "private": true,
   "scripts": {
-    "dev": "next dev --turbo",
+    "dev": "next dev --port 4251",
     "build": "tsx lib/db/migrate && next build",
     "start": "next start",
     "lint": "npx ultracite@latest check",
@@ -15,7 +15,8 @@
     "db:pull": "drizzle-kit pull",
     "db:check": "drizzle-kit check",
     "db:up": "drizzle-kit up",
-    "test": "export PLAYWRIGHT=True && pnpm exec playwright test"
+    "test": "export PLAYWRIGHT=True && pnpm exec playwright test",
+    "test:eot": "npx tsx lib/test-eot.ts"
   },
   "dependencies": {
     "@ai-sdk/gateway": "^1.0.15",
@@ -27,13 +28,17 @@
     "@codemirror/state": "^6.5.0",
     "@codemirror/theme-one-dark": "^6.1.2",
     "@codemirror/view": "^6.35.3",
+    "@deepgram/sdk": "^4.11.2",
     "@icons-pack/react-simple-icons": "^13.7.0",
     "@opentelemetry/api": "^1.9.0",
     "@opentelemetry/api-logs": "^0.200.0",
+    "@radix-ui/react-dialog": "^1.1.15",
     "@radix-ui/react-icons": "^1.3.0",
     "@radix-ui/react-select": "^2.2.6",
+    "@radix-ui/react-switch": "^1.2.6",
     "@radix-ui/react-use-controllable-state": "^1.2.2",
     "@radix-ui/react-visually-hidden": "^1.1.0",
+    "@tavily/core": "^0.5.13",
     "@vercel/analytics": "^1.3.1",
     "@vercel/blob": "^0.24.1",
     "@vercel/functions": "^2.0.0",
@@ -52,6 +57,7 @@
     "fast-deep-equal": "^3.1.3",
     "framer-motion": "^11.3.19",
     "geist": "^1.3.1",
+    "groq-sdk": "^0.35.0",
     "katex": "^0.16.25",
     "lucide-react": "^0.446.0",
     "nanoid": "^5.0.8",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index fba1d7dd4f..0419b2530e 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -35,6 +35,9 @@ importers:
'@codemirror/view': specifier: ^6.35.3 version: 6.36.4 + '@deepgram/sdk': + specifier: ^4.11.2 + version: 4.11.2(bufferutil@4.0.9) '@icons-pack/react-simple-icons': specifier: ^13.7.0 version: 13.8.0(react@19.0.0-rc-45804af1-20241021) @@ -44,18 +47,27 @@ importers: '@opentelemetry/api-logs': specifier: ^0.200.0 version: 0.200.0 + '@radix-ui/react-dialog': + specifier: ^1.1.15 + version: 1.1.15(@types/react-dom@18.3.5(@types/react@18.3.18))(@types/react@18.3.18)(react-dom@19.0.0-rc-45804af1-20241021(react@19.0.0-rc-45804af1-20241021))(react@19.0.0-rc-45804af1-20241021) '@radix-ui/react-icons': specifier: ^1.3.0 version: 1.3.2(react@19.0.0-rc-45804af1-20241021) '@radix-ui/react-select': specifier: ^2.2.6 version: 2.2.6(@types/react-dom@18.3.5(@types/react@18.3.18))(@types/react@18.3.18)(react-dom@19.0.0-rc-45804af1-20241021(react@19.0.0-rc-45804af1-20241021))(react@19.0.0-rc-45804af1-20241021) + '@radix-ui/react-switch': + specifier: ^1.2.6 + version: 1.2.6(@types/react-dom@18.3.5(@types/react@18.3.18))(@types/react@18.3.18)(react-dom@19.0.0-rc-45804af1-20241021(react@19.0.0-rc-45804af1-20241021))(react@19.0.0-rc-45804af1-20241021) '@radix-ui/react-use-controllable-state': specifier: ^1.2.2 version: 1.2.2(@types/react@18.3.18)(react@19.0.0-rc-45804af1-20241021) '@radix-ui/react-visually-hidden': specifier: ^1.1.0 version: 1.1.2(@types/react-dom@18.3.5(@types/react@18.3.18))(@types/react@18.3.18)(react-dom@19.0.0-rc-45804af1-20241021(react@19.0.0-rc-45804af1-20241021))(react@19.0.0-rc-45804af1-20241021) + '@tavily/core': + specifier: ^0.5.13 + version: 0.5.13 '@vercel/analytics': specifier: ^1.3.1 version: 1.5.0(next@15.3.0-canary.31(@opentelemetry/api@1.9.0)(@playwright/test@1.51.0)(react-dom@19.0.0-rc-45804af1-20241021(react@19.0.0-rc-45804af1-20241021))(react@19.0.0-rc-45804af1-20241021))(react@19.0.0-rc-45804af1-20241021) @@ -110,6 +122,9 @@ importers: geist: specifier: ^1.3.1 version: 1.3.1(next@15.3.0-canary.31(@opentelemetry/api@1.9.0)(@playwright/test@1.51.0)(react-dom@19.0.0-rc-45804af1-20241021(react@19.0.0-rc-45804af1-20241021))(react@19.0.0-rc-45804af1-20241021)) + groq-sdk: + specifier: ^0.35.0 + version: 0.35.0 katex: specifier: ^0.16.25 version: 0.16.25 @@ -458,6 +473,14 @@ packages: '@codemirror/view@6.36.4': resolution: {integrity: sha512-ZQ0V5ovw/miKEXTvjgzRyjnrk9TwriUB1k4R5p7uNnHR9Hus+D1SXHGdJshijEzPFjU25xea/7nhIeSqYFKdbA==} + '@deepgram/captions@1.2.0': + resolution: {integrity: sha512-8B1C/oTxTxyHlSFubAhNRgCbQ2SQ5wwvtlByn8sDYZvdDtdn/VE2yEPZ4BvUnrKWmsbTQY6/ooLV+9Ka2qmDSQ==} + engines: {node: '>=18.0.0'} + + '@deepgram/sdk@4.11.2': + resolution: {integrity: sha512-lKGxuXxlSixC8bB0BnzmIpbVjUSgYtz17cqvrgv0ZjmazgUPkuUj9egQPj6k+fbPX8wRzWEqlhrL/DXlXqeDXA==} + engines: {node: '>=18.0.0'} + '@drizzle-team/brocli@0.10.2': resolution: {integrity: sha512-z33Il7l5dKjUgGULTqBsQBQwckHh5AbIuxhdsIxDDiZAzBOrZO6q9ogcWC65kU382AfynTfgNumVcNIjuIua6w==} @@ -1148,31 +1171,31 @@ packages: resolution: {integrity: sha512-BdBGhQBh8IjZ2oIIX6F2/Q3LKm/FDDKi6ccYKcBTeilh6SNdNKveDOLk73BkSJjQLJk6qe4Yh+hHw1UPhCDdrg==} engines: {node: '>=14'} peerDependencies: - '@opentelemetry/api': 1.9.0 + '@opentelemetry/api': ^1.3.0 '@opentelemetry/resources@1.30.1': resolution: {integrity: sha512-5UxZqiAgLYGFjS4s9qm5mBVo433u+dSPUFWVWXmLAD4wB65oMCoXaJP1KJa9DIYYMeHu3z4BZcStG3LC593cWA==} engines: {node: '>=14'} peerDependencies: - '@opentelemetry/api': 1.9.0 + '@opentelemetry/api': '>=1.0.0 <1.10.0' '@opentelemetry/sdk-logs@0.57.2': resolution: {integrity: 
sha512-TXFHJ5c+BKggWbdEQ/inpgIzEmS2BGQowLE9UhsMd7YYlUfBQJ4uax0VF/B5NYigdM/75OoJGhAV3upEhK+3gg==} engines: {node: '>=14'} peerDependencies: - '@opentelemetry/api': 1.9.0 + '@opentelemetry/api': '>=1.4.0 <1.10.0' '@opentelemetry/sdk-metrics@1.30.1': resolution: {integrity: sha512-q9zcZ0Okl8jRgmy7eNW3Ku1XSgg3sDLa5evHZpCwjspw7E8Is4K/haRPDJrBcX3YSn/Y7gUvFnByNYEKQNbNog==} engines: {node: '>=14'} peerDependencies: - '@opentelemetry/api': 1.9.0 + '@opentelemetry/api': '>=1.3.0 <1.10.0' '@opentelemetry/sdk-trace-base@1.30.1': resolution: {integrity: sha512-jVPgBbH1gCy2Lb7X0AVQ8XAfgg0pJ4nvl8/IiQA6nxOsPvS+0zMJaFSs2ltXe0J6C8dqjcnpyqINDJmU30+uOg==} engines: {node: '>=14'} peerDependencies: - '@opentelemetry/api': 1.9.0 + '@opentelemetry/api': '>=1.0.0 <1.10.0' '@opentelemetry/semantic-conventions@1.28.0': resolution: {integrity: sha512-lp4qAiMTD4sNWW4DbKLBkfiMZ4jbAboJIGOQr5DvciMRI494OapieI9qiODpOt0XBr1LjIDy1xAGAnVs5supTA==} @@ -2186,6 +2209,9 @@ packages: peerDependencies: tailwindcss: '>=3.0.0 || insiders || >=4.0.0-alpha.20 || >=4.0.0-beta.1' + '@tavily/core@0.5.13': + resolution: {integrity: sha512-H7QzDDQews4r7HCrCnbAM8RyqlQt148G0UnjfCHMcOOrsP+8EwDqeOP4G47RrFGXEiS/jbTftGrDRAQnHUGnqA==} + '@tokenlens/core@1.3.0': resolution: {integrity: sha512-d8YNHNC+q10bVpi95fELJwJyPVf1HfvBEI18eFQxRSZTdByXrP+f/ZtlhSzkx0Jl0aEmYVeBA5tPeeYRioLViQ==} @@ -2344,6 +2370,12 @@ packages: '@types/ms@2.1.0': resolution: {integrity: sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==} + '@types/node-fetch@2.6.13': + resolution: {integrity: sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==} + + '@types/node@18.19.130': + resolution: {integrity: sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==} + '@types/node@22.13.10': resolution: {integrity: sha512-I6LPUvlRH+O6VRUqYOcMudhaIdUVWfsjnZavnsraHvpBwaEyMN29ry+0UVJhImYL16xsscu0aske3yA+uPOWfw==} @@ -2480,6 +2512,10 @@ packages: '@vitest/utils@3.2.4': resolution: {integrity: sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==} + abort-controller@3.0.0: + resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} + engines: {node: '>=6.5'} + acorn-import-attributes@1.9.5: resolution: {integrity: sha512-n02Vykv5uA3eHGM/Z2dQrcD56kL8TyDb2p1+0P83PClMnC/nc+anbQRhIOWnSq4Ke/KvDPrY3C9hDtC/A3eHnQ==} peerDependencies: @@ -2490,6 +2526,14 @@ packages: engines: {node: '>=0.4.0'} hasBin: true + agent-base@7.1.4: + resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} + engines: {node: '>= 14'} + + agentkeepalive@4.6.0: + resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==} + engines: {node: '>= 8.0.0'} + ai@5.0.26: resolution: {integrity: sha512-bGNtG+nYQ2U+5mzuLbxIg9WxGQJ2u5jv2gYgP8C+CJ1YI4qqIjvjOgGEZWzvNet8jiOGIlqstsht9aQefKzmBw==} engines: {node: '>=18'} @@ -2510,9 +2554,18 @@ packages: async-retry@1.3.3: resolution: {integrity: sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==} + asynckit@0.4.0: + resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} + + axios@1.13.2: + resolution: {integrity: sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==} + bail@2.0.2: resolution: 
{integrity: sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==} + base64-js@1.5.1: + resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} + bcrypt-ts@5.0.3: resolution: {integrity: sha512-2FcgD12xPbwCoe5i9/HK0jJ1xA1m+QfC1e6htG9Bl/hNOnLyaFmQSlqLKcfe3QdnoMPKpKEGFCbESBTg+SJNOw==} engines: {node: '>=18'} @@ -2536,6 +2589,10 @@ packages: resolution: {integrity: sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==} engines: {node: '>=8'} + call-bind-apply-helpers@1.0.2: + resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} + engines: {node: '>= 0.4'} + caniuse-lite@1.0.30001704: resolution: {integrity: sha512-+L2IgBbV6gXB4ETf0keSvLr7JUrRVbIaB/lrQ1+z8mRcQiisG5k+lG6O4n6Y5q6f5EuNfaYXKgymucphlEXQew==} @@ -2619,6 +2676,10 @@ packages: resolution: {integrity: sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==} engines: {node: '>=12.5.0'} + combined-stream@1.0.8: + resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} + engines: {node: '>= 0.8'} + comma-separated-tokens@1.0.8: resolution: {integrity: sha512-GHuDRO12Sypu2cV70d1dkA2EUmXHgntrzbpvOB+Qy+49ypNfGgFQIC2fhhXbnyrJRynDCAARsT7Ou0M6hirpfw==} @@ -2660,6 +2721,9 @@ packages: crelt@1.0.6: resolution: {integrity: sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==} + cross-fetch@3.2.0: + resolution: {integrity: sha512-Q+xVJLoGOeIMXZmbUK4HYk+69cQH6LudR0Vu/pRm2YlU/hDV9CiS0gKUMaWY5f2NeUH9C1nV3bsTlCo0FsTV1Q==} + cssesc@3.0.0: resolution: {integrity: sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==} engines: {node: '>=4'} @@ -2862,6 +2926,10 @@ packages: delaunator@5.0.1: resolution: {integrity: sha512-8nvh+XBe96aCESrGOqMp/84b13H9cdKbG5P2ejQCh4d4sK9RL4371qou9drQjMhvnPmhWl5hnmqbEE0fXr9Xnw==} + delayed-stream@1.0.0: + resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} + engines: {node: '>=0.4.0'} + dequal@2.0.3: resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==} engines: {node: '>=6'} @@ -2979,6 +3047,10 @@ packages: sqlite3: optional: true + dunder-proto@1.0.1: + resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} + engines: {node: '>= 0.4'} + embla-carousel-react@8.6.0: resolution: {integrity: sha512-0/PjqU7geVmo6F734pmPqpyHqiM99olvyecY7zdweCw+6tKEXnrE90pBiBbMMU8s5tICemzpQ3hi5EpxzGW+JA==} peerDependencies: @@ -3004,9 +3076,25 @@ packages: resolution: {integrity: sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==} engines: {node: '>=0.12'} + es-define-property@1.0.1: + resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==} + engines: {node: '>= 0.4'} + + es-errors@1.3.0: + resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} + engines: {node: '>= 0.4'} + es-module-lexer@1.7.0: resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==} + es-object-atoms@1.1.1: + resolution: {integrity: 
sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==} + engines: {node: '>= 0.4'} + + es-set-tostringtag@2.1.0: + resolution: {integrity: sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==} + engines: {node: '>= 0.4'} + esbuild-register@3.6.0: resolution: {integrity: sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==} peerDependencies: @@ -3037,6 +3125,14 @@ packages: estree-walker@3.0.3: resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==} + event-target-shim@5.0.1: + resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==} + engines: {node: '>=6'} + + events@3.3.0: + resolution: {integrity: sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==} + engines: {node: '>=0.8.x'} + eventsource-parser@3.0.6: resolution: {integrity: sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==} engines: {node: '>=18.0.0'} @@ -3066,10 +3162,30 @@ packages: picomatch: optional: true + follow-redirects@1.15.11: + resolution: {integrity: sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==} + engines: {node: '>=4.0'} + peerDependencies: + debug: '*' + peerDependenciesMeta: + debug: + optional: true + + form-data-encoder@1.7.2: + resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==} + + form-data@4.0.5: + resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} + engines: {node: '>= 6'} + format@0.2.2: resolution: {integrity: sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww==} engines: {node: '>=0.4.x'} + formdata-node@4.4.1: + resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==} + engines: {node: '>= 12.20'} + framer-motion@11.18.2: resolution: {integrity: sha512-5F5Och7wrvtLVElIpclDT0CBzMVg3dL22B64aZwHtsIY8RB4mXICLrkajK4G9R+ieSAGcgrLeae2SeUTg2pr6w==} peerDependencies: @@ -3102,10 +3218,18 @@ packages: peerDependencies: next: '>=13.2.0' + get-intrinsic@1.3.0: + resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==} + engines: {node: '>= 0.4'} + get-nonce@1.0.1: resolution: {integrity: sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==} engines: {node: '>=6'} + get-proto@1.0.1: + resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} + engines: {node: '>= 0.4'} + get-tsconfig@4.10.0: resolution: {integrity: sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==} @@ -3113,12 +3237,27 @@ packages: resolution: {integrity: sha512-7ACyT3wmyp3I61S4fG682L0VA2RGD9otkqGJIwNUMF1SWUombIIk+af1unuDYgMm082aHYwD+mzJvv9Iu8dsgg==} engines: {node: '>=18'} + gopd@1.2.0: + resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} + engines: {node: '>= 0.4'} + graceful-fs@4.2.11: resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + groq-sdk@0.35.0: + resolution: 
{integrity: sha512-jrRqtV4kwtYFpuiQ5EM+vVUM0xS4kQAyQx1tFpbHOCoaM/ad6ECMZxPT9bzA7SkXVDzhF1TYh5SYqxYiFqk/Pw==} + hachure-fill@0.5.2: resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==} + has-symbols@1.1.0: + resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==} + engines: {node: '>= 0.4'} + + has-tostringtag@1.0.2: + resolution: {integrity: sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==} + engines: {node: '>= 0.4'} + hasown@2.0.2: resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} engines: {node: '>= 0.4'} @@ -3180,6 +3319,13 @@ packages: html-void-elements@3.0.0: resolution: {integrity: sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==} + https-proxy-agent@7.0.6: + resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} + engines: {node: '>= 14'} + + humanize-ms@1.2.1: + resolution: {integrity: sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==} + iconv-lite@0.6.3: resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==} engines: {node: '>=0.10.0'} @@ -3243,6 +3389,9 @@ packages: jose@5.10.0: resolution: {integrity: sha512-s+3Al/p9g32Iq+oqXxkW//7jk2Vig6FF1CFqzVXoTUXt2qz89YWbL+OwS17NFYEvxC35n0FKeGO2LGYSxeM2Gg==} + js-tiktoken@1.0.21: + resolution: {integrity: sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==} + js-tokens@9.0.1: resolution: {integrity: sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==} @@ -3398,6 +3547,10 @@ packages: engines: {node: '>= 20'} hasBin: true + math-intrinsics@1.1.0: + resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} + engines: {node: '>= 0.4'} + mdast-util-find-and-replace@3.0.2: resolution: {integrity: sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg==} @@ -3539,6 +3692,14 @@ packages: micromark@4.0.2: resolution: {integrity: sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA==} + mime-db@1.52.0: + resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} + engines: {node: '>= 0.6'} + + mime-types@2.1.35: + resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} + engines: {node: '>= 0.6'} + mlly@1.8.0: resolution: {integrity: sha512-l8D9ODSRWLe2KHJSifWGwBqpTZXIXTeo8mlKjY+E2HAakaTeNpqAyBZ8GSqLzHgw4XmHmC8whvpjJNMbFZN7/g==} @@ -3612,6 +3773,20 @@ packages: sass: optional: true + node-domexception@1.0.0: + resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} + engines: {node: '>=10.5.0'} + deprecated: Use your platform's native DOMException instead + + node-fetch@2.7.0: + resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} + engines: {node: 4.x || >=6.0.0} + peerDependencies: + encoding: ^0.1.0 + peerDependenciesMeta: + encoding: + optional: true + node-gyp-build@4.8.4: resolution: 
{integrity: sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==} hasBin: true @@ -3824,6 +3999,9 @@ packages: prosemirror-view@1.38.1: resolution: {integrity: sha512-4FH/uM1A4PNyrxXbD+RAbAsf0d/mM0D/wAKSVVWK7o0A9Q/oOXJBrw786mBf2Vnrs/Edly6dH6Z2gsb7zWwaUw==} + proxy-from-env@1.1.0: + resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} + punycode.js@2.3.1: resolution: {integrity: sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==} engines: {node: '>=6'} @@ -4143,6 +4321,9 @@ packages: tokenlens@1.3.0: resolution: {integrity: sha512-qrwHFO7CI8HEd+UvKjlL+veTzCVr3N4AYp3cquGL6z62Q/OtoEHbTv5uNqiiejP54y7eR+h8VVRRpZbm3t/99Q==} + tr46@0.0.3: + resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + trim-lines@3.0.1: resolution: {integrity: sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==} @@ -4189,6 +4370,9 @@ packages: resolution: {integrity: sha512-85rzcGk+KKF3jhBGLYQj57JYAQ50Mun6PAifN4B6cCmCI8GNjgvUbp0f4DlpOfl92mq8CzouCzzcVxwcNNmTmw==} hasBin: true + undici-types@5.26.5: + resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} + undici-types@6.20.0: resolution: {integrity: sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==} @@ -4376,6 +4560,16 @@ packages: web-namespaces@2.0.1: resolution: {integrity: sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==} + web-streams-polyfill@4.0.0-beta.3: + resolution: {integrity: sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==} + engines: {node: '>= 14'} + + webidl-conversions@3.0.1: + resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} + + whatwg-url@5.0.0: + resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} + why-is-node-running@2.3.0: resolution: {integrity: sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==} engines: {node: '>=8'} @@ -4628,6 +4822,23 @@ snapshots: style-mod: 4.1.2 w3c-keyname: 2.2.8 + '@deepgram/captions@1.2.0': + dependencies: + dayjs: 1.11.19 + + '@deepgram/sdk@4.11.2(bufferutil@4.0.9)': + dependencies: + '@deepgram/captions': 1.2.0 + '@types/node': 18.19.130 + cross-fetch: 3.2.0 + deepmerge: 4.3.1 + events: 3.3.0 + ws: 8.18.1(bufferutil@4.0.9) + transitivePeerDependencies: + - bufferutil + - encoding + - utf-8-validate + '@drizzle-team/brocli@0.10.2': {} '@emnapi/runtime@1.3.1': @@ -6098,6 +6309,15 @@ snapshots: postcss-selector-parser: 6.0.10 tailwindcss: 4.1.16 + '@tavily/core@0.5.13': + dependencies: + axios: 1.13.2 + https-proxy-agent: 7.0.6 + js-tiktoken: 1.0.21 + transitivePeerDependencies: + - debug + - supports-color + '@tokenlens/core@1.3.0': {} '@tokenlens/fetch@1.3.0': @@ -6282,6 +6502,15 @@ snapshots: '@types/ms@2.1.0': {} + '@types/node-fetch@2.6.13': + dependencies: + '@types/node': 22.13.10 + form-data: 4.0.5 + + '@types/node@18.19.130': + dependencies: + undici-types: 5.26.5 + '@types/node@22.13.10': dependencies: undici-types: 6.20.0 @@ -6411,12 +6640,22 @@ snapshots: loupe: 3.2.1 tinyrainbow: 2.0.0 + abort-controller@3.0.0: + dependencies: + event-target-shim: 5.0.1 + 
acorn-import-attributes@1.9.5(acorn@8.15.0): dependencies: acorn: 8.15.0 acorn@8.15.0: {} + agent-base@7.1.4: {} + + agentkeepalive@4.6.0: + dependencies: + humanize-ms: 1.2.1 + ai@5.0.26(zod@3.25.76): dependencies: '@ai-sdk/gateway': 1.0.15(zod@3.25.76) @@ -6437,8 +6676,20 @@ snapshots: dependencies: retry: 0.13.1 + asynckit@0.4.0: {} + + axios@1.13.2: + dependencies: + follow-redirects: 1.15.11 + form-data: 4.0.5 + proxy-from-env: 1.1.0 + transitivePeerDependencies: + - debug + bail@2.0.2: {} + base64-js@1.5.1: {} + bcrypt-ts@5.0.3: {} buffer-from@1.1.2: {} @@ -6456,6 +6707,11 @@ snapshots: cac@6.7.14: {} + call-bind-apply-helpers@1.0.2: + dependencies: + es-errors: 1.3.0 + function-bind: 1.1.2 + caniuse-lite@1.0.30001704: {} ccount@2.0.1: {} @@ -6546,6 +6802,10 @@ snapshots: color-string: 1.9.1 optional: true + combined-stream@1.0.8: + dependencies: + delayed-stream: 1.0.0 + comma-separated-tokens@1.0.8: {} comma-separated-tokens@2.0.3: {} @@ -6574,6 +6834,12 @@ snapshots: crelt@1.0.6: {} + cross-fetch@3.2.0: + dependencies: + node-fetch: 2.7.0 + transitivePeerDependencies: + - encoding + cssesc@3.0.0: {} csstype@3.1.3: {} @@ -6786,6 +7052,8 @@ snapshots: dependencies: robust-predicates: 3.0.2 + delayed-stream@1.0.0: {} + dequal@2.0.3: {} detect-libc@2.0.3: {} @@ -6823,6 +7091,12 @@ snapshots: postgres: 3.4.5 react: 19.0.0-rc-45804af1-20241021 + dunder-proto@1.0.1: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-errors: 1.3.0 + gopd: 1.2.0 + embla-carousel-react@8.6.0(react@19.0.0-rc-45804af1-20241021): dependencies: embla-carousel: 8.6.0 @@ -6844,8 +7118,23 @@ snapshots: entities@6.0.1: {} + es-define-property@1.0.1: {} + + es-errors@1.3.0: {} + es-module-lexer@1.7.0: {} + es-object-atoms@1.1.1: + dependencies: + es-errors: 1.3.0 + + es-set-tostringtag@2.1.0: + dependencies: + es-errors: 1.3.0 + get-intrinsic: 1.3.0 + has-tostringtag: 1.0.2 + hasown: 2.0.2 + esbuild-register@3.6.0(esbuild@0.19.12): dependencies: debug: 4.4.0 @@ -6940,6 +7229,10 @@ snapshots: dependencies: '@types/estree': 1.0.6 + event-target-shim@5.0.1: {} + + events@3.3.0: {} + eventsource-parser@3.0.6: {} expect-type@1.2.2: {} @@ -6958,8 +7251,25 @@ snapshots: optionalDependencies: picomatch: 4.0.3 + follow-redirects@1.15.11: {} + + form-data-encoder@1.7.2: {} + + form-data@4.0.5: + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + es-set-tostringtag: 2.1.0 + hasown: 2.0.2 + mime-types: 2.1.35 + format@0.2.2: {} + formdata-node@4.4.1: + dependencies: + node-domexception: 1.0.0 + web-streams-polyfill: 4.0.0-beta.3 + framer-motion@11.18.2(react-dom@19.0.0-rc-45804af1-20241021(react@19.0.0-rc-45804af1-20241021))(react@19.0.0-rc-45804af1-20241021): dependencies: motion-dom: 11.18.1 @@ -6981,18 +7291,56 @@ snapshots: dependencies: next: 15.3.0-canary.31(@opentelemetry/api@1.9.0)(@playwright/test@1.51.0)(react-dom@19.0.0-rc-45804af1-20241021(react@19.0.0-rc-45804af1-20241021))(react@19.0.0-rc-45804af1-20241021) + get-intrinsic@1.3.0: + dependencies: + call-bind-apply-helpers: 1.0.2 + es-define-property: 1.0.1 + es-errors: 1.3.0 + es-object-atoms: 1.1.1 + function-bind: 1.1.2 + get-proto: 1.0.1 + gopd: 1.2.0 + has-symbols: 1.1.0 + hasown: 2.0.2 + math-intrinsics: 1.1.0 + get-nonce@1.0.1: {} + get-proto@1.0.1: + dependencies: + dunder-proto: 1.0.1 + es-object-atoms: 1.1.1 + get-tsconfig@4.10.0: dependencies: resolve-pkg-maps: 1.0.0 globals@15.15.0: {} + gopd@1.2.0: {} + graceful-fs@4.2.11: {} + groq-sdk@0.35.0: + dependencies: + '@types/node': 18.19.130 + '@types/node-fetch': 2.6.13 + abort-controller: 3.0.0 
+ agentkeepalive: 4.6.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0 + transitivePeerDependencies: + - encoding + hachure-fill@0.5.2: {} + has-symbols@1.1.0: {} + + has-tostringtag@1.0.2: + dependencies: + has-symbols: 1.1.0 + hasown@2.0.2: dependencies: function-bind: 1.1.2 @@ -7135,6 +7483,17 @@ snapshots: html-void-elements@3.0.0: {} + https-proxy-agent@7.0.6: + dependencies: + agent-base: 7.1.4 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + + humanize-ms@1.2.1: + dependencies: + ms: 2.1.3 + iconv-lite@0.6.3: dependencies: safer-buffer: 2.1.2 @@ -7189,6 +7548,10 @@ snapshots: jose@5.10.0: {} + js-tiktoken@1.0.21: + dependencies: + base64-js: 1.5.1 + js-tokens@9.0.1: {} json-schema@0.4.0: {} @@ -7318,6 +7681,8 @@ snapshots: marked@16.4.1: {} + math-intrinsics@1.1.0: {} + mdast-util-find-and-replace@3.0.2: dependencies: '@types/mdast': 4.0.4 @@ -7692,7 +8057,7 @@ snapshots: micromark@4.0.2: dependencies: '@types/debug': 4.1.12 - debug: 4.4.0 + debug: 4.4.3 decode-named-character-reference: 1.1.0 devlop: 1.1.0 micromark-core-commonmark: 2.0.3 @@ -7711,6 +8076,12 @@ snapshots: transitivePeerDependencies: - supports-color + mime-db@1.52.0: {} + + mime-types@2.1.35: + dependencies: + mime-db: 1.52.0 + mlly@1.8.0: dependencies: acorn: 8.15.0 @@ -7772,6 +8143,12 @@ snapshots: - '@babel/core' - babel-plugin-macros + node-domexception@1.0.0: {} + + node-fetch@2.7.0: + dependencies: + whatwg-url: 5.0.0 + node-gyp-build@4.8.4: optional: true @@ -8040,6 +8417,8 @@ snapshots: prosemirror-state: 1.4.3 prosemirror-transform: 1.10.3 + proxy-from-env@1.1.0: {} + punycode.js@2.3.1: {} quansync@0.2.11: {} @@ -8499,6 +8878,8 @@ snapshots: '@tokenlens/helpers': 1.3.0 '@tokenlens/models': 1.3.0 + tr46@0.0.3: {} + trim-lines@3.0.1: {} trough@2.2.0: {} @@ -8564,6 +8945,8 @@ snapshots: - typescript - yaml + undici-types@5.26.5: {} + undici-types@6.20.0: {} undici@5.28.5: @@ -8764,6 +9147,15 @@ snapshots: web-namespaces@2.0.1: {} + web-streams-polyfill@4.0.0-beta.3: {} + + webidl-conversions@3.0.1: {} + + whatwg-url@5.0.0: + dependencies: + tr46: 0.0.3 + webidl-conversions: 3.0.1 + why-is-node-running@2.3.0: dependencies: siginfo: 2.0.0 @@ -8772,7 +9164,6 @@ snapshots: ws@8.18.1(bufferutil@4.0.9): optionalDependencies: bufferutil: 4.0.9 - optional: true xtend@4.0.2: {} diff --git a/tests/prompts/utils.ts b/tests/prompts/utils.ts index 0bb33e10a7..4529924d15 100644 --- a/tests/prompts/utils.ts +++ b/tests/prompts/utils.ts @@ -94,7 +94,11 @@ export const getResponseChunksByPrompt = ( { type: "finish", finishReason: "stop", - usage: { inputTokens: 3, outputTokens: 10, totalTokens: 13 }, + usage: { + inputTokens: 3, + outputTokens: 10, + totalTokens: 13, + }, }, ]; } @@ -108,7 +112,11 @@ export const getResponseChunksByPrompt = ( { type: "finish", finishReason: "stop", - usage: { inputTokens: 3, outputTokens: 10, totalTokens: 13 }, + usage: { + inputTokens: 3, + outputTokens: 10, + totalTokens: 13, + }, }, ]; } @@ -257,7 +265,10 @@ As we move forward, Silicon Valley continues to reinvent itself. While some pred type: "tool-call", toolCallId: "call_456", toolName: "getWeather", - input: JSON.stringify({ latitude: 37.7749, longitude: -122.4194 }), + input: JSON.stringify({ + latitude: 37.7749, + longitude: -122.4194, + }), }, { type: "finish",
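A minimal sketch of how a caller might consume the optional EOT service added in `lib/nlp-worker-client.ts`, falling back to a simple heuristic when the worker is not configured or unreachable, as the client's comments describe. The helper name `isEndOfTurn` and the punctuation-based fallback are illustrative assumptions and not part of this diff; only `nlpWorkerClient`, `EOTChatMessage`, `detectEOT`, and the `{ error }` / `is_end_of_utterance` result shape come from the new client.

```typescript
import {
  nlpWorkerClient,
  type EOTChatMessage,
} from "@/lib/nlp-worker-client";

// Hypothetical helper: decide whether the speaker has finished their turn.
// Prefers the NLP Worker's EOT model; falls back to a naive punctuation
// heuristic when the optional service returns an error.
export async function isEndOfTurn(
  history: EOTChatMessage[]
): Promise<boolean> {
  const result = await nlpWorkerClient.detectEOT(history);

  if ("error" in result) {
    // Fallback: treat the utterance as complete if it ends with
    // terminal punctuation, so voice input stays usable without the worker.
    const lastUtterance = history.at(-1)?.content.trim() ?? "";
    return /[.!?]$/.test(lastUtterance);
  }

  return result.is_end_of_utterance;
}
```

When the worker is reachable, a caller could also compare `eou_probability` against `unlikely_threshold` for finer-grained control than the boolean decision.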