diff --git a/echo-frontend/src/components/BookDropZone.tsx b/echo-frontend/src/components/BookDropZone.tsx
index be0b26a..aa6c115 100644
--- a/echo-frontend/src/components/BookDropZone.tsx
+++ b/echo-frontend/src/components/BookDropZone.tsx
@@ -3,7 +3,6 @@ import { cn, cleanTextWithGemini } from "@/lib/utils";
 import { FileUp } from "lucide-react";
 import { Subheader2, Paragraph } from "@/components/Typography";
 import ePub from "epubjs";
-import pdfToText from "react-pdftotext";
 
 interface BookDropZoneProps {
   onTextExtracted: (text: string) => void;
@@ -35,12 +34,15 @@ export function BookDropZone({ onTextExtracted }: BookDropZoneProps) {
   const extractTextFromPdf = async (file: File) => {
     try {
       setIsProcessing(true);
+
+      // Dynamic import of pdfToText
+      const { default: pdfToText } = await import("react-pdftotext");
       const text = await pdfToText(file);
-      // console.log("Text:", text.slice(0, 1000));
+
       const cleanedText = await cleanTextWithGemini(text);
       console.log("Cleaned text:", cleanedText);
       onTextExtracted(cleanedText);
-      console.log("Extracted and cleaned text from PDF");
+      setSelectedFile(file.name);
     } catch (error) {
       console.error("Failed to extract text from PDF", error);
     } finally {
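The change above swaps the top-level `react-pdftotext` import for a dynamic `import()` inside `extractTextFromPdf`, so the PDF parser is only fetched when a PDF is actually dropped, keeping it out of the initial bundle and away from server-side rendering. A minimal sketch of the pattern in isolation, assuming only that `pdfToText` is the library's default export taking a `File` and resolving to the extracted text (the `extractPdfText` name is illustrative, not part of the diff):

```ts
// Sketch: load react-pdftotext lazily, on first use in the browser.
async function extractPdfText(file: File): Promise<string> {
  // The module is fetched once on first call; later calls reuse the cache.
  const { default: pdfToText } = await import("react-pdftotext");
  return pdfToText(file);
}
```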
diff --git a/echo-frontend/src/hooks/useAudioPlayer.ts b/echo-frontend/src/hooks/useAudioPlayer.ts
index 94400cf..b6ae283 100644
--- a/echo-frontend/src/hooks/useAudioPlayer.ts
+++ b/echo-frontend/src/hooks/useAudioPlayer.ts
@@ -6,11 +6,21 @@ export function useAudioPlayer(text: string) {
   const [currentTimeSeconds, setCurrentTimeSeconds] = useState(0);
   const [duration, setDuration] = useState(0);
   const [bufferingProgress, setBufferingProgress] = useState(0);
+  const [estimatedTotalDuration, setEstimatedTotalDuration] = useState(0);
   const audioRef = useRef(null);
   const mediaSourceRef = useRef(null);
   const sourceBufferRef = useRef(null);
-  const chunksReceivedRef = useRef(0);
-  const totalChunksRef = useRef(0);
+  const receivedBytesRef = useRef(0);
+
+  // Calculate estimated duration based on text length and average speaking rate
+  useEffect(() => {
+    if (text) {
+      // Estimate ~3 words per second for speech
+      const wordCount = text.split(/\s+/).length;
+      const estimatedSeconds = wordCount / 3;
+      setEstimatedTotalDuration(estimatedSeconds);
+    }
+  }, [text]);
 
   // Prepare audio when text changes
   useEffect(() => {
@@ -19,7 +29,8 @@
 
     setIsLoading(true);
     setBufferingProgress(0);
-    setDuration(0); // Reset duration when starting new audio
+    setDuration(0);
+    receivedBytesRef.current = 0;
 
     try {
       // Create MediaSource instance
@@ -81,7 +92,6 @@
         const { done, value } = await reader.read();
         if (done) break;
 
-        // Wait if the buffer is updating
         await new Promise((resolve) => {
           const checkBuffer = () => {
             if (!sourceBuffer.updating) {
@@ -93,17 +103,18 @@
          checkBuffer();
        });
 
-        console.log("value", value);
-
-        // Append chunk to source buffer
        sourceBuffer.appendBuffer(value);
-        receivedLength += value.length;
+        receivedBytesRef.current += value.length;
 
-        // Update buffering progress based on received data
-        // Note: This is an estimate since we don't know the total size
+        // Update buffering progress based on estimated total size
+        // Assuming ~15KB per second of audio
+        const estimatedTotalBytes = estimatedTotalDuration * 15 * 1024;
        setBufferingProgress(
-          Math.min((receivedLength / (1024 * 1024)) * 20, 100)
-        ); // Assuming ~1MB total size
+          Math.min(
+            (receivedBytesRef.current / estimatedTotalBytes) * 100,
+            99
+          )
+        );
      }
 
      // Close media source when all chunks are received
@@ -125,7 +136,7 @@
     };
 
     prepareAudio();
-  }, [text]);
+  }, [text, estimatedTotalDuration]);
 
   // Cleanup on unmount
   useEffect(() => {
@@ -179,9 +190,14 @@
        audioRef.current.pause();
      }
 
-      const newTime = (progressPercent / 100) * audioRef.current.duration;
-      audioRef.current.currentTime = newTime;
-      setCurrentTimeSeconds(newTime);
+      // Use the actual duration if available, otherwise use estimated duration
+      const totalDuration = duration || estimatedTotalDuration;
+      const newTime = (progressPercent / 100) * totalDuration;
+
+      // Clamp the new time to the available duration
+      const clampedTime = Math.min(newTime, audioRef.current.duration || 0);
+      audioRef.current.currentTime = clampedTime;
+      setCurrentTimeSeconds(clampedTime);
 
      if (wasPlaying) {
        audioRef.current.play();
@@ -189,16 +205,24 @@
    }
  };
 
+  // Return the current time as a percentage of the total estimated/actual duration
+  const getCurrentProgress = () => {
+    const totalDuration = duration || estimatedTotalDuration;
+    if (!totalDuration) return 0;
+    return (currentTimeSeconds / totalDuration) * 100;
+  };
+
  return {
    isPlaying,
    isLoading,
    currentTimeSeconds,
-    duration,
+    duration: duration || estimatedTotalDuration, // Use estimated duration until actual is available
    bufferingProgress,
    handlePlayPause,
    handleSkipForward,
    handleSkipBack,
    handleProgressChange,
    setIsPlaying,
+    getCurrentProgress, // Add this new helper function
  };
 }
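The hook now leans on two rough constants: ~3 words per second of speech to turn the text into an estimated duration, and ~15 KB of audio per second to turn that duration into an estimated byte total for the buffering bar. A standalone sketch of that arithmetic, using the diff's own heuristics (the `estimateAudio` helper is hypothetical):

```ts
// The same estimates useAudioPlayer relies on: ~3 words/sec spoken,
// ~15 KB of MP3 per second. Both are heuristics, which is why the
// hook caps bufferingProgress at 99 until the stream actually ends.
function estimateAudio(text: string) {
  const wordCount = text.split(/\s+/).length;
  const seconds = wordCount / 3; // estimated playback duration
  const bytes = seconds * 15 * 1024; // estimated total stream size
  return { seconds, bytes };
}

// e.g. a 900-word passage ≈ 300 s of audio ≈ 4.6 MB streamed
```

Exposing `duration: duration || estimatedTotalDuration` lets the UI render a sensible timeline immediately and snap to the real value once the browser reports one.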
diff --git a/echo-frontend/src/pages/api/tts-eleven.ts b/echo-frontend/src/pages/api/tts-eleven.ts
index 7b22f1a..7a41ca2 100644
--- a/echo-frontend/src/pages/api/tts-eleven.ts
+++ b/echo-frontend/src/pages/api/tts-eleven.ts
@@ -9,45 +9,71 @@ const client = new ElevenLabsClient({
   apiKey: process.env.ELEVENLABS_API_KEY,
 });
 
+const chunkText = (text: string, maxLength: number = 200): string[] => {
+  const sentences = text.match(/[^.!?]+[.!?]+/g) || [text];
+  const chunks: string[] = [];
+  let currentChunk = "";
+
+  for (const sentence of sentences) {
+    if ((currentChunk + sentence).length > maxLength && currentChunk) {
+      chunks.push(currentChunk.trim());
+      currentChunk = "";
+    }
+    currentChunk += sentence;
+  }
+
+  if (currentChunk) {
+    chunks.push(currentChunk.trim());
+  }
+
+  return chunks;
+};
+
 export default async function handler(
   req: NextApiRequest,
   res: NextApiResponse
 ) {
   if (req.method !== "POST") {
-    return res.status(405).json({ error: "Method not allowed" });
+    res.status(405).json({ error: "Method not allowed" });
+    return;
   }
 
-  try {
-    const { text } = req.body;
-
-    if (!text) {
-      return res.status(400).json({ error: "Text is required" });
-    }
+  const { text } = req.body;
 
-    // Generate audio stream using the ElevenLabs client
-    const audioStream = await client.generate({
-      voice: "Rachel",
-      model_id: "eleven_turbo_v2_5",
-      text,
-    });
+  if (!text) {
+    res.status(400).json({ error: "Text is required" });
+    return;
+  }
 
-    // Set headers for streaming
+  try {
+    // Set headers before starting stream
     res.setHeader("Content-Type", "audio/mpeg");
     res.setHeader("Transfer-Encoding", "chunked");
 
-    // Pipe the audio stream directly to the response
-    audioStream.pipe(res);
+    // Split text into manageable chunks
+    const textChunks = chunkText(text);
 
-    // Handle any errors that occur during streaming
-    audioStream.on("error", (error) => {
-      console.error("Streaming error:", error);
-      // Only send error if headers haven't been sent
-      if (!res.headersSent) {
-        res.status(500).json({ error: "Streaming failed" });
+    // Generate and stream audio for each text chunk
+    for (const textChunk of textChunks) {
+      const audioStream = await client.generate({
+        voice: "Rachel",
+        model_id: "eleven_turbo_v2",
+        text: textChunk,
+      });
+
+      // Stream each audio chunk to the client
+      for await (const chunk of audioStream) {
+        res.write(chunk);
      }
-    });
+    }
+
+    res.end();
   } catch (error) {
     console.error("Error:", error);
-    res.status(500).json({ error: "Failed to generate speech" });
+    if (!res.headersSent) {
+      res.status(500).json({ error: "Failed to generate speech" });
+    } else {
+      res.end();
+    }
   }
 }
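`chunkText` packs whole sentences into chunks of roughly `maxLength` characters: sentences are never split, so a single sentence longer than `maxLength` becomes an oversized chunk of its own, and input with no `.`, `!`, or `?` falls back to a single chunk holding the whole text. A quick trace with hypothetical inputs:

```ts
chunkText("One sentence. Another one! A third?", 20);
// => ["One sentence.", "Another one!", "A third?"]

chunkText("no terminal punctuation");
// => ["no terminal punctuation"]
```

The chunks are then synthesized and written out strictly in order, so the client hears audio shortly after the first small request completes instead of waiting on the full text. And because the `200` status and `audio/mpeg` header are already on the wire once streaming begins, the catch block can only send a JSON error while `res.headersSent` is still false; otherwise it just ends the response.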