8 changes: 5 additions & 3 deletions echo-frontend/src/components/BookDropZone.tsx
@@ -3,7 +3,6 @@ import { cn, cleanTextWithGemini } from "@/lib/utils";
import { FileUp } from "lucide-react";
import { Subheader2, Paragraph } from "@/components/Typography";
import ePub from "epubjs";
import pdfToText from "react-pdftotext";

interface BookDropZoneProps {
onTextExtracted: (text: string) => void;
@@ -35,12 +34,15 @@ export function BookDropZone({ onTextExtracted }: BookDropZoneProps) {
const extractTextFromPdf = async (file: File) => {
try {
setIsProcessing(true);

// Dynamic import of pdfToText
const { default: pdfToText } = await import("react-pdftotext");
const text = await pdfToText(file);
// console.log("Text:", text.slice(0, 1000));

const cleanedText = await cleanTextWithGemini(text);
console.log("Cleaned text:", cleanedText);
onTextExtracted(cleanedText);
console.log("Extracted and cleaned text from PDF");
setSelectedFile(file.name);
} catch (error) {
console.error("Failed to extract text from PDF", error);
} finally {
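The switch to a dynamic import keeps react-pdftotext out of the initial (and any server-side) bundle, which is presumably the motivation here, since the parser is only needed once a file is actually dropped. The same pattern in isolation, with a hypothetical helper name:

// Sketch: lazy-load the PDF parser only when it is needed.
// extractPdfText is an illustrative name, not part of this PR.
async function extractPdfText(file: File): Promise<string> {
  const { default: pdfToText } = await import("react-pdftotext");
  return pdfToText(file);
}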
58 changes: 41 additions & 17 deletions echo-frontend/src/hooks/useAudioPlayer.ts
@@ -6,11 +6,21 @@ export function useAudioPlayer(text: string) {
const [currentTimeSeconds, setCurrentTimeSeconds] = useState(0);
const [duration, setDuration] = useState(0);
const [bufferingProgress, setBufferingProgress] = useState(0);
const [estimatedTotalDuration, setEstimatedTotalDuration] = useState(0);
const audioRef = useRef<HTMLAudioElement | null>(null);
const mediaSourceRef = useRef<MediaSource | null>(null);
const sourceBufferRef = useRef<SourceBuffer | null>(null);
const chunksReceivedRef = useRef<number>(0);
const totalChunksRef = useRef<number>(0);
const receivedBytesRef = useRef(0);

// Calculate estimated duration based on text length and average speaking rate
useEffect(() => {
if (text) {
// Estimate ~3 words per second for speech
const wordCount = text.split(/\s+/).length;
const estimatedSeconds = wordCount / 3;
setEstimatedTotalDuration(estimatedSeconds);
}
}, [text]);
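For scale, 3 words per second is 180 words per minute, in the neighborhood of a typical narration pace, so a 9,000-word text yields an estimate of 3,000 seconds (50 minutes). The same estimate as a standalone sketch (the helper name is illustrative):

// Rough speech-duration estimate at ~3 words/second.
const estimateDurationSeconds = (text: string): number =>
  text.trim().split(/\s+/).length / 3;

estimateDurationSeconds("one two three four five six"); // => 2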

// Prepare audio when text changes
useEffect(() => {
@@ -19,7 +29,8 @@

setIsLoading(true);
setBufferingProgress(0);
setDuration(0); // Reset duration when starting new audio
setDuration(0);
receivedBytesRef.current = 0;

try {
// Create MediaSource instance
@@ -81,7 +92,6 @@
const { done, value } = await reader.read();
if (done) break;

// Wait if the buffer is updating
await new Promise<void>((resolve) => {
const checkBuffer = () => {
if (!sourceBuffer.updating) {
@@ -93,17 +103,18 @@
checkBuffer();
});

console.log("value", value);

// Append chunk to source buffer
sourceBuffer.appendBuffer(value);
receivedLength += value.length;
receivedBytesRef.current += value.length;

// Update buffering progress based on received data
// Note: This is an estimate since we don't know the total size
// Update buffering progress based on estimated total size
// Assuming ~15KB per second of audio
const estimatedTotalBytes = estimatedTotalDuration * 15 * 1024;
setBufferingProgress(
Math.min((receivedLength / (1024 * 1024)) * 20, 100)
); // Assuming ~1MB total size
Math.min(
(receivedBytesRef.current / estimatedTotalBytes) * 100,
99
)
);
}
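The ~15 KB/s assumption corresponds to roughly 120 kbps audio, close to a 128 kbps MP3 stream, and the cap at 99 keeps the bar from reading complete before the stream actually ends. The same arithmetic as a standalone sketch (names are illustrative):

// Sketch of the buffering estimate: a 60 s clip at ~15 KB/s is expected
// to total 60 * 15 * 1024 = 921,600 bytes, so ~460 KB received reads as ~50%.
const estimatedBytes = (durationSeconds: number) => durationSeconds * 15 * 1024;
const bufferingPercent = (received: number, durationSeconds: number) =>
  Math.min((received / estimatedBytes(durationSeconds)) * 100, 99);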

// Close media source when all chunks are received
@@ -125,7 +136,7 @@
};

prepareAudio();
}, [text]);
}, [text, estimatedTotalDuration]);

// Cleanup on unmount
useEffect(() => {
@@ -179,26 +190,39 @@
audioRef.current.pause();
}

const newTime = (progressPercent / 100) * audioRef.current.duration;
audioRef.current.currentTime = newTime;
setCurrentTimeSeconds(newTime);
// Use the actual duration if available, otherwise use estimated duration
const totalDuration = duration || estimatedTotalDuration;
const newTime = (progressPercent / 100) * totalDuration;

// Clamp the new time to the available duration
const clampedTime = Math.min(newTime, audioRef.current.duration || 0);
audioRef.current.currentTime = clampedTime;
setCurrentTimeSeconds(clampedTime);

if (wasPlaying) {
audioRef.current.play();
}
}
};

// Return the current time as a percentage of the total estimated/actual duration
const getCurrentProgress = () => {
const totalDuration = duration || estimatedTotalDuration;
if (!totalDuration) return 0;
return (currentTimeSeconds / totalDuration) * 100;
};

return {
isPlaying,
isLoading,
currentTimeSeconds,
duration,
duration: duration || estimatedTotalDuration, // Use estimated duration until actual is available
bufferingProgress,
handlePlayPause,
handleSkipForward,
handleSkipBack,
handleProgressChange,
setIsPlaying,
getCurrentProgress, // Current progress as a percentage of total duration
};
}
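A hypothetical consumer of the hook, showing how the returned handlers and getCurrentProgress fit together (sketch, not part of this PR):

// Sketch: minimal player component wired to useAudioPlayer.
import { useAudioPlayer } from "@/hooks/useAudioPlayer";

function MiniPlayer({ text }: { text: string }) {
  const { isPlaying, isLoading, handlePlayPause, getCurrentProgress } =
    useAudioPlayer(text);
  return (
    <button onClick={handlePlayPause} disabled={isLoading}>
      {isPlaying ? "Pause" : "Play"} ({Math.round(getCurrentProgress())}%)
    </button>
  );
}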
74 changes: 50 additions & 24 deletions echo-frontend/src/pages/api/tts-eleven.ts
@@ -9,45 +9,71 @@ const client = new ElevenLabsClient({
apiKey: process.env.ELEVENLABS_API_KEY,
});

const chunkText = (text: string, maxLength: number = 200): string[] => {
const sentences = text.match(/[^.!?]+[.!?]+|[^.!?]+$/g) || [text]; // also keep a trailing fragment with no terminal punctuation
const chunks: string[] = [];
let currentChunk = "";

for (const sentence of sentences) {
if ((currentChunk + sentence).length > maxLength && currentChunk) {
chunks.push(currentChunk.trim());
currentChunk = "";
}
currentChunk += sentence;
}

if (currentChunk) {
chunks.push(currentChunk.trim());
}

return chunks;
};
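chunkText groups whole sentences until adding the next one would push a chunk past maxLength, so sentences are never split; a single sentence longer than maxLength becomes its own oversized chunk. For example:

// Example (35-char limit for illustration; the handler uses the 200 default):
chunkText("First sentence. Second sentence! Third?", 35);
// => ["First sentence. Second sentence!", "Third?"]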

export default async function handler(
req: NextApiRequest,
res: NextApiResponse
) {
if (req.method !== "POST") {
return res.status(405).json({ error: "Method not allowed" });
res.status(405).json({ error: "Method not allowed" });
return;
}

try {
const { text } = req.body;

if (!text) {
return res.status(400).json({ error: "Text is required" });
}
const { text } = req.body;

// Generate audio stream using the ElevenLabs client
const audioStream = await client.generate({
voice: "Rachel",
model_id: "eleven_turbo_v2_5",
text,
});
if (!text) {
res.status(400).json({ error: "Text is required" });
return;
}

// Set headers for streaming
try {
// Set headers before starting stream
res.setHeader("Content-Type", "audio/mpeg");
res.setHeader("Transfer-Encoding", "chunked");

// Pipe the audio stream directly to the response
audioStream.pipe(res);
// Split text into manageable chunks
const textChunks = chunkText(text);

// Handle any errors that occur during streaming
audioStream.on("error", (error) => {
console.error("Streaming error:", error);
// Only send error if headers haven't been sent
if (!res.headersSent) {
res.status(500).json({ error: "Streaming failed" });
// Generate and stream audio for each text chunk
for (const textChunk of textChunks) {
const audioStream = await client.generate({
voice: "Rachel",
model_id: "eleven_turbo_v2",
text: textChunk,
});

// Stream each audio chunk to the client
for await (const chunk of audioStream) {
res.write(chunk);
}
});
}

res.end();
} catch (error) {
console.error("Error:", error);
res.status(500).json({ error: "Failed to generate speech" });
if (!res.headersSent) {
res.status(500).json({ error: "Failed to generate speech" });
} else {
res.end();
}
}
}
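A quick way to exercise the endpoint from the browser is to buffer the whole response into a Blob instead of streaming it through MediaSource the way useAudioPlayer does. A minimal sketch, assuming the route is served at /api/tts-eleven (the sample text is arbitrary):

// Sketch: fetch the streamed MP3 and play it once fully buffered.
const res = await fetch("/api/tts-eleven", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ text: "Hello from Echo." }),
});
if (!res.ok) throw new Error(`TTS request failed: ${res.status}`);
const audio = new Audio(URL.createObjectURL(await res.blob()));
await audio.play();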