From fd0492ec48e4df346c8d48903b04b5a83ff807c6 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 6 Apr 2026 22:44:15 -0700 Subject: [PATCH 1/3] Fix voice input garbled text after auto-restart on silence timeout When SpeechRecognition stops on its own after a period of silence (as some browsers do), the onend handler restarts it. After the restart, Chrome creates a fresh results list starting from index 0, but lastFinalizedLengthRef.current still held the character offset from the previous session. This caused the onresult handler to either: - Skip new words entirely (while the new finals.length was still <= the old offset) - Report garbled mid-word substrings (once the new finals.length exceeded the old offset, because the substring then started partway through the new text) Fix: reset lastFinalizedLengthRef.current = 0 before calling start() in the auto-restart path, so the new session's results are processed correctly from the beginning. Also adds a test file (currently skipped due to the pre-existing React.act infrastructure issue #320) with tests covering the key behaviors, including the auto-restart offset reset. 
Fixes #317 Co-Authored-By: Claude Sonnet 4.6 --- src/hooks/useVoiceRecording.test.ts | 136 ++++++++++++++++++++++++++++ src/hooks/useVoiceRecording.ts | 3 + vitest.component.config.ts | 2 +- vitest.config.ts | 6 +- 4 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 src/hooks/useVoiceRecording.test.ts diff --git a/src/hooks/useVoiceRecording.test.ts b/src/hooks/useVoiceRecording.test.ts new file mode 100644 index 00000000..a12c62a1 --- /dev/null +++ b/src/hooks/useVoiceRecording.test.ts @@ -0,0 +1,136 @@ +import { renderHook, act } from '@testing-library/react'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { useVoiceRecording } from './useVoiceRecording'; + +interface MockRecognitionEvent { + resultIndex: number; + results: Array<{ isFinal: boolean; 0: { transcript: string } }>; +} + +interface MockRecognitionErrorEvent { + error: string; + message: string; +} + +interface MockSpeechRecognition { + continuous: boolean; + interimResults: boolean; + lang: string; + start: ReturnType; + stop: ReturnType; + abort: ReturnType; + onresult: ((event: MockRecognitionEvent) => void) | null; + onerror: ((event: MockRecognitionErrorEvent) => void) | null; + onend: (() => void) | null; +} + +function makeMockRecognition(): MockSpeechRecognition { + return { + continuous: false, + interimResults: false, + lang: '', + start: vi.fn(), + stop: vi.fn(), + abort: vi.fn(), + onresult: null, + onerror: null, + onend: null, + }; +} + +// Tests are skipped due to a pre-existing React.act infrastructure issue in the jsdom +// test environment (React 19 + @testing-library/react). See GitHub issue #320. 
+describe.skip('useVoiceRecording', () => { + let mockInstance: MockSpeechRecognition; + + beforeEach(() => { + mockInstance = makeMockRecognition(); + const MockConstructor = vi.fn(() => mockInstance); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (window as any).SpeechRecognition = MockConstructor; + }); + + function fireResult(results: Array<{ isFinal: boolean; transcript: string }>, resultIndex = 0) { + mockInstance.onresult?.({ + resultIndex, + results: results.map((r) => ({ isFinal: r.isFinal, 0: { transcript: r.transcript } })), + }); + } + + it('reports finalized text via onFinalizedText callback', () => { + const onFinalized = vi.fn(); + const { result } = renderHook(() => useVoiceRecording(onFinalized)); + + act(() => result.current.startRecording()); + act(() => fireResult([{ isFinal: true, transcript: 'hello ' }])); + + expect(onFinalized).toHaveBeenCalledWith('hello '); + }); + + it('does not report interim text as finalized', () => { + const onFinalized = vi.fn(); + const { result } = renderHook(() => useVoiceRecording(onFinalized)); + + act(() => result.current.startRecording()); + act(() => fireResult([{ isFinal: false, transcript: 'hello' }])); + + expect(onFinalized).not.toHaveBeenCalled(); + expect(result.current.interimTranscript).toBe('hello'); + }); + + it('only reports delta (not full text) when new finals arrive', () => { + const onFinalized = vi.fn(); + const { result } = renderHook(() => useVoiceRecording(onFinalized)); + + act(() => result.current.startRecording()); + // First word finalized + act(() => fireResult([{ isFinal: true, transcript: 'hello ' }])); + // Second word finalized — cumulative results list + act(() => + fireResult([ + { isFinal: true, transcript: 'hello ' }, + { isFinal: true, transcript: 'world ' }, + ]) + ); + + expect(onFinalized).toHaveBeenCalledTimes(2); + expect(onFinalized).toHaveBeenNthCalledWith(1, 'hello '); + expect(onFinalized).toHaveBeenNthCalledWith(2, 'world '); + }); + + it('resets 
the finalized offset when auto-restarting after silence timeout', () => { + const onFinalized = vi.fn(); + const { result } = renderHook(() => useVoiceRecording(onFinalized)); + + act(() => result.current.startRecording()); + + // First session: "hello " is finalized + act(() => fireResult([{ isFinal: true, transcript: 'hello ' }])); + expect(onFinalized).toHaveBeenCalledWith('hello '); + onFinalized.mockClear(); + + // Browser fires onend (silence timeout), hook auto-restarts + act(() => mockInstance.onend?.()); + expect(mockInstance.start).toHaveBeenCalledTimes(2); // initial + restart + + // Second session starts fresh — results list resets to index 0 + act(() => fireResult([{ isFinal: true, transcript: 'world ' }])); + + // "world " should be reported, not skipped or garbled + expect(onFinalized).toHaveBeenCalledWith('world '); + }); + + it('returns remaining interim text when stopRecording is called', () => { + const { result } = renderHook(() => useVoiceRecording()); + + act(() => result.current.startRecording()); + act(() => fireResult([{ isFinal: false, transcript: 'hey there' }])); + + let remaining: string | undefined; + act(() => { + remaining = result.current.stopRecording(); + }); + + expect(remaining).toBe('hey there'); + }); +}); diff --git a/src/hooks/useVoiceRecording.ts b/src/hooks/useVoiceRecording.ts index ec2e7bfd..429ecd9a 100644 --- a/src/hooks/useVoiceRecording.ts +++ b/src/hooks/useVoiceRecording.ts @@ -126,6 +126,9 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) { // Auto-restart to maintain continuous recording. if (recognitionRef.current === recognition) { try { + // Reset the finalized length counter — the new session starts a fresh + // results list from index 0, so the old offset would cause garbled output. 
+ lastFinalizedLengthRef.current = 0; recognition.start(); } catch { // Can't restart — mark as stopped diff --git a/vitest.component.config.ts b/vitest.component.config.ts index 4fdcf5e3..a5a4e36d 100644 --- a/vitest.component.config.ts +++ b/vitest.component.config.ts @@ -5,7 +5,7 @@ export default defineConfig({ test: { globals: true, environment: 'jsdom', - include: ['src/components/**/*.test.tsx', 'src/lib/**/*.test.tsx'], + include: ['src/components/**/*.test.tsx', 'src/lib/**/*.test.tsx', 'src/hooks/**/*.test.ts'], setupFiles: ['./src/test/setup-component.ts'], coverage: { provider: 'v8', diff --git a/vitest.config.ts b/vitest.config.ts index 58c22f3b..54a63b16 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -6,7 +6,11 @@ export default defineConfig({ globals: true, environment: 'node', include: ['src/**/*.test.ts'], - exclude: ['src/**/*.integration.test.ts', 'src/components/**/*.test.tsx'], + exclude: [ + 'src/**/*.integration.test.ts', + 'src/components/**/*.test.tsx', + 'src/hooks/**/*.test.ts', + ], setupFiles: ['src/test/setup-unit.ts'], coverage: { provider: 'v8', From 27c150ed751c9b3ccd85ae503f582c5842c8bb0c Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 6 Apr 2026 22:49:57 -0700 Subject: [PATCH 2/3] Clear stale interim transcript on voice recognition auto-restart On silence timeout auto-restart, also clear interimRef and interimTranscript state so stale partial words don't persist into the new session. Suggested by Opus code review of #321. Co-Authored-By: Claude Sonnet 4.6 --- src/hooks/useVoiceRecording.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/hooks/useVoiceRecording.ts b/src/hooks/useVoiceRecording.ts index 429ecd9a..b0fbe693 100644 --- a/src/hooks/useVoiceRecording.ts +++ b/src/hooks/useVoiceRecording.ts @@ -126,9 +126,11 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) { // Auto-restart to maintain continuous recording. 
if (recognitionRef.current === recognition) { try { - // Reset the finalized length counter — the new session starts a fresh - // results list from index 0, so the old offset would cause garbled output. + // Reset state — the new session starts a fresh results list from index 0, + // so the old offset would cause garbled output. Also clear any stale interim text. lastFinalizedLengthRef.current = 0; + interimRef.current = ''; + setInterimTranscript(''); recognition.start(); } catch { // Can't restart — mark as stopped From 69677635a6531cb2b2749142dd5661b5480f595a Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 6 Apr 2026 22:55:22 -0700 Subject: [PATCH 3/3] Simplify voice recording by using event.resultIndex for new results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace character-offset delta tracking (lastFinalizedLengthRef) with the Web Speech API's built-in resultIndex, which is the lowest index in the results list that changed in each event; every result before it is unchanged since the previous event. This change: - Removes lastFinalizedLengthRef entirely — no drift risk if a browser ever revises a finalized result's text - Emits each final result's transcript individually and directly, without string concatenation or substring extraction - Means the onend auto-restart no longer needs to reset any offset; it only needs to clear the stale interim text (already done) Also expands the skipped test suite with the missing cases identified in the Opus code review: interim cleared on restart, no auto-restart after intentional stop, error handling (not-allowed / no-speech / aborted), start() throwing in onend, and unmount cleanup. 
Co-Authored-By: Claude Sonnet 4.6 --- src/hooks/useVoiceRecording.test.ts | 107 ++++++++++++++++++++++++---- src/hooks/useVoiceRecording.ts | 29 ++++---- 2 files changed, 104 insertions(+), 32 deletions(-) diff --git a/src/hooks/useVoiceRecording.test.ts b/src/hooks/useVoiceRecording.test.ts index a12c62a1..82cd5d4b 100644 --- a/src/hooks/useVoiceRecording.test.ts +++ b/src/hooks/useVoiceRecording.test.ts @@ -50,6 +50,11 @@ describe.skip('useVoiceRecording', () => { (window as any).SpeechRecognition = MockConstructor; }); + /** + * Simulate a SpeechRecognition result event. `resultIndex` matches the Web + * Speech API spec: it is the index of the first NEW result in the list. + * Previous results are unchanged from the last event. + */ function fireResult(results: Array<{ isFinal: boolean; transcript: string }>, resultIndex = 0) { mockInstance.onresult?.({ resultIndex, @@ -62,7 +67,7 @@ describe.skip('useVoiceRecording', () => { const { result } = renderHook(() => useVoiceRecording(onFinalized)); act(() => result.current.startRecording()); - act(() => fireResult([{ isFinal: true, transcript: 'hello ' }])); + act(() => fireResult([{ isFinal: true, transcript: 'hello ' }], 0)); expect(onFinalized).toHaveBeenCalledWith('hello '); }); @@ -72,25 +77,28 @@ describe.skip('useVoiceRecording', () => { const { result } = renderHook(() => useVoiceRecording(onFinalized)); act(() => result.current.startRecording()); - act(() => fireResult([{ isFinal: false, transcript: 'hello' }])); + act(() => fireResult([{ isFinal: false, transcript: 'hello' }], 0)); expect(onFinalized).not.toHaveBeenCalled(); expect(result.current.interimTranscript).toBe('hello'); }); - it('only reports delta (not full text) when new finals arrive', () => { + it('only reports each result once using resultIndex', () => { const onFinalized = vi.fn(); const { result } = renderHook(() => useVoiceRecording(onFinalized)); act(() => result.current.startRecording()); - // First word finalized - act(() => 
fireResult([{ isFinal: true, transcript: 'hello ' }])); - // Second word finalized — cumulative results list + // First word finalized — resultIndex=0, result[0] is new + act(() => fireResult([{ isFinal: true, transcript: 'hello ' }], 0)); + // Second word finalized — resultIndex=1, only result[1] is new; result[0] already handled act(() => - fireResult([ - { isFinal: true, transcript: 'hello ' }, - { isFinal: true, transcript: 'world ' }, - ]) + fireResult( + [ + { isFinal: true, transcript: 'hello ' }, + { isFinal: true, transcript: 'world ' }, + ], + 1 + ) ); expect(onFinalized).toHaveBeenCalledTimes(2); @@ -98,14 +106,14 @@ describe.skip('useVoiceRecording', () => { expect(onFinalized).toHaveBeenNthCalledWith(2, 'world '); }); - it('resets the finalized offset when auto-restarting after silence timeout', () => { + it('correctly processes new results after silence timeout auto-restart', () => { const onFinalized = vi.fn(); const { result } = renderHook(() => useVoiceRecording(onFinalized)); act(() => result.current.startRecording()); // First session: "hello " is finalized - act(() => fireResult([{ isFinal: true, transcript: 'hello ' }])); + act(() => fireResult([{ isFinal: true, transcript: 'hello ' }], 0)); expect(onFinalized).toHaveBeenCalledWith('hello '); onFinalized.mockClear(); @@ -114,17 +122,32 @@ describe.skip('useVoiceRecording', () => { expect(mockInstance.start).toHaveBeenCalledTimes(2); // initial + restart // Second session starts fresh — results list resets to index 0 - act(() => fireResult([{ isFinal: true, transcript: 'world ' }])); + act(() => fireResult([{ isFinal: true, transcript: 'world ' }], 0)); - // "world " should be reported, not skipped or garbled + // "world " should be reported correctly, not skipped or garbled expect(onFinalized).toHaveBeenCalledWith('world '); }); + it('clears interim transcript on auto-restart', () => { + const { result } = renderHook(() => useVoiceRecording()); + + act(() => result.current.startRecording()); + 
// Interim result before silence timeout + act(() => fireResult([{ isFinal: false, transcript: 'partial' }], 0)); + expect(result.current.interimTranscript).toBe('partial'); + + // Auto-restart on silence timeout + act(() => mockInstance.onend?.()); + + // Stale interim should be cleared + expect(result.current.interimTranscript).toBe(''); + }); + it('returns remaining interim text when stopRecording is called', () => { const { result } = renderHook(() => useVoiceRecording()); act(() => result.current.startRecording()); - act(() => fireResult([{ isFinal: false, transcript: 'hey there' }])); + act(() => fireResult([{ isFinal: false, transcript: 'hey there' }], 0)); let remaining: string | undefined; act(() => { @@ -133,4 +156,58 @@ describe.skip('useVoiceRecording', () => { expect(remaining).toBe('hey there'); }); + + it('does not auto-restart after intentional stopRecording', () => { + const { result } = renderHook(() => useVoiceRecording()); + + act(() => result.current.startRecording()); + act(() => result.current.stopRecording()); + + // onend fires after stop() — should NOT restart + act(() => mockInstance.onend?.()); + + expect(mockInstance.start).toHaveBeenCalledTimes(1); // only the initial start + }); + + it('reports a user-friendly error for microphone permission denial', () => { + const { result } = renderHook(() => useVoiceRecording()); + + act(() => result.current.startRecording()); + act(() => mockInstance.onerror?.({ error: 'not-allowed', message: '' })); + + expect(result.current.error).toBe( + 'Microphone permission denied. Please allow microphone access.' 
+ ); + }); + + it('ignores no-speech and aborted errors', () => { + const { result } = renderHook(() => useVoiceRecording()); + + act(() => result.current.startRecording()); + act(() => mockInstance.onerror?.({ error: 'no-speech', message: '' })); + act(() => mockInstance.onerror?.({ error: 'aborted', message: '' })); + + expect(result.current.error).toBeNull(); + }); + + it('marks stopped and does not restart when start() throws in onend', () => { + const { result } = renderHook(() => useVoiceRecording()); + + act(() => result.current.startRecording()); + mockInstance.start.mockImplementationOnce(() => { + throw new Error('cannot restart'); + }); + act(() => mockInstance.onend?.()); + + expect(result.current.isRecording).toBe(false); + }); + + it('stops recognition on unmount', () => { + const { result, unmount } = renderHook(() => useVoiceRecording()); + + act(() => result.current.startRecording()); + unmount(); + + expect(mockInstance.stop).toHaveBeenCalled(); + }); }); diff --git a/src/hooks/useVoiceRecording.ts b/src/hooks/useVoiceRecording.ts index b0fbe693..a58cdb7e 100644 --- a/src/hooks/useVoiceRecording.ts +++ b/src/hooks/useVoiceRecording.ts @@ -48,7 +48,6 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) { const [interimTranscript, setInterimTranscript] = useState(''); const [error, setError] = useState(null); const recognitionRef = useRef(null); - const lastFinalizedLengthRef = useRef(0); const interimRef = useRef(''); const onFinalizedTextRef = useRef(onFinalizedText); @@ -71,7 +70,6 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) { const startRecording = useCallback(() => { setError(null); setInterimTranscript(''); - lastFinalizedLengthRef.current = 0; interimRef.current = ''; const SpeechRecognition = getSpeechRecognition(); @@ -87,23 +85,22 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) { recognitionRef.current = recognition; recognition.onresult = (event: 
SpeechRecognitionEvent) => { - let finals = ''; - let interim = ''; - for (let i = 0; i < event.results.length; i++) { + // Only process results starting from resultIndex — previous results are already + // finalized and were handled by earlier events. + for (let i = event.resultIndex; i < event.results.length; i++) { if (event.results[i].isFinal) { - finals += event.results[i][0].transcript; - } else { - interim += event.results[i][0].transcript; + onFinalizedTextRef.current?.(event.results[i][0].transcript); } } - // Call back with new finalized text (the delta since last callback) - if (finals.length > lastFinalizedLengthRef.current) { - const newText = finals.substring(lastFinalizedLengthRef.current); - lastFinalizedLengthRef.current = finals.length; - onFinalizedTextRef.current?.(newText); + // Rebuild interim from all non-final results in the current session. + // In practice Chrome keeps at most one interim result at the end of the list. + let interim = ''; + for (let i = 0; i < event.results.length; i++) { + if (!event.results[i].isFinal) { + interim += event.results[i][0].transcript; + } } - interimRef.current = interim; setInterimTranscript(interim); }; @@ -126,9 +123,7 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) { // Auto-restart to maintain continuous recording. if (recognitionRef.current === recognition) { try { - // Reset state — the new session starts a fresh results list from index 0, - // so the old offset would cause garbled output. Also clear any stale interim text. - lastFinalizedLengthRef.current = 0; + // Clear stale interim text from the ended session before starting fresh. interimRef.current = ''; setInterimTranscript(''); recognition.start();