From fd0492ec48e4df346c8d48903b04b5a83ff807c6 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 6 Apr 2026 22:44:15 -0700
Subject: [PATCH 1/3] Fix voice input garbled text after auto-restart on
 silence timeout

When SpeechRecognition stops on its own after a silence timeout (as some
browsers do), the onend handler restarts it. After the restart, Chrome builds
a fresh results list starting at index 0, but lastFinalizedLengthRef.current
still held the character offset from the previous session.

This caused the onresult handler to either:
- Skip new words entirely (while the new finals.length was <= the old offset)
- Report garbled mid-word substrings (once the new finals.length exceeded
  the old offset, because the substring started at the stale offset)

Fix: reset lastFinalizedLengthRef.current = 0 before calling start() in
the auto-restart path, so the new session's results are processed correctly
from the beginning.

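Also adds a test file (currently skipped due to pre-existing React.act
infrastructure issue #320) with tests covering the key behaviors including
the auto-restart offset reset. The hook tests are added to the jsdom
component config (vitest.component.config.ts) and excluded from the
node-environment unit config (vitest.config.ts).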

Fixes #317

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hooks/useVoiceRecording.test.ts | 136 ++++++++++++++++++++++++++++
 src/hooks/useVoiceRecording.ts      |   3 +
 vitest.component.config.ts          |   2 +-
 vitest.config.ts                    |   6 +-
 4 files changed, 145 insertions(+), 2 deletions(-)
 create mode 100644 src/hooks/useVoiceRecording.test.ts
diff --git a/src/hooks/useVoiceRecording.test.ts b/src/hooks/useVoiceRecording.test.ts
new file mode 100644
index 00000000..a12c62a1
--- /dev/null
+++ b/src/hooks/useVoiceRecording.test.ts
@@ -0,0 +1,136 @@
+import { renderHook, act } from '@testing-library/react';
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { useVoiceRecording } from './useVoiceRecording';
+
+interface MockRecognitionEvent {
+  resultIndex: number;
+  results: Array<{ isFinal: boolean; 0: { transcript: string } }>;
+}
+
+interface MockRecognitionErrorEvent {
+  error: string;
+  message: string;
+}
+
+interface MockSpeechRecognition {
+  continuous: boolean;
+  interimResults: boolean;
+  lang: string;
+  start: ReturnType<typeof vi.fn>;
+  stop: ReturnType<typeof vi.fn>;
+  abort: ReturnType<typeof vi.fn>;
+  onresult: ((event: MockRecognitionEvent) => void) | null;
+  onerror: ((event: MockRecognitionErrorEvent) => void) | null;
+  onend: (() => void) | null;
+}
+
+function makeMockRecognition(): MockSpeechRecognition {
+  return {
+    continuous: false,
+    interimResults: false,
+    lang: '',
+    start: vi.fn(),
+    stop: vi.fn(),
+    abort: vi.fn(),
+    onresult: null,
+    onerror: null,
+    onend: null,
+  };
+}
+
+// Tests are skipped due to a pre-existing React.act infrastructure issue in the jsdom
+// test environment (React 19 + @testing-library/react). See GitHub issue #320.
+describe.skip('useVoiceRecording', () => {
+  let mockInstance: MockSpeechRecognition;
+
+  beforeEach(() => {
+    mockInstance = makeMockRecognition();
+    const MockConstructor = vi.fn(() => mockInstance);
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (window as any).SpeechRecognition = MockConstructor;
+  });
+
+  function fireResult(results: Array<{ isFinal: boolean; transcript: string }>, resultIndex = 0) {
+    mockInstance.onresult?.({
+      resultIndex,
+      results: results.map((r) => ({ isFinal: r.isFinal, 0: { transcript: r.transcript } })),
+    });
+  }
+
+  it('reports finalized text via onFinalizedText callback', () => {
+    const onFinalized = vi.fn();
+    const { result } = renderHook(() => useVoiceRecording(onFinalized));
+
+    act(() => result.current.startRecording());
+    act(() => fireResult([{ isFinal: true, transcript: 'hello ' }]));
+
+    expect(onFinalized).toHaveBeenCalledWith('hello ');
+  });
+
+  it('does not report interim text as finalized', () => {
+    const onFinalized = vi.fn();
+    const { result } = renderHook(() => useVoiceRecording(onFinalized));
+
+    act(() => result.current.startRecording());
+    act(() => fireResult([{ isFinal: false, transcript: 'hello' }]));
+
+    expect(onFinalized).not.toHaveBeenCalled();
+    expect(result.current.interimTranscript).toBe('hello');
+  });
+
+  it('only reports delta (not full text) when new finals arrive', () => {
+    const onFinalized = vi.fn();
+    const { result } = renderHook(() => useVoiceRecording(onFinalized));
+
+    act(() => result.current.startRecording());
+    // First word finalized
+    act(() => fireResult([{ isFinal: true, transcript: 'hello ' }]));
+    // Second word finalized — cumulative results list
+    act(() =>
+      fireResult([
+        { isFinal: true, transcript: 'hello ' },
+        { isFinal: true, transcript: 'world ' },
+      ])
+    );
+
+    expect(onFinalized).toHaveBeenCalledTimes(2);
+    expect(onFinalized).toHaveBeenNthCalledWith(1, 'hello ');
+    expect(onFinalized).toHaveBeenNthCalledWith(2, 'world ');
+  });
+
+  it('resets the finalized offset when auto-restarting after silence timeout', () => {
+    const onFinalized = vi.fn();
+    const { result } = renderHook(() => useVoiceRecording(onFinalized));
+
+    act(() => result.current.startRecording());
+
+    // First session: "hello " is finalized
+    act(() => fireResult([{ isFinal: true, transcript: 'hello ' }]));
+    expect(onFinalized).toHaveBeenCalledWith('hello ');
+    onFinalized.mockClear();
+
+    // Browser fires onend (silence timeout), hook auto-restarts
+    act(() => mockInstance.onend?.());
+    expect(mockInstance.start).toHaveBeenCalledTimes(2); // initial + restart
+
+    // Second session starts fresh — results list resets to index 0
+    act(() => fireResult([{ isFinal: true, transcript: 'world ' }]));
+
+    // "world " should be reported, not skipped or garbled
+    expect(onFinalized).toHaveBeenCalledWith('world ');
+  });
+
+  it('returns remaining interim text when stopRecording is called', () => {
+    const { result } = renderHook(() => useVoiceRecording());
+
+    act(() => result.current.startRecording());
+    act(() => fireResult([{ isFinal: false, transcript: 'hey there' }]));
+
+    let remaining: string | undefined;
+    act(() => {
+      remaining = result.current.stopRecording();
+    });
+
+    expect(remaining).toBe('hey there');
+  });
+});
diff --git a/src/hooks/useVoiceRecording.ts b/src/hooks/useVoiceRecording.ts
index ec2e7bfd..429ecd9a 100644
--- a/src/hooks/useVoiceRecording.ts
+++ b/src/hooks/useVoiceRecording.ts
@@ -126,6 +126,9 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) {
       // Auto-restart to maintain continuous recording.
       if (recognitionRef.current === recognition) {
         try {
+          // Reset the finalized length counter — the new session starts a fresh
+          // results list from index 0, so the old offset would cause garbled output.
+          lastFinalizedLengthRef.current = 0;
           recognition.start();
         } catch {
           // Can't restart — mark as stopped
diff --git a/vitest.component.config.ts b/vitest.component.config.ts
index 4fdcf5e3..a5a4e36d 100644
--- a/vitest.component.config.ts
+++ b/vitest.component.config.ts
@@ -5,7 +5,7 @@ export default defineConfig({
   test: {
     globals: true,
     environment: 'jsdom',
-    include: ['src/components/**/*.test.tsx', 'src/lib/**/*.test.tsx'],
+    include: ['src/components/**/*.test.tsx', 'src/lib/**/*.test.tsx', 'src/hooks/**/*.test.ts'],
     setupFiles: ['./src/test/setup-component.ts'],
     coverage: {
       provider: 'v8',
diff --git a/vitest.config.ts b/vitest.config.ts
index 58c22f3b..54a63b16 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -6,7 +6,11 @@ export default defineConfig({
     globals: true,
     environment: 'node',
     include: ['src/**/*.test.ts'],
-    exclude: ['src/**/*.integration.test.ts', 'src/components/**/*.test.tsx'],
+    exclude: [
+      'src/**/*.integration.test.ts',
+      'src/components/**/*.test.tsx',
+      'src/hooks/**/*.test.ts',
+    ],
     setupFiles: ['src/test/setup-unit.ts'],
     coverage: {
       provider: 'v8',

From 27c150ed751c9b3ccd85ae503f582c5842c8bb0c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 6 Apr 2026 22:49:57 -0700
Subject: [PATCH 2/3] Clear stale interim transcript on voice recognition
 auto-restart

On silence timeout auto-restart, also clear interimRef and interimTranscript
state so stale partial words don't persist into the new session.

Suggested by Opus code review of #321.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hooks/useVoiceRecording.ts | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/hooks/useVoiceRecording.ts b/src/hooks/useVoiceRecording.ts
index 429ecd9a..b0fbe693 100644
--- a/src/hooks/useVoiceRecording.ts
+++ b/src/hooks/useVoiceRecording.ts
@@ -126,9 +126,11 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) {
       // Auto-restart to maintain continuous recording.
       if (recognitionRef.current === recognition) {
         try {
-          // Reset the finalized length counter — the new session starts a fresh
-          // results list from index 0, so the old offset would cause garbled output.
+          // Reset state — the new session starts a fresh results list from index 0,
+          // so the old offset would cause garbled output. Also clear any stale interim text.
           lastFinalizedLengthRef.current = 0;
+          interimRef.current = '';
+          setInterimTranscript('');
           recognition.start();
         } catch {
           // Can't restart — mark as stopped

From 69677635a6531cb2b2749142dd5661b5480f595a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 6 Apr 2026 22:55:22 -0700
Subject: [PATCH 3/3] Simplify voice recording by using event.resultIndex for
 new results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace character-offset delta tracking (lastFinalizedLengthRef) with the
Web Speech API's built-in resultIndex, the lowest index in the results list
that changed in each event. This:

- Removes lastFinalizedLengthRef entirely — no drift risk if a browser
  ever revises a finalized result's text
- Each final result's transcript is emitted individually and directly,
  without string concatenation or substring extraction
- The onend auto-restart no longer needs to reset any offset; it only
  needs to clear the stale interim text (already done in PATCH 2/3)

Also expands the skipped test suite with the missing cases identified in
the Opus code review: interim cleared on restart, no auto-restart after
intentional stop, error handling (not-allowed / no-speech / aborted),
start() throws in onend, and unmount cleanup.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/hooks/useVoiceRecording.test.ts | 107 ++++++++++++++++++++++++----
 src/hooks/useVoiceRecording.ts      |  29 ++++----
 2 files changed, 104 insertions(+), 32 deletions(-)

diff --git a/src/hooks/useVoiceRecording.test.ts b/src/hooks/useVoiceRecording.test.ts
index a12c62a1..82cd5d4b 100644
--- a/src/hooks/useVoiceRecording.test.ts
+++ b/src/hooks/useVoiceRecording.test.ts
@@ -50,6 +50,11 @@ describe.skip('useVoiceRecording', () => {
     (window as any).SpeechRecognition = MockConstructor;
   });
 
+  /**
+   * Simulate a SpeechRecognition result event. `resultIndex` follows the Web
+   * Speech API spec: it is the lowest index in `results` that has changed
+   * (new or revised). Results before it are unchanged from the last event.
+   */
   function fireResult(results: Array<{ isFinal: boolean; transcript: string }>, resultIndex = 0) {
     mockInstance.onresult?.({
       resultIndex,
@@ -62,7 +67,7 @@ describe.skip('useVoiceRecording', () => {
     const { result } = renderHook(() => useVoiceRecording(onFinalized));
 
     act(() => result.current.startRecording());
-    act(() => fireResult([{ isFinal: true, transcript: 'hello ' }]));
+    act(() => fireResult([{ isFinal: true, transcript: 'hello ' }], 0));
 
     expect(onFinalized).toHaveBeenCalledWith('hello ');
   });
@@ -72,25 +77,28 @@ describe.skip('useVoiceRecording', () => {
     const { result } = renderHook(() => useVoiceRecording(onFinalized));
 
     act(() => result.current.startRecording());
-    act(() => fireResult([{ isFinal: false, transcript: 'hello' }]));
+    act(() => fireResult([{ isFinal: false, transcript: 'hello' }], 0));
 
     expect(onFinalized).not.toHaveBeenCalled();
     expect(result.current.interimTranscript).toBe('hello');
   });
 
-  it('only reports delta (not full text) when new finals arrive', () => {
+  it('only reports each result once using resultIndex', () => {
     const onFinalized = vi.fn();
     const { result } = renderHook(() => useVoiceRecording(onFinalized));
 
     act(() => result.current.startRecording());
-    // First word finalized
-    act(() => fireResult([{ isFinal: true, transcript: 'hello ' }]));
-    // Second word finalized — cumulative results list
+    // First word finalized — resultIndex=0, result[0] is new
+    act(() => fireResult([{ isFinal: true, transcript: 'hello ' }], 0));
+    // Second word finalized — resultIndex=1, only result[1] is new; result[0] already handled
     act(() =>
-      fireResult([
-        { isFinal: true, transcript: 'hello ' },
-        { isFinal: true, transcript: 'world ' },
-      ])
+      fireResult(
+        [
+          { isFinal: true, transcript: 'hello ' },
+          { isFinal: true, transcript: 'world ' },
+        ],
+        1
+      )
     );
 
     expect(onFinalized).toHaveBeenCalledTimes(2);
@@ -98,14 +106,14 @@ describe.skip('useVoiceRecording', () => {
     expect(onFinalized).toHaveBeenNthCalledWith(2, 'world ');
   });
 
-  it('resets the finalized offset when auto-restarting after silence timeout', () => {
+  it('correctly processes new results after silence timeout auto-restart', () => {
     const onFinalized = vi.fn();
     const { result } = renderHook(() => useVoiceRecording(onFinalized));
 
     act(() => result.current.startRecording());
 
     // First session: "hello " is finalized
-    act(() => fireResult([{ isFinal: true, transcript: 'hello ' }]));
+    act(() => fireResult([{ isFinal: true, transcript: 'hello ' }], 0));
     expect(onFinalized).toHaveBeenCalledWith('hello ');
     onFinalized.mockClear();
 
@@ -114,17 +122,32 @@ describe.skip('useVoiceRecording', () => {
     expect(mockInstance.start).toHaveBeenCalledTimes(2); // initial + restart
 
     // Second session starts fresh — results list resets to index 0
-    act(() => fireResult([{ isFinal: true, transcript: 'world ' }]));
+    act(() => fireResult([{ isFinal: true, transcript: 'world ' }], 0));
 
-    // "world " should be reported, not skipped or garbled
+    // "world " should be reported correctly, not skipped or garbled
     expect(onFinalized).toHaveBeenCalledWith('world ');
   });
 
+  it('clears interim transcript on auto-restart', () => {
+    const { result } = renderHook(() => useVoiceRecording());
+
+    act(() => result.current.startRecording());
+    // Interim result before silence timeout
+    act(() => fireResult([{ isFinal: false, transcript: 'partial' }], 0));
+    expect(result.current.interimTranscript).toBe('partial');
+
+    // Auto-restart on silence timeout
+    act(() => mockInstance.onend?.());
+
+    // Stale interim should be cleared
+    expect(result.current.interimTranscript).toBe('');
+  });
+
   it('returns remaining interim text when stopRecording is called', () => {
     const { result } = renderHook(() => useVoiceRecording());
 
     act(() => result.current.startRecording());
-    act(() => fireResult([{ isFinal: false, transcript: 'hey there' }]));
+    act(() => fireResult([{ isFinal: false, transcript: 'hey there' }], 0));
 
     let remaining: string | undefined;
     act(() => {
@@ -133,4 +156,58 @@ describe.skip('useVoiceRecording', () => {
 
     expect(remaining).toBe('hey there');
   });
+
+  it('does not auto-restart after intentional stopRecording', () => {
+    const { result } = renderHook(() => useVoiceRecording());
+
+    act(() => result.current.startRecording());
+    act(() => result.current.stopRecording());
+
+    // onend fires after stop() — should NOT restart
+    act(() => mockInstance.onend?.());
+
+    expect(mockInstance.start).toHaveBeenCalledTimes(1); // only the initial start
+  });
+
+  it('reports a user-friendly error for microphone permission denial', () => {
+    const { result } = renderHook(() => useVoiceRecording());
+
+    act(() => result.current.startRecording());
+    act(() => mockInstance.onerror?.({ error: 'not-allowed', message: '' }));
+
+    expect(result.current.error).toBe(
+      'Microphone permission denied. Please allow microphone access.'
+    );
+  });
+
+  it('ignores no-speech and aborted errors', () => {
+    const { result } = renderHook(() => useVoiceRecording());
+
+    act(() => result.current.startRecording());
+    act(() => mockInstance.onerror?.({ error: 'no-speech', message: '' }));
+    act(() => mockInstance.onerror?.({ error: 'aborted', message: '' }));
+
+    expect(result.current.error).toBeNull();
+  });
+
+  it('marks stopped and does not restart when start() throws in onend', () => {
+    const { result } = renderHook(() => useVoiceRecording());
+
+    act(() => result.current.startRecording());
+    mockInstance.start.mockImplementationOnce(() => {
+      throw new Error('cannot restart');
+    });
+    act(() => mockInstance.onend?.());
+
+    expect(result.current.isRecording).toBe(false);
+  });
+
+  it('stops recognition on unmount', () => {
+    const { result, unmount } = renderHook(() => useVoiceRecording());
+
+    act(() => result.current.startRecording());
+    unmount();
+
+    expect(mockInstance.stop).toHaveBeenCalled();
+  });
 });
diff --git a/src/hooks/useVoiceRecording.ts b/src/hooks/useVoiceRecording.ts
index b0fbe693..a58cdb7e 100644
--- a/src/hooks/useVoiceRecording.ts
+++ b/src/hooks/useVoiceRecording.ts
@@ -48,7 +48,6 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) {
   const [interimTranscript, setInterimTranscript] = useState('');
   const [error, setError] = useState<string | null>(null);
   const recognitionRef = useRef<SpeechRecognitionInstance | null>(null);
-  const lastFinalizedLengthRef = useRef(0);
   const interimRef = useRef('');
   const onFinalizedTextRef = useRef(onFinalizedText);
 
@@ -71,7 +70,6 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) {
   const startRecording = useCallback(() => {
     setError(null);
     setInterimTranscript('');
-    lastFinalizedLengthRef.current = 0;
     interimRef.current = '';
 
     const SpeechRecognition = getSpeechRecognition();
@@ -87,23 +85,22 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) {
     recognitionRef.current = recognition;
 
     recognition.onresult = (event: SpeechRecognitionEvent) => {
-      let finals = '';
-      let interim = '';
-      for (let i = 0; i < event.results.length; i++) {
+      // Only process results starting from resultIndex — previous results are already
+      // finalized and were handled by earlier events.
+      for (let i = event.resultIndex; i < event.results.length; i++) {
         if (event.results[i].isFinal) {
-          finals += event.results[i][0].transcript;
-        } else {
-          interim += event.results[i][0].transcript;
+          onFinalizedTextRef.current?.(event.results[i][0].transcript);
         }
       }
 
-      // Call back with new finalized text (the delta since last callback)
-      if (finals.length > lastFinalizedLengthRef.current) {
-        const newText = finals.substring(lastFinalizedLengthRef.current);
-        lastFinalizedLengthRef.current = finals.length;
-        onFinalizedTextRef.current?.(newText);
+      // Rebuild interim from all non-final results in the current session.
+      // In practice Chrome keeps at most one interim result at the end of the list.
+      let interim = '';
+      for (let i = 0; i < event.results.length; i++) {
+        if (!event.results[i].isFinal) {
+          interim += event.results[i][0].transcript;
+        }
       }
-
       interimRef.current = interim;
       setInterimTranscript(interim);
     };
@@ -126,9 +123,7 @@ export function useVoiceRecording(onFinalizedText?: (text: string) => void) {
       // Auto-restart to maintain continuous recording.
       if (recognitionRef.current === recognition) {
         try {
-          // Reset state — the new session starts a fresh results list from index 0,
-          // so the old offset would cause garbled output. Also clear any stale interim text.
-          lastFinalizedLengthRef.current = 0;
+          // Clear stale interim text from the ended session before starting fresh.
           interimRef.current = '';
           setInterimTranscript('');
           recognition.start();