Skip to content

Commit 47bd62f

Browse files
committed
fix(voice): resolve native crash and Whisper encoding failure during recording
- Win32AudioCaptureService: fix a race condition between StopAsync resource cleanup and WaveInCallback running on the multimedia thread. All pointer access now happens inside the lock, and _isRecording is set to false before waveInReset is called, so callbacks can no longer touch freed GCHandles or buffers.
- WhisperSpeechToTextEngine: reject audio shorter than 1 second (16,000 samples at 16 kHz) to avoid a WhisperProcessingException from the native encoder.

Made-with: Cursor
1 parent d45e9d6 commit 47bd62f

File tree

2 files changed

+22
-20
lines changed

2 files changed

+22
-20
lines changed

src/LiveLingo.Core/Speech/WhisperSpeechToTextEngine.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ namespace LiveLingo.Core.Speech;
66

77
public sealed class WhisperSpeechToTextEngine : ISpeechToTextEngine
88
{
9+
private const int MinSampleRate = 16000;
10+
private const int MinDurationSamples = MinSampleRate; // 1 second at 16kHz
11+
912
private readonly IModelManager _modelManager;
1013
private readonly ILogger<WhisperSpeechToTextEngine>? _logger;
1114
private WhisperFactory? _factory;
@@ -32,6 +35,14 @@ public async Task<SpeechTranscriptionResult> TranscribeAsync(
3235
var processor = await GetOrLoadProcessorAsync(language, ct);
3336
var samples = ConvertPcmToFloat(audio);
3437

38+
if (samples.Length < MinDurationSamples)
39+
{
40+
_logger?.LogDebug(
41+
"Audio too short for Whisper encoding ({Samples} samples, minimum {Min})",
42+
samples.Length, MinDurationSamples);
43+
return new SpeechTranscriptionResult(string.Empty, language ?? "en", 0f);
44+
}
45+
3546
var segments = new List<string>();
3647
string detectedLanguage = language ?? "en";
3748

src/LiveLingo.Desktop/Platform/Windows/Win32AudioCaptureService.cs

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ internal sealed class Win32AudioCaptureService : IAudioCaptureService
1818
private readonly MemoryStream _capturedData = new();
1919
private readonly object _gate = new();
2020
private bool _isRecording;
21-
private TaskCompletionSource? _stopTcs;
2221

2322
public bool IsRecording
2423
{
@@ -94,10 +93,9 @@ public Task<AudioCaptureResult> StopAsync(CancellationToken ct = default)
9493
{
9594
if (!_isRecording)
9695
throw new InvalidOperationException("Not recording.");
96+
_isRecording = false;
9797
}
9898

99-
_stopTcs = new TaskCompletionSource();
100-
10199
WaveIn.waveInStop(_hWaveIn);
102100
WaveIn.waveInReset(_hWaveIn);
103101

@@ -116,13 +114,9 @@ public Task<AudioCaptureResult> StopAsync(CancellationToken ct = default)
116114
WaveIn.waveInClose(_hWaveIn);
117115
_hWaveIn = IntPtr.Zero;
118116

119-
lock (_gate) _isRecording = false;
120-
121117
var pcm = _capturedData.ToArray();
122118
var duration = TimeSpan.FromSeconds((double)pcm.Length / (SampleRate * Channels * BitsPerSample / 8));
123-
var result = new AudioCaptureResult(pcm, SampleRate, Channels, duration);
124-
125-
return Task.FromResult(result);
119+
return Task.FromResult(new AudioCaptureResult(pcm, SampleRate, Channels, duration));
126120
}
127121

128122
private readonly WaveIn.WaveInProc _waveInCallback = (hWaveIn, msg, _, headerPtr, _) =>
@@ -141,24 +135,21 @@ private void WaveInCallback(IntPtr hWaveIn, uint msg, IntPtr instance, IntPtr he
141135
{
142136
if (msg != WaveIn.WIM_DATA) return;
143137

144-
var header = Marshal.PtrToStructure<WaveIn.WAVEHDR>(headerPtr);
145-
if (header.dwBytesRecorded > 0)
138+
lock (_gate)
146139
{
147-
var data = new byte[header.dwBytesRecorded];
148-
Marshal.Copy(header.lpData, data, 0, (int)header.dwBytesRecorded);
149-
lock (_gate)
140+
if (!_isRecording) return;
141+
142+
var header = Marshal.PtrToStructure<WaveIn.WAVEHDR>(headerPtr);
143+
if (header.dwBytesRecorded > 0)
150144
{
151-
if (_isRecording && _capturedData.Length < MaxRecordingSeconds * SampleRate * Channels * BitsPerSample / 8)
145+
var data = new byte[header.dwBytesRecorded];
146+
Marshal.Copy(header.lpData, data, 0, (int)header.dwBytesRecorded);
147+
if (_capturedData.Length < MaxRecordingSeconds * SampleRate * Channels * BitsPerSample / 8)
152148
_capturedData.Write(data, 0, data.Length);
153149
}
154-
}
155150

156-
lock (_gate)
157-
{
158-
if (_isRecording && _hWaveIn != IntPtr.Zero)
159-
{
151+
if (_hWaveIn != IntPtr.Zero)
160152
WaveIn.waveInAddBuffer(_hWaveIn, headerPtr, (uint)Marshal.SizeOf<WaveIn.WAVEHDR>());
161-
}
162153
}
163154
}
164155

0 commit comments

Comments
 (0)