Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ function App() {

const scribearStatus = useSelector((state: RootState) => state.APIStatusReducer?.scribearServerStatus as number);
const scribearMessage = useSelector((state: RootState) => (state.APIStatusReducer as any)?.scribearServerMessage as string | undefined);
const micNoAudio = useSelector((state: RootState) => (state.ControlReducer as any)?.micNoAudio as boolean | undefined);
const listening = useSelector((state: RootState) => (state.ControlReducer as any)?.listening as boolean | undefined);

const [snackbarOpen, setSnackbarOpen] = useState(false);
const [snackbarMsg, setSnackbarMsg] = useState('');
Expand All @@ -39,6 +41,41 @@ function App() {
}
}, [scribearStatus]);

useEffect(() => {
// show mic inactivity when mic is on but no audio chunks are received
if (listening && micNoAudio) {
setSnackbarMsg('Microphone is active but no audio detected');
setSnackbarSeverity('warning');
setSnackbarOpen(true);
}

// When listening turns ON, start a one-shot timer that expects at least one ondataavailable
// call within thresholdMs. This avoids firing on normal silent pauses after audio has been
// received previously. We only trigger inactivity if no blob arrives at all after enabling mic.
const thresholdMs = 3000;
try {
if (listening) {
try { (window as any).__hasReceivedAudio = false; } catch (e) {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hesitate to pollute the window namespace with additional props. I don't think it is necessary to do so in order to implement mic activity.

It seems to me that the logic for setting the micNoAudio flag can be fully encapsulated within the recognizers themselves. This portion here would simply be fetching listening and micNoAudio from redux and showing/hiding the snackbar if both are true in useEffect. Am I missing something?

if ((window as any).__initialAudioTimer) { try { clearTimeout((window as any).__initialAudioTimer); } catch (e) {} }
(window as any).__initialAudioTimer = setTimeout(() => {
try {
const has = (window as any).__hasReceivedAudio === true;
if (!has) {
try { (window as any).store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: true }); } catch (e) {}
}
} catch (e) {}
}, thresholdMs);
} else {
// listening turned off: clear initial timer and ensure flag reset
try { if ((window as any).__initialAudioTimer) { clearTimeout((window as any).__initialAudioTimer); (window as any).__initialAudioTimer = null; } } catch (e) {}
try { (window as any).store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false }); } catch (e) {}
}
} catch (e) {
console.warn('Failed to start initial mic monitor', e);
}
// no cleanup needed here because we clear/set timer when listening toggles
}, [listening, micNoAudio]);

const handleClose = (_event?: React.SyntheticEvent | Event, reason?: string) => {
if (reason === 'clickaway') return;
setSnackbarOpen(false);
Expand Down
50 changes: 50 additions & 0 deletions src/components/api/scribearServer/scribearRecognizer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ export class ScribearRecognizer implements Recognizer {
private language: string
private recorder?: RecordRTC;
private kSampleRate = 16000;
private lastAudioTimestamp: number | null = null;
private inactivityInterval: any = null;

urlParams = new URLSearchParams(window.location.search);
mode = this.urlParams.get('mode');
Expand Down Expand Up @@ -58,13 +60,52 @@ export class ScribearRecognizer implements Recognizer {
desiredSampRate: this.kSampleRate,
timeSlice: 50,
ondataavailable: async (blob: Blob) => {
// update last audio timestamp and mark that we've received at least one audio chunk
this.lastAudioTimestamp = Date.now();
try { (window as any).__lastAudioTimestamp = this.lastAudioTimestamp; } catch (e) {}
try { (window as any).__hasReceivedAudio = true; if ((window as any).__initialAudioTimer) { clearTimeout((window as any).__initialAudioTimer); (window as any).__initialAudioTimer = null; } } catch (e) {}
try {
const controlState = (store.getState() as any).ControlReducer;
if (controlState?.micNoAudio === true) {
store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false });
}
} catch (e) {
console.warn('Failed to clear mic inactivity', e);
}
this.socket?.send(blob);
},
recorderType: StereoAudioRecorder,
numberOfAudioChannels: 1,
});

this.recorder.startRecording();

// start inactivity monitor
const thresholdMs = 3000;
if (this.inactivityInterval == null) {
this.inactivityInterval = setInterval(() => {
try {
const state: any = store.getState();
const listening = state.ControlReducer?.listening === true;
const micNoAudio = state.ControlReducer?.micNoAudio === true;
if (listening) {
if (!this.lastAudioTimestamp || (Date.now() - this.lastAudioTimestamp > thresholdMs)) {
if (!micNoAudio) {
store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: true });
}
} else {
if (micNoAudio) {
store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false });
}
}
} else {
if (micNoAudio) store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false });
}
} catch (e) {
console.warn('Error in mic inactivity interval', e);
}
}, 1000);
}
}

/**
Expand Down Expand Up @@ -179,6 +220,15 @@ export class ScribearRecognizer implements Recognizer {
if (!this.socket) { return; }
this.socket.close();
this.socket = null;
if (this.inactivityInterval) {
clearInterval(this.inactivityInterval);
this.inactivityInterval = null;
}
try {
store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false });
} catch (e) {
console.warn('Failed to clear mic inactivity on stop', e);
}
}

/**
Expand Down
50 changes: 50 additions & 0 deletions src/components/api/whisper/whisperRecognizer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ export class WhisperRecognizer implements Recognizer {
private num_threads: number;

private transcribed_callback: ((newFinalBlocks: Array<TranscriptBlock>, newInProgressBlock: TranscriptBlock) => void) | null = null;
private lastAudioTimestamp: number | null = null;
private inactivityInterval: any = null;

/**
* Creates an Whisper recognizer instance that listens to the default microphone
Expand Down Expand Up @@ -135,6 +137,20 @@ export class WhisperRecognizer implements Recognizer {
pcm_data = Float32Concat(last_suffix, pcm_data);
last_suffix = pcm_data.slice(-(pcm_data.length % 128))

// update last audio timestamp and mark that we've received at least one audio chunk
this.lastAudioTimestamp = Date.now();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Date.now() is subject to the user's device time changing (not monotonic). Perhaps performance.now() would make sense instead?

try { (window as any).__lastAudioTimestamp = this.lastAudioTimestamp; } catch (e) {}
try { (window as any).__hasReceivedAudio = true; if ((window as any).__initialAudioTimer) { clearTimeout((window as any).__initialAudioTimer); (window as any).__initialAudioTimer = null; } } catch (e) {}
try {
const { store } = require('../../../store');
const controlState = (store.getState() as any).ControlReducer;
if (controlState?.micNoAudio === true) {
store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false });
}
} catch (e) {
console.warn('Failed to clear mic inactivity (whisper)', e);
}

// Feed process_recorder_message audio in 128 sample chunks
for (let i = 0; i < pcm_data.length - 127; i+= 128) {
const audio_chunk = pcm_data.subarray(i, i + 128)
Expand All @@ -149,6 +165,29 @@ export class WhisperRecognizer implements Recognizer {

this.recorder.startRecording();
console.log("Whisper: Done setting up audio context");

const thresholdMs = 3000;
if (this.inactivityInterval == null) {
const { store } = require('../../../store');
this.inactivityInterval = setInterval(() => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels to me like a timeout would be more natural here rather than an interval.

e.g. Set a timeout of thresholdMs and reset it every time an audio chunk is received. When the timeout fires, set micNoAudio to true.

Semantically, micNoAudio means no audio received for thresholdMs.

try {
const state: any = store.getState();
const listening = state.ControlReducer?.listening === true;
const micNoAudio = state.ControlReducer?.micNoAudio === true;
if (listening) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this checked before showing the snackbar? Is it necessary to fetch state from redux here?

We generally want to limit the use of getState() outside of redux.

if (!this.lastAudioTimestamp || (Date.now() - this.lastAudioTimestamp > thresholdMs)) {
if (!micNoAudio) store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: true });
} else {
if (micNoAudio) store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false });
}
} else {
if (micNoAudio) store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false });
}
} catch (e) {
console.warn('Error in whisper mic inactivity interval', e);
}
}, 1000);
}
}

private async load_model(model: string) {
Expand Down Expand Up @@ -257,6 +296,17 @@ export class WhisperRecognizer implements Recognizer {
this.whisper.set_status("paused");
this.context.suspend();
this.recorder?.stopRecording();

if (this.inactivityInterval) {
clearInterval(this.inactivityInterval);
this.inactivityInterval = null;
}
try {
const { store } = require('../../../store');
store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false });
} catch (e) {
console.warn('Failed to clear mic inactivity on whisper stop', e);
}
}

/**
Expand Down
3 changes: 3 additions & 0 deletions src/react-redux&middleware/redux/reducers/controlReducers.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const initialControlState : ControlStatus = {
showMFCC: false,
showSpeaker: false,
showIntent: false,
micNoAudio: false,
}

export function ControlReducer(state = initialControlState, action) {
Expand All @@ -41,6 +42,8 @@ export function ControlReducer(state = initialControlState, action) {
return { ...state, showIntent: !state.showIntent };
case 'FLIP_RECORDING_PHRASE':
return { ...state, listening: action.payload};
case 'SET_MIC_INACTIVITY':
return { ...state, micNoAudio: action.payload };
case 'SET_SPEECH_LANGUAGE':
return {
...state,
Expand Down
1 change: 1 addition & 0 deletions src/react-redux&middleware/redux/types/controlStatus.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ export type ControlStatus = {
showMFCC: boolean
showSpeaker: boolean
showIntent: boolean
micNoAudio?: boolean
}
2 changes: 2 additions & 0 deletions src/store.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,5 @@ export const store = configureStore({
});

export type RootState = ReturnType<typeof rootReducer>
(window as any).store = store;