diff --git a/package.json b/package.json index 5be696df..e16e4a15 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "speechstate", "license": "GPL-3.0", - "version": "2.13.1", + "version": "2.14.0", "homepage": "http://localhost/speechstate", "main": "./dist/index.js", "types": "./dist/index.d.ts", diff --git a/src/speechstate.ts b/src/speechstate.ts index 23a8256e..c0f13d61 100644 --- a/src/speechstate.ts +++ b/src/speechstate.ts @@ -56,6 +56,8 @@ const speechstate = setup({ input: { azureAuthorizationToken: context.azureAuthorizationToken, ttsDefaultVoice: context.settings.ttsDefaultVoice, + ttsDefaultFillerDelay: context.settings.ttsDefaultFillerDelay, + ttsDefaultFiller: context.settings.ttsDefaultFiller, ttsLexicon: context.settings.ttsLexicon, audioContext: context.audioContext, azureRegion: context.settings.azureRegion, diff --git a/src/tts.ts b/src/tts.ts index 8263eda4..b52b6864 100644 --- a/src/tts.ts +++ b/src/tts.ts @@ -40,7 +40,8 @@ export const ttsMachine = setup({ return { buffer: context.buffer.substring(0, spaceIndex) + - " um," + + " " + + context.ttsDefaultFiller + context.buffer.substring(spaceIndex), }; }), @@ -300,6 +301,8 @@ export const ttsMachine = setup({ context: ({ input }) => ({ azureAuthorizationToken: input.azureAuthorizationToken, ttsDefaultVoice: input.ttsDefaultVoice || "en-US-DavisNeural", + ttsDefaultFillerDelay: input.ttsDefaultFillerDelay || 500, + ttsDefaultFiller: input.ttsDefaultFiller || "um,", ttsLexicon: input.ttsLexicon, audioContext: input.audioContext, azureRegion: input.azureRegion, @@ -333,10 +336,13 @@ export const ttsMachine = setup({ target: "BufferedSpeaker", guard: ({ event }) => !!event.value.stream, actions: assign({ - agenda: ({ event }) => + agenda: ({ context, event }) => event.value.fillerDelay ? event.value - : { ...event.value, fillerDelay: 500 }, + : { + ...event.value, + fillerDelay: context.ttsDefaultFillerDelay, + }, }), }, { diff --git a/src/types.ts b/src/types.ts index ed4945ee..dd89a289 100644 --- a/src/types.ts +++ b/src/types.ts @@ -26,6 +26,8 @@ export interface Settings { speechRecognitionEndpointId?: string; ttsDefaultVoice?: string; ttsLexicon?: string; + ttsDefaultFillerDelay?: number; + ttsDefaultFiller?: string; newTokenInterval?: number; } @@ -140,6 +142,8 @@ export interface TTSInit { audioContext: AudioContext; azureRegion: string; ttsDefaultVoice: string; + ttsDefaultFillerDelay?: number; + ttsDefaultFiller?: string; ttsLexicon?: string; locale: string; } diff --git a/test/server.mjs b/test/server.mjs index a8cf8fc0..c40a0d07 100644 --- a/test/server.mjs +++ b/test/server.mjs @@ -60,7 +60,7 @@ async function run() { } else { clearInterval(interval); } - }, 300); + }, 500); res.on("close", () => { clearInterval(interval); diff --git a/test/tts.test.ts b/test/tts.test.ts index d4fe4035..985ada48 100644 --- a/test/tts.test.ts +++ b/test/tts.test.ts @@ -11,6 +11,8 @@ describe("Synthesis test", async () => { return { ssRef: spawn(speechstate, { input: { + ttsDefaultFiller: "um the filler,", + ttsDefaultFillerDelay: 100, noPonyfill: false, azureRegion: "swedencentral", azureCredentials: { @@ -89,7 +91,7 @@ describe("Synthesis test", async () => { expect(snapshot).toBeTruthy(); }); - test("synthesise from stream", async () => { + test.only("synthesise from stream", async () => { actor.getSnapshot().context.ssRef.send({ type: "SPEAK", value: { utterance: "", stream: "http://localhost:3000/sse/1" }, @@ -98,6 +100,16 @@ describe("Synthesis test", async () => { expect(snapshot).toBeTruthy(); }); + test.only("synthesise from stream; different filler and timeout", async () => { + actor.getSnapshot().context.ssRef.send({ + type: "SPEAK", + value: { utterance: "", stream: "http://localhost:3000/sse/1", fillerDelay: 100_000 }, + }); + const snapshot = await waitForView(actor, "speaking", 1000); + expect(snapshot).toBeTruthy(); + }); + + test("synthesise from stream; stop and restart on CONTROL", async () => { actor.getSnapshot().context.ssRef.send({ type: "SPEAK", @@ -152,7 +164,7 @@ describe("Synthesis test", async () => { expect(snapshot).toBeTruthy(); }); - test.only("synthesise from stream, use cache", async () => { + test("synthesise from stream, use cache", async () => { actor.getSnapshot().context.ssRef.send({ type: "SPEAK", value: {