From 75d1c5448b8822e18ca6d8fda0938c3de2858040 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 14 Dec 2025 00:17:57 +0000 Subject: [PATCH 1/6] Initial plan From ecaf35913d771054f72c1a9fb2cf094b1029726b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 14 Dec 2025 00:21:01 +0000 Subject: [PATCH 2/6] Fix TTS picker: remove SAPI5 voices, make sliders work with Test Voice button Co-authored-by: ExtCan <60326708+ExtCan@users.noreply.github.com> --- src/UI/SettingsForm.cs | 14 ++++++++++++++ src/Voice/Sapi4Manager.cs | 3 +-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/UI/SettingsForm.cs b/src/UI/SettingsForm.cs index 69020d3..4990e7f 100644 --- a/src/UI/SettingsForm.cs +++ b/src/UI/SettingsForm.cs @@ -1642,6 +1642,20 @@ private void OnTestVoiceClick(object sender, EventArgs e) { if (_agentManager?.IsLoaded == true) { + // Apply current slider values before testing + _agentManager.SetSpeechSpeed(_speedTrackBar.Value); + _agentManager.SetSpeechPitch(_pitchTrackBar.Value); + + // Convert volume from 0-100 to 0-65535 + int volumeValue = (int)(_volumeTrackBar.Value * VolumeScaleFactor); + _agentManager.SetSpeechVolume(volumeValue); + + // Apply selected voice if one is selected + if (_voiceComboBox.SelectedItem is VoiceInfo voice) + { + _agentManager.SetTTSModeID(voice.ModeId ?? voice.Id); + } + _agentManager.Speak("This is a test of the text to speech voice."); } else diff --git a/src/Voice/Sapi4Manager.cs b/src/Voice/Sapi4Manager.cs index 03fa509..06b675d 100644 --- a/src/Voice/Sapi4Manager.cs +++ b/src/Voice/Sapi4Manager.cs @@ -78,8 +78,7 @@ public List GetAvailableVoices() // HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\TTSMode EnumerateTTSModes(voices); - // Fallback: Check for voices using Tokens (newer SAPI) - EnumerateVoiceTokens(voices); + // NOTE: EnumerateVoiceTokens is removed - only show SAPI4 voices, not SAPI5 } catch (Exception ex) { From b42dcb76fa9c3824598731337e6e5ee18d1836b1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 14 Dec 2025 02:20:48 +0000 Subject: [PATCH 3/6] Fix SAPI4 voice enumeration and make sliders work with Test Voice - Remove SAPI5 Tokens paths from EnumerateTTSModes (only use TTSMode registry paths) - Remove EnumerateVoiceTokens call to exclude SAPI5 voices entirely - Make Test Voice button apply current speed, pitch, volume, and voice before testing Co-authored-by: ExtCan <60326708+ExtCan@users.noreply.github.com> --- src/Voice/Sapi4Manager.cs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Voice/Sapi4Manager.cs b/src/Voice/Sapi4Manager.cs index 06b675d..ee7e912 100644 --- a/src/Voice/Sapi4Manager.cs +++ b/src/Voice/Sapi4Manager.cs @@ -78,7 +78,7 @@ public List GetAvailableVoices() // HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\TTSMode EnumerateTTSModes(voices); - // NOTE: EnumerateVoiceTokens is removed - only show SAPI4 voices, not SAPI5 + // Don't include SAPI5 voices - only SAPI4 TTS Modes are compatible with MS Agent } catch (Exception ex) { @@ -107,13 +107,11 @@ public List GetAvailableVoices() /// private void EnumerateTTSModes(List voices) { - // Check both 32-bit and 64-bit registry locations + // Check both 32-bit and 64-bit registry locations for SAPI4 TTS Modes only string[] registryPaths = new[] { @"SOFTWARE\Microsoft\Speech\Voices\TTSMode", - @"SOFTWARE\WOW6432Node\Microsoft\Speech\Voices\TTSMode", - @"SOFTWARE\Microsoft\Speech\Voices\Tokens", - @"SOFTWARE\WOW6432Node\Microsoft\Speech\Voices\Tokens" + @"SOFTWARE\WOW6432Node\Microsoft\Speech\Voices\TTSMode" }; foreach (var basePath in registryPaths) From 01b46904516e1de2a5bc4319c1fe89f56687b090 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 14 Dec 2025 02:22:52 +0000 Subject: [PATCH 4/6] Improve comments for clarity based on code review Co-authored-by: ExtCan <60326708+ExtCan@users.noreply.github.com> --- src/Voice/Sapi4Manager.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Voice/Sapi4Manager.cs b/src/Voice/Sapi4Manager.cs index ee7e912..71ed245 100644 --- a/src/Voice/Sapi4Manager.cs +++ b/src/Voice/Sapi4Manager.cs @@ -78,7 +78,7 @@ public List GetAvailableVoices() // HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\TTSMode EnumerateTTSModes(voices); - // Don't include SAPI5 voices - only SAPI4 TTS Modes are compatible with MS Agent + // Exclude SAPI5 voices as MS Agent requires SAPI4 TTS ModeID format for voice selection } catch (Exception ex) { @@ -107,7 +107,7 @@ public List GetAvailableVoices() /// private void EnumerateTTSModes(List voices) { - // Check both 32-bit and 64-bit registry locations for SAPI4 TTS Modes only + // Check both 32-bit and 64-bit registry locations for SAPI4 TTS Modes only (SAPI5 Tokens paths removed) string[] registryPaths = new[] { @"SOFTWARE\Microsoft\Speech\Voices\TTSMode", From 09e30037e616a83bea8ad23dc824e43de090ca05 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 14 Dec 2025 02:35:01 +0000 Subject: [PATCH 5/6] Rewrite SAPI4 voice enumeration using proper COM interfaces - Create Sapi4Interop.cs with proper COM interface definitions (ITTSEnum, TTSMODEINFO) - Replace registry-based enumeration with COM-based enumeration using ITTSEnum - Use CLSID_TTSEnumerator and IID_ITTSEnum to properly enumerate SAPI4 voices - Remove incorrect registry and SAPI5 voice enumeration code - This follows the standard SAPI4 approach used by other TTS applications Co-authored-by: ExtCan <60326708+ExtCan@users.noreply.github.com> --- src/Voice/Sapi4Interop.cs | 71 +++++++++++++++ src/Voice/Sapi4Manager.cs | 186 ++++++++++++++------------------------ 2 files changed, 139 insertions(+), 118 deletions(-) create mode 100644 src/Voice/Sapi4Interop.cs diff --git a/src/Voice/Sapi4Interop.cs b/src/Voice/Sapi4Interop.cs new file mode 100644 index 0000000..bce1687 --- /dev/null +++ b/src/Voice/Sapi4Interop.cs @@ -0,0 +1,71 @@ +using System; +using System.Runtime.InteropServices; + +namespace MSAgentAI.Voice +{ + /// + /// SAPI4 COM interface definitions + /// + + // CLSID and IID constants + public static class Sapi4Constants + { + // {D67C0280-C743-11cd-80E5-00AA003E4B50} + public static readonly Guid CLSID_TTSEnumerator = new Guid("D67C0280-C743-11cd-80E5-00AA003E4B50"); + + // {6B837B20-2A59-11cf-A2CC-00AA00A8D5E5} + public static readonly Guid IID_ITTSEnum = new Guid("6B837B20-2A59-11cf-A2CC-00AA00A8D5E5"); + } + + [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)] + public struct TTSMODEINFO + { + public Guid gModeID; + [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256)] + public string szModeName; + [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256)] + public string szMfgName; + [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256)] + public string szProductName; + public Guid gMfgID; + public Guid gProductID; + public ushort wEngineVersion; + public ushort wGender; + public ushort wAge; + public ushort wStyle; + public ushort wSpeaker; + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)] + public uint[] dwLanguage; + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)] + public uint[] dwDialect; + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 32)] + public byte[] abReserved; + } + + [ComImport] + [Guid("6B837B20-2A59-11cf-A2CC-00AA00A8D5E5")] + [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)] + public interface ITTSEnum + { + [PreserveSig] + int Next( + uint celt, + [Out] out TTSMODEINFO pNext, + [Out] out uint pceltFetched); + + [PreserveSig] + int Skip(uint celt); + + [PreserveSig] + int Reset(); + + [PreserveSig] + int Clone([Out, MarshalAs(UnmanagedType.Interface)] out ITTSEnum ppEnum); + + [PreserveSig] + int Select( + [In] ref Guid gModeID, + [Out, MarshalAs(UnmanagedType.Interface)] out object ppITTSCentral, + [In, MarshalAs(UnmanagedType.IUnknown)] object pAudioDest); + } +} diff --git a/src/Voice/Sapi4Manager.cs b/src/Voice/Sapi4Manager.cs index 71ed245..5cc7816 100644 --- a/src/Voice/Sapi4Manager.cs +++ b/src/Voice/Sapi4Manager.cs @@ -1,7 +1,6 @@ using System; using System.Collections.Generic; using System.Runtime.InteropServices; -using Microsoft.Win32; using MSAgentAI.Logging; namespace MSAgentAI.Voice @@ -62,34 +61,30 @@ private void InitializeVoiceEngine() } /// - /// Gets available SAPI4 TTS Modes from the registry (the way MS Agent/CyberBuddy does it) - /// Looks in HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\TTSMode - /// Each voice has a ModeID GUID that is used to set the TTS mode in MS Agent + /// Gets available SAPI4 voices using proper COM enumeration /// public List GetAvailableVoices() { var voices = new List(); - Logger.Log("Enumerating SAPI4 TTS Modes..."); + Logger.Log("Enumerating SAPI4 voices via COM..."); try { - // Primary location: TTS Modes in Speech registry (CyberBuddy approach) - // HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\TTSMode - EnumerateTTSModes(voices); - - // Exclude SAPI5 voices as MS Agent requires SAPI4 TTS ModeID format for voice selection + // Use COM enumeration - the proper SAPI4 way + EnumerateViaCOM(voices); } catch (Exception ex) { Logger.LogError("Error enumerating SAPI4 voices", ex); } - Logger.Log($"Found {voices.Count} SAPI4 TTS modes"); + Logger.Log($"Found {voices.Count} SAPI4 voices"); // Add default if none found if (voices.Count == 0) { + Logger.Log("No SAPI4 voices found. Adding default placeholder."); voices.Add(new VoiceInfo { Id = "default", @@ -103,130 +98,85 @@ public List GetAvailableVoices() } /// - /// Enumerate TTS Modes from the registry - this is the proper SAPI4 way + /// Enumerate SAPI4 voices using COM ITTSEnum interface - the proper way /// - private void EnumerateTTSModes(List voices) + private void EnumerateViaCOM(List voices) { - // Check both 32-bit and 64-bit registry locations for SAPI4 TTS Modes only (SAPI5 Tokens paths removed) - string[] registryPaths = new[] - { - @"SOFTWARE\Microsoft\Speech\Voices\TTSMode", - @"SOFTWARE\WOW6432Node\Microsoft\Speech\Voices\TTSMode" - }; - - foreach (var basePath in registryPaths) + ITTSEnum ttsEnum = null; + + try { - try + // Create SAPI4 TTS Enumerator + Type ttsEnumType = Type.GetTypeFromCLSID(Sapi4Constants.CLSID_TTSEnumerator); + if (ttsEnumType == null) { - using (var key = Registry.LocalMachine.OpenSubKey(basePath)) - { - if (key == null) continue; - - foreach (var modeName in key.GetSubKeyNames()) - { - try - { - using (var modeKey = key.OpenSubKey(modeName)) - { - if (modeKey == null) continue; + Logger.Log("SAPI4 TTSEnumerator not found. SAPI4 may not be installed."); + return; + } - // Get ModeID (GUID) - this is what MS Agent needs - string modeId = modeName; - - // Try to get ModeID from subkey value if it exists - var modeIdValue = modeKey.GetValue("ModeID"); - if (modeIdValue != null) - { - modeId = modeIdValue.ToString(); - } + object ttsEnumObj = Activator.CreateInstance(ttsEnumType); + ttsEnum = (ITTSEnum)ttsEnumObj; - // Get display name from various possible locations - string displayName = modeKey.GetValue("")?.ToString(); - if (string.IsNullOrEmpty(displayName)) - { - displayName = modeKey.GetValue("VoiceName")?.ToString(); - } - if (string.IsNullOrEmpty(displayName)) - { - // Check Attributes subkey - using (var attrKey = modeKey.OpenSubKey("Attributes")) - { - if (attrKey != null) - { - displayName = attrKey.GetValue("Name")?.ToString(); - } - } - } - if (string.IsNullOrEmpty(displayName)) - { - displayName = modeName; - } + // Reset to start of enumeration + ttsEnum.Reset(); - // Skip duplicates - if (voices.Exists(v => v.ModeId == modeId || v.Name == displayName)) - continue; + // Enumerate all voices + TTSMODEINFO modeInfo; + uint fetched; + int hr; - voices.Add(new VoiceInfo - { - Id = modeId, - Name = displayName, - ModeId = modeId, - IsSapi4 = true - }); + while (true) + { + hr = ttsEnum.Next(1, out modeInfo, out fetched); + + // S_FALSE (1) means no more items + if (hr != 0 || fetched == 0) + break; + + // Add voice to list + string modeName = modeInfo.szModeName ?? ""; + string mfgName = modeInfo.szMfgName ?? ""; + string productName = modeInfo.szProductName ?? ""; + + // Build display name + string displayName = modeName; + if (string.IsNullOrEmpty(displayName)) + displayName = productName; + if (string.IsNullOrEmpty(displayName)) + displayName = modeInfo.gModeID.ToString(); + + // Skip duplicates + string modeIdStr = modeInfo.gModeID.ToString("B"); // Format as {GUID} + if (voices.Exists(v => v.ModeId == modeIdStr)) + continue; + + voices.Add(new VoiceInfo + { + Id = modeIdStr, + Name = displayName, + ModeId = modeIdStr, + IsSapi4 = true + }); - Logger.Log($"Found SAPI4 TTS Mode: {displayName} (ModeID: {modeId})"); - } - } - catch { } - } - } + Logger.Log($"Found SAPI4 voice: {displayName} (ModeID: {modeIdStr})"); } - catch { } } - } - - /// - /// Enumerate voice tokens (SAPI5 style, fallback) - /// - private void EnumerateVoiceTokens(List voices) - { - try + catch (Exception ex) + { + Logger.LogError("Error in COM enumeration", ex); + } + finally { - // Also try OneCore voices (Windows 10+) as fallback - using (var key = Registry.LocalMachine.OpenSubKey(@"SOFTWARE\Microsoft\Speech_OneCore\Voices\Tokens")) + // Release COM object + if (ttsEnum != null) { - if (key == null) return; - - foreach (var tokenName in key.GetSubKeyNames()) + try { - try - { - using (var tokenKey = key.OpenSubKey(tokenName)) - { - if (tokenKey == null) continue; - - string displayName = tokenKey.GetValue("")?.ToString() ?? tokenName; - - // Skip duplicates - if (voices.Exists(v => v.Name == displayName)) - continue; - - voices.Add(new VoiceInfo - { - Id = tokenName, - Name = displayName + " (SAPI5)", - ModeId = tokenName, - IsSapi4 = false - }); - - Logger.Log($"Found SAPI5 voice: {displayName}"); - } - } - catch { } + Marshal.ReleaseComObject(ttsEnum); } + catch { } } } - catch { } } /// From 54fb3b126c936f0a551c66c8bb5fa330e33c7517 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 14 Dec 2025 02:36:23 +0000 Subject: [PATCH 6/6] Address code review feedback - remove unused variable and clarify comments Co-authored-by: ExtCan <60326708+ExtCan@users.noreply.github.com> --- src/Voice/Sapi4Manager.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Voice/Sapi4Manager.cs b/src/Voice/Sapi4Manager.cs index 5cc7816..125bfd2 100644 --- a/src/Voice/Sapi4Manager.cs +++ b/src/Voice/Sapi4Manager.cs @@ -129,13 +129,13 @@ private void EnumerateViaCOM(List voices) { hr = ttsEnum.Next(1, out modeInfo, out fetched); - // S_FALSE (1) means no more items + // S_OK (0) with fetched > 0 means we got an item + // S_FALSE (1) or any other non-zero hr means no more items if (hr != 0 || fetched == 0) break; // Add voice to list string modeName = modeInfo.szModeName ?? ""; - string mfgName = modeInfo.szMfgName ?? ""; string productName = modeInfo.szProductName ?? ""; // Build display name