diff --git a/src/UI/SettingsForm.cs b/src/UI/SettingsForm.cs
index 69020d3..4990e7f 100644
--- a/src/UI/SettingsForm.cs
+++ b/src/UI/SettingsForm.cs
@@ -1642,6 +1642,20 @@ private void OnTestVoiceClick(object sender, EventArgs e)
         {
             if (_agentManager?.IsLoaded == true)
             {
+                // Apply current slider values before testing
+                _agentManager.SetSpeechSpeed(_speedTrackBar.Value);
+                _agentManager.SetSpeechPitch(_pitchTrackBar.Value);
+
+                // Convert volume from 0-100 to 0-65535
+                int volumeValue = (int)(_volumeTrackBar.Value * VolumeScaleFactor);
+                _agentManager.SetSpeechVolume(volumeValue);
+
+                // Apply selected voice if one is selected
+                if (_voiceComboBox.SelectedItem is VoiceInfo voice)
+                {
+                    _agentManager.SetTTSModeID(voice.ModeId ?? voice.Id);
+                }
+
                 _agentManager.Speak("This is a test of the text to speech voice.");
             }
             else
diff --git a/src/Voice/Sapi4Interop.cs b/src/Voice/Sapi4Interop.cs
new file mode 100644
index 0000000..bce1687
--- /dev/null
+++ b/src/Voice/Sapi4Interop.cs
@@ -0,0 +1,75 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace MSAgentAI.Voice
+{
+    /// <summary>
+    /// SAPI4 COM interface definitions
+    /// </summary>
+
+    // CLSID and IID constants
+    public static class Sapi4Constants
+    {
+        // {D67C0280-C743-11cd-80E5-00AA003E4B50}
+        public static readonly Guid CLSID_TTSEnumerator = new Guid("D67C0280-C743-11cd-80E5-00AA003E4B50");
+
+        // {6B837B20-2A59-11cf-A2CC-00AA00A8D5E5}
+        public static readonly Guid IID_ITTSEnum = new Guid("6B837B20-2A59-11cf-A2CC-00AA00A8D5E5");
+    }
+
+    // NOTE(review): field order and buffer lengths must match the native TTSMODEINFOW
+    // declaration in the SAPI 4 SDK header (speech.h) - TODO confirm before relying on Next().
+    [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
+    public struct TTSMODEINFO
+    {
+        public Guid gModeID;
+        [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256)]
+        public string szModeName;
+        [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256)]
+        public string szMfgName;
+        [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256)]
+        public string szProductName;
+        public Guid gMfgID;
+        public Guid gProductID;
+        public ushort wEngineVersion;
+        public ushort wGender;
+        public ushort wAge;
+        public ushort wStyle;
+        public ushort wSpeaker;
+        [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)]
+        public uint[] dwLanguage;
+        [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)]
+        public uint[] dwDialect;
+        [MarshalAs(UnmanagedType.ByValArray, SizeConst = 32)]
+        public byte[] abReserved;
+    }
+
+    // NOTE(review): the Guid below must be the real IID of the Unicode ITTSEnum interface,
+    // otherwise the cast in Sapi4Manager throws InvalidCastException - TODO confirm.
+    [ComImport]
+    [Guid("6B837B20-2A59-11cf-A2CC-00AA00A8D5E5")]
+    [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+    public interface ITTSEnum
+    {
+        [PreserveSig]
+        int Next(
+            uint celt,
+            [Out] out TTSMODEINFO pNext,
+            [Out] out uint pceltFetched);
+
+        [PreserveSig]
+        int Skip(uint celt);
+
+        [PreserveSig]
+        int Reset();
+
+        [PreserveSig]
+        int Clone([Out, MarshalAs(UnmanagedType.Interface)] out ITTSEnum ppEnum);
+
+        [PreserveSig]
+        int Select(
+            [In] ref Guid gModeID,
+            [Out, MarshalAs(UnmanagedType.Interface)] out object ppITTSCentral,
+            [In, MarshalAs(UnmanagedType.IUnknown)] object pAudioDest);
+    }
+}
diff --git a/src/Voice/Sapi4Manager.cs b/src/Voice/Sapi4Manager.cs
index 03fa509..125bfd2 100644
--- a/src/Voice/Sapi4Manager.cs
+++ b/src/Voice/Sapi4Manager.cs
@@ -1,7 +1,6 @@
 using System;
 using System.Collections.Generic;
 using System.Runtime.InteropServices;
-using Microsoft.Win32;
 using MSAgentAI.Logging;
 
 namespace MSAgentAI.Voice
@@ -62,35 +61,30 @@ private void InitializeVoiceEngine()
         }
 
         /// <summary>
-        /// Gets available SAPI4 TTS Modes from the registry (the way MS Agent/CyberBuddy does it)
-        /// Looks in HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\TTSMode
-        /// Each voice has a ModeID GUID that is used to set the TTS mode in MS Agent
+        /// Gets available SAPI4 voices using proper COM enumeration
         /// </summary>
         public List<VoiceInfo> GetAvailableVoices()
         {
             var voices = new List<VoiceInfo>();
 
-            Logger.Log("Enumerating SAPI4 TTS Modes...");
+            Logger.Log("Enumerating SAPI4 voices via COM...");
 
             try
             {
-                // Primary location: TTS Modes in Speech registry (CyberBuddy approach)
-                // HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\TTSMode
-                EnumerateTTSModes(voices);
-
-                // Fallback: Check for voices using Tokens (newer SAPI)
-                EnumerateVoiceTokens(voices);
+                // Use COM enumeration - the proper SAPI4 way
+                EnumerateViaCOM(voices);
             }
             catch (Exception ex)
             {
                 Logger.LogError("Error enumerating SAPI4 voices", ex);
             }
 
-            Logger.Log($"Found {voices.Count} SAPI4 TTS modes");
+            Logger.Log($"Found {voices.Count} SAPI4 voices");
 
             // Add default if none found
             if (voices.Count == 0)
             {
+                Logger.Log("No SAPI4 voices found. Adding default placeholder.");
                 voices.Add(new VoiceInfo
                 {
                     Id = "default",
@@ -104,132 +98,105 @@ public List<VoiceInfo> GetAvailableVoices()
         }
 
         /// <summary>
-        /// Enumerate TTS Modes from the registry - this is the proper SAPI4 way
+        /// Enumerate SAPI4 voices using COM ITTSEnum interface - the proper way
         /// </summary>
-        private void EnumerateTTSModes(List<VoiceInfo> voices)
+        /// <param name="voices">List that each newly discovered voice is appended to.</param>
+        private void EnumerateViaCOM(List<VoiceInfo> voices)
         {
-            // Check both 32-bit and 64-bit registry locations
-            string[] registryPaths = new[]
-            {
-                @"SOFTWARE\Microsoft\Speech\Voices\TTSMode",
-                @"SOFTWARE\WOW6432Node\Microsoft\Speech\Voices\TTSMode",
-                @"SOFTWARE\Microsoft\Speech\Voices\Tokens",
-                @"SOFTWARE\WOW6432Node\Microsoft\Speech\Voices\Tokens"
-            };
-
-            foreach (var basePath in registryPaths)
-            {
-                try
-                {
-                    using (var key = Registry.LocalMachine.OpenSubKey(basePath))
-                    {
-                        if (key == null) continue;
-
-                        foreach (var modeName in key.GetSubKeyNames())
-                        {
-                            try
-                            {
-                                using (var modeKey = key.OpenSubKey(modeName))
-                                {
-                                    if (modeKey == null) continue;
-
-                                    // Get ModeID (GUID) - this is what MS Agent needs
-                                    string modeId = modeName;
-
-                                    // Try to get ModeID from subkey value if it exists
-                                    var modeIdValue = modeKey.GetValue("ModeID");
-                                    if (modeIdValue != null)
-                                    {
-                                        modeId = modeIdValue.ToString();
-                                    }
-
-                                    // Get display name from various possible locations
-                                    string displayName = modeKey.GetValue("")?.ToString();
-                                    if (string.IsNullOrEmpty(displayName))
-                                    {
-                                        displayName = modeKey.GetValue("VoiceName")?.ToString();
-                                    }
-                                    if (string.IsNullOrEmpty(displayName))
-                                    {
-                                        // Check Attributes subkey
-                                        using (var attrKey = modeKey.OpenSubKey("Attributes"))
-                                        {
-                                            if (attrKey != null)
-                                            {
-                                                displayName = attrKey.GetValue("Name")?.ToString();
-                                            }
-                                        }
-                                    }
-                                    if (string.IsNullOrEmpty(displayName))
-                                    {
-                                        displayName = modeName;
-                                    }
-
-                                    // Skip duplicates
-                                    if (voices.Exists(v => v.ModeId == modeId || v.Name == displayName))
-                                        continue;
-
-                                    voices.Add(new VoiceInfo
-                                    {
-                                        Id = modeId,
-                                        Name = displayName,
-                                        ModeId = modeId,
-                                        IsSapi4 = true
-                                    });
-
-                                    Logger.Log($"Found SAPI4 TTS Mode: {displayName} (ModeID: {modeId})");
-                                }
-                            }
-                            catch { }
-                        }
-                    }
-                }
-                catch { }
-            }
-        }
-
-        /// <summary>
-        /// Enumerate voice tokens (SAPI5 style, fallback)
-        /// </summary>
-        private void EnumerateVoiceTokens(List<VoiceInfo> voices)
-        {
-            try
-            {
-                // Also try OneCore voices (Windows 10+) as fallback
-                using (var key = Registry.LocalMachine.OpenSubKey(@"SOFTWARE\Microsoft\Speech_OneCore\Voices\Tokens"))
-                {
-                    if (key == null) return;
-
-                    foreach (var tokenName in key.GetSubKeyNames())
-                    {
-                        try
-                        {
-                            using (var tokenKey = key.OpenSubKey(tokenName))
-                            {
-                                if (tokenKey == null) continue;
-
-                                string displayName = tokenKey.GetValue("")?.ToString() ?? tokenName;
-
-                                // Skip duplicates
-                                if (voices.Exists(v => v.Name == displayName))
-                                    continue;
-
-                                voices.Add(new VoiceInfo
-                                {
-                                    Id = tokenName,
-                                    Name = displayName + " (SAPI5)",
-                                    ModeId = tokenName,
-                                    IsSapi4 = false
-                                });
-
-                                Logger.Log($"Found SAPI5 voice: {displayName}");
-                            }
-                        }
-                        catch { }
-                    }
-                }
-            }
-            catch { }
+            // Hold the raw COM object so it is released even if the ITTSEnum cast fails
+            object ttsEnumObj = null;
+
+            try
+            {
+                // Create SAPI4 TTS Enumerator
+                Type ttsEnumType = Type.GetTypeFromCLSID(Sapi4Constants.CLSID_TTSEnumerator);
+                if (ttsEnumType == null)
+                {
+                    Logger.Log("SAPI4 TTSEnumerator not found. SAPI4 may not be installed.");
+                    return;
+                }
+
+                ttsEnumObj = Activator.CreateInstance(ttsEnumType);
+                ITTSEnum ttsEnum = (ITTSEnum)ttsEnumObj;
+
+                // Reset to start of enumeration
+                int hr = ttsEnum.Reset();
+                if (hr < 0)
+                {
+                    Logger.Log($"ITTSEnum.Reset failed (HRESULT 0x{hr:X8})");
+                }
+
+                // Enumerate all voices
+                TTSMODEINFO modeInfo;
+                uint fetched;
+
+                while (true)
+                {
+                    hr = ttsEnum.Next(1, out modeInfo, out fetched);
+
+                    // A negative HRESULT is a failure, not end-of-list; report it
+                    if (hr < 0)
+                    {
+                        Logger.Log($"ITTSEnum.Next failed (HRESULT 0x{hr:X8})");
+                        break;
+                    }
+
+                    // S_OK (0) with fetched > 0 means we got an item
+                    // S_FALSE (1) or any other non-zero hr means no more items
+                    if (hr != 0 || fetched == 0)
+                        break;
+
+                    // Add voice to list
+                    string modeName = modeInfo.szModeName ?? "";
+                    string productName = modeInfo.szProductName ?? "";
+
+                    // Build display name
+                    string displayName = modeName;
+                    if (string.IsNullOrEmpty(displayName))
+                        displayName = productName;
+                    if (string.IsNullOrEmpty(displayName))
+                        displayName = modeInfo.gModeID.ToString();
+
+                    // Skip duplicates
+                    string modeIdStr = modeInfo.gModeID.ToString("B"); // Format as {GUID}
+                    if (voices.Exists(v => v.ModeId == modeIdStr))
+                        continue;
+
+                    voices.Add(new VoiceInfo
+                    {
+                        Id = modeIdStr,
+                        Name = displayName,
+                        ModeId = modeIdStr,
+                        IsSapi4 = true
+                    });
+
+                    Logger.Log($"Found SAPI4 voice: {displayName} (ModeID: {modeIdStr})");
+                }
+            }
+            catch (COMException ex)
+            {
+                // GetTypeFromCLSID does not check registration; a missing class fails at CreateInstance
+                Logger.LogError("SAPI4 TTSEnumerator could not be created. SAPI4 may not be installed.", ex);
+            }
+            catch (Exception ex)
+            {
+                Logger.LogError("Error in COM enumeration", ex);
+            }
+            finally
+            {
+                // Release COM object
+                if (ttsEnumObj != null)
+                {
+                    try
+                    {
+                        Marshal.ReleaseComObject(ttsEnumObj);
+                    }
+                    catch (Exception ex)
+                    {
+                        Logger.LogError("Error releasing SAPI4 enumerator", ex);
+                    }
+                }
+            }
         }
 
         /// <summary>