Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/UI/SettingsForm.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1642,6 +1642,20 @@ private void OnTestVoiceClick(object sender, EventArgs e)
{
if (_agentManager?.IsLoaded == true)
{
// Apply current slider values before testing
_agentManager.SetSpeechSpeed(_speedTrackBar.Value);
_agentManager.SetSpeechPitch(_pitchTrackBar.Value);

// Convert volume from 0-100 to 0-65535
int volumeValue = (int)(_volumeTrackBar.Value * VolumeScaleFactor);
_agentManager.SetSpeechVolume(volumeValue);

// Apply selected voice if one is selected
if (_voiceComboBox.SelectedItem is VoiceInfo voice)
{
_agentManager.SetTTSModeID(voice.ModeId ?? voice.Id);
}

_agentManager.Speak("This is a test of the text to speech voice.");
}
else
Expand Down
71 changes: 71 additions & 0 deletions src/Voice/Sapi4Interop.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
using System;
using System.Runtime.InteropServices;

namespace MSAgentAI.Voice
{
/// <summary>
/// COM identifiers (class ID and interface ID) used by the SAPI4 interop
/// declarations in this file.
/// </summary>
public static class Sapi4Constants
{
    /// <summary>
    /// Class ID of the SAPI4 TTS enumerator COM object:
    /// {D67C0280-C743-11cd-80E5-00AA003E4B50}.
    /// </summary>
    public static readonly Guid CLSID_TTSEnumerator = Guid.Parse("D67C0280-C743-11cd-80E5-00AA003E4B50");

    /// <summary>
    /// Interface ID declared for ITTSEnum:
    /// {6B837B20-2A59-11cf-A2CC-00AA00A8D5E5}.
    /// This is the same value as the [Guid] attribute on the ITTSEnum interface.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this value has not been checked against the SAPI4 SDK
    /// header (speech.h, IID_ITTSEnumW / IID_ITTSEnumA) - TODO confirm.
    /// </remarks>
    public static readonly Guid IID_ITTSEnum = Guid.Parse("6B837B20-2A59-11cf-A2CC-00AA00A8D5E5");
}

/// <summary>
/// Managed mirror of the SAPI4 TTS mode description filled in by
/// <c>ITTSEnum.Next</c>. Fields are marshaled in declaration order
/// (LayoutKind.Sequential) and the string fields use wide characters
/// (CharSet.Unicode).
/// </summary>
/// <remarks>
/// Do not reorder, resize, add or remove fields without checking the native
/// declaration: the marshaled size and offsets are derived from this layout.
/// NOTE(review): this layout has not been verified against TTSMODEINFOW in
/// the SAPI4 SDK header (speech.h). If the native struct is larger than this
/// one, the native side would write past the marshaled buffer - TODO confirm
/// field order, name buffer lengths and total size against the header.
/// </remarks>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
public struct TTSMODEINFO
{
// GUID identifying the TTS mode (voice).
public Guid gModeID;
// Inline fixed-size string buffer of 256 characters (terminator included).
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256)]
public string szModeName;
// Inline fixed-size string buffer of 256 characters; presumably the manufacturer name.
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256)]
public string szMfgName;
// Inline fixed-size string buffer of 256 characters; presumably the product name.
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256)]
public string szProductName;
// Presumably GUIDs of the manufacturer and product - verify against the SDK header.
public Guid gMfgID;
public Guid gProductID;
// Five consecutive 16-bit fields; meanings are inferred from the names only.
public ushort wEngineVersion;
public ushort wGender;
public ushort wAge;
public ushort wStyle;
public ushort wSpeaker;
// Inline array of four 32-bit values.
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)]
public uint[] dwLanguage;
// Inline array of four 32-bit values.
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)]
public uint[] dwDialect;
// 32 trailing bytes declared as reserved space.
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 32)]
public byte[] abReserved;
}

/// <summary>
/// COM import of the SAPI4 TTS mode enumerator interface (IUnknown-based).
/// Every method is marked [PreserveSig], so each returns the raw HRESULT as
/// an <c>int</c> instead of throwing; callers must check the return value.
/// </summary>
/// <remarks>
/// The order of the method declarations defines the COM vtable slot order
/// and must not be changed.
/// NOTE(review): the interface GUID, the method order and the parameter
/// passing conventions have not been verified against ITTSEnumW in the
/// SAPI4 SDK header (speech.h) - TODO confirm.
/// </remarks>
[ComImport]
[Guid("6B837B20-2A59-11cf-A2CC-00AA00A8D5E5")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
public interface ITTSEnum
{
// Fetches mode information into pNext and reports a count in pceltFetched.
// pNext is a single struct, not an array, so presumably celt must be 1.
[PreserveSig]
int Next(
uint celt,
[Out] out TTSMODEINFO pNext,
[Out] out uint pceltFetched);

// Presumably advances the enumeration past celt entries.
[PreserveSig]
int Skip(uint celt);

// Presumably restarts the enumeration from the first entry.
[PreserveSig]
int Reset();

// Returns another enumerator instance through ppEnum.
[PreserveSig]
int Clone([Out, MarshalAs(UnmanagedType.Interface)] out ITTSEnum ppEnum);

// Presumably creates the engine object for the given mode GUID and returns it in ppITTSCentral.
// NOTE(review): gModeID is passed by reference (as a pointer) here; verify
// whether the native method takes the GUID by value or by pointer.
[PreserveSig]
int Select(
[In] ref Guid gModeID,
[Out, MarshalAs(UnmanagedType.Interface)] out object ppITTSCentral,
[In, MarshalAs(UnmanagedType.IUnknown)] object pAudioDest);
}
}
189 changes: 68 additions & 121 deletions src/Voice/Sapi4Manager.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using Microsoft.Win32;
using MSAgentAI.Logging;

namespace MSAgentAI.Voice
Expand Down Expand Up @@ -62,35 +61,30 @@ private void InitializeVoiceEngine()
}

/// <summary>
/// Gets available SAPI4 TTS Modes from the registry (the way MS Agent/CyberBuddy does it)
/// Looks in HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\TTSMode
/// Each voice has a ModeID GUID that is used to set the TTS mode in MS Agent
/// Gets available SAPI4 voices using proper COM enumeration
/// </summary>
public List<VoiceInfo> GetAvailableVoices()
{
var voices = new List<VoiceInfo>();

Logger.Log("Enumerating SAPI4 TTS Modes...");
Logger.Log("Enumerating SAPI4 voices via COM...");

try
{
// Primary location: TTS Modes in Speech registry (CyberBuddy approach)
// HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\TTSMode
EnumerateTTSModes(voices);

// Fallback: Check for voices using Tokens (newer SAPI)
EnumerateVoiceTokens(voices);
// Use COM enumeration - the proper SAPI4 way
EnumerateViaCOM(voices);
}
catch (Exception ex)
{
Logger.LogError("Error enumerating SAPI4 voices", ex);
}

Logger.Log($"Found {voices.Count} SAPI4 TTS modes");
Logger.Log($"Found {voices.Count} SAPI4 voices");

// Add default if none found
if (voices.Count == 0)
{
Logger.Log("No SAPI4 voices found. Adding default placeholder.");
voices.Add(new VoiceInfo
{
Id = "default",
Expand All @@ -104,132 +98,85 @@ public List<VoiceInfo> GetAvailableVoices()
}

/// <summary>
/// Enumerate TTS Modes from the registry - this is the proper SAPI4 way
/// Enumerate SAPI4 voices using COM ITTSEnum interface - the proper way
/// </summary>
private void EnumerateTTSModes(List<VoiceInfo> voices)
private void EnumerateViaCOM(List<VoiceInfo> voices)
{
// Check both 32-bit and 64-bit registry locations
string[] registryPaths = new[]
{
@"SOFTWARE\Microsoft\Speech\Voices\TTSMode",
@"SOFTWARE\WOW6432Node\Microsoft\Speech\Voices\TTSMode",
@"SOFTWARE\Microsoft\Speech\Voices\Tokens",
@"SOFTWARE\WOW6432Node\Microsoft\Speech\Voices\Tokens"
};

foreach (var basePath in registryPaths)
ITTSEnum ttsEnum = null;

try
{
try
// Create SAPI4 TTS Enumerator
Type ttsEnumType = Type.GetTypeFromCLSID(Sapi4Constants.CLSID_TTSEnumerator);
if (ttsEnumType == null)
{
using (var key = Registry.LocalMachine.OpenSubKey(basePath))
{
if (key == null) continue;

foreach (var modeName in key.GetSubKeyNames())
{
try
{
using (var modeKey = key.OpenSubKey(modeName))
{
if (modeKey == null) continue;
Logger.Log("SAPI4 TTSEnumerator not found. SAPI4 may not be installed.");
return;
}

// Get ModeID (GUID) - this is what MS Agent needs
string modeId = modeName;

// Try to get ModeID from subkey value if it exists
var modeIdValue = modeKey.GetValue("ModeID");
if (modeIdValue != null)
{
modeId = modeIdValue.ToString();
}
object ttsEnumObj = Activator.CreateInstance(ttsEnumType);
ttsEnum = (ITTSEnum)ttsEnumObj;

// Get display name from various possible locations
string displayName = modeKey.GetValue("")?.ToString();
if (string.IsNullOrEmpty(displayName))
{
displayName = modeKey.GetValue("VoiceName")?.ToString();
}
if (string.IsNullOrEmpty(displayName))
{
// Check Attributes subkey
using (var attrKey = modeKey.OpenSubKey("Attributes"))
{
if (attrKey != null)
{
displayName = attrKey.GetValue("Name")?.ToString();
}
}
}
if (string.IsNullOrEmpty(displayName))
{
displayName = modeName;
}
// Reset to start of enumeration
ttsEnum.Reset();

// Skip duplicates
if (voices.Exists(v => v.ModeId == modeId || v.Name == displayName))
continue;
// Enumerate all voices
TTSMODEINFO modeInfo;
uint fetched;
int hr;

voices.Add(new VoiceInfo
{
Id = modeId,
Name = displayName,
ModeId = modeId,
IsSapi4 = true
});
while (true)
{
hr = ttsEnum.Next(1, out modeInfo, out fetched);

// S_OK (0) with fetched > 0 means we got an item
// S_FALSE (1) or any other non-zero hr means no more items
if (hr != 0 || fetched == 0)
break;

// Add voice to list
string modeName = modeInfo.szModeName ?? "";
string productName = modeInfo.szProductName ?? "";

// Build display name
string displayName = modeName;
if (string.IsNullOrEmpty(displayName))
displayName = productName;
if (string.IsNullOrEmpty(displayName))
displayName = modeInfo.gModeID.ToString();

// Skip duplicates
string modeIdStr = modeInfo.gModeID.ToString("B"); // Format as {GUID}
if (voices.Exists(v => v.ModeId == modeIdStr))
continue;

voices.Add(new VoiceInfo
{
Id = modeIdStr,
Name = displayName,
ModeId = modeIdStr,
IsSapi4 = true
});

Logger.Log($"Found SAPI4 TTS Mode: {displayName} (ModeID: {modeId})");
}
}
catch { }
}
}
Logger.Log($"Found SAPI4 voice: {displayName} (ModeID: {modeIdStr})");
}
catch { }
}
}

/// <summary>
/// Enumerate voice tokens (SAPI5 style, fallback)
/// </summary>
private void EnumerateVoiceTokens(List<VoiceInfo> voices)
{
try
catch (Exception ex)
{
Logger.LogError("Error in COM enumeration", ex);
}
finally
{
// Also try OneCore voices (Windows 10+) as fallback
using (var key = Registry.LocalMachine.OpenSubKey(@"SOFTWARE\Microsoft\Speech_OneCore\Voices\Tokens"))
// Release COM object
if (ttsEnum != null)
{
if (key == null) return;

foreach (var tokenName in key.GetSubKeyNames())
try
{
try
{
using (var tokenKey = key.OpenSubKey(tokenName))
{
if (tokenKey == null) continue;

string displayName = tokenKey.GetValue("")?.ToString() ?? tokenName;

// Skip duplicates
if (voices.Exists(v => v.Name == displayName))
continue;

voices.Add(new VoiceInfo
{
Id = tokenName,
Name = displayName + " (SAPI5)",
ModeId = tokenName,
IsSapi4 = false
});

Logger.Log($"Found SAPI5 voice: {displayName}");
}
}
catch { }
Marshal.ReleaseComObject(ttsEnum);
}
catch { }
}
}
catch { }
}

/// <summary>
Expand Down