Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions src/Translumo.Processing/TranslationProcessingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ public class TranslationProcessingService : IProcessingService, IDisposable
private long _lastTranslatedTextTicks;

private const float MIN_SCORE_THRESHOLD = 2.1f;

public TranslationProcessingService(ICapturerFactory capturerFactory, IChatTextMediator chatTextMediator, OcrEnginesFactory ocrEnginesFactory,
TranslatorFactory translationFactory, TtsFactory ttsFactory, TtsConfiguration ttsConfiguration,
TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration,
TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration,
TextResultCacheService textResultCacheService, TextProcessingConfiguration textConfiguration, ILogger<TranslationProcessingService> logger)
{
_logger = logger;
Expand Down Expand Up @@ -236,7 +236,7 @@ void CapturerEnsureInitialized()
continue;
}

if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText,
if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText,
bestDetected.Language.Asian, out iterationId))
{
sequentialText = false;
Expand All @@ -257,7 +257,7 @@ void CapturerEnsureInitialized()
}

_logger.LogError(ex, $"Screen capture failed (code: {ex.ErrorCode})");

_capturer.Dispose();
_capturer = null;
CapturerEnsureInitialized();
Expand Down Expand Up @@ -407,9 +407,15 @@ private void TtsConfigurationOnPropertyChanged(object sender, PropertyChangedEve
if (e.PropertyName == nameof(_ttsConfiguration.TtsLanguage)
|| e.PropertyName == nameof(_ttsConfiguration.TtsSystem))
{
_ttsEngine.Dispose();
_ttsEngine?.Dispose();
_ttsEngine = null;
_ttsEngine = _ttsFactory.CreateTtsEngine(_ttsConfiguration);
}
else if (e.PropertyName == nameof(_ttsConfiguration.CurrentVoice)
&& _ttsEngine != null && _ttsConfiguration.CurrentVoice != null)
{
_ttsEngine.SetVoice(_ttsConfiguration.CurrentVoice);
}
}

private void OcrGeneralConfigurationOnPropertyChanged(object sender, PropertyChangedEventArgs e)
Expand Down
6 changes: 5 additions & 1 deletion src/Translumo.TTS/Engines/ITTSEngine.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
namespace Translumo.TTS.Engines;

public interface ITTSEngine: IDisposable
/// <summary>
/// Contract shared by the text-to-speech engines: speak a text, list the voice names
/// the engine offers, and select one of them by name. Engines are disposable.
/// </summary>
public interface ITTSEngine : IDisposable
{
/// <summary>Asks the engine to speak <paramref name="text"/>.</summary>
/// <param name="text">The text to be spoken.</param>
void SpeechText(string text);

/// <summary>Returns the names of the voices this engine offers.</summary>
string[] GetVoices();

/// <summary>Selects the voice to use, identified by name.</summary>
/// <param name="voice">
/// Name of the voice. NOTE(review): how an unknown name is handled is left to the implementation —
/// confirm the expected contract before relying on a specific exception type.
/// </param>
void SetVoice(string voice);
}
6 changes: 6 additions & 0 deletions src/Translumo.TTS/Engines/NoneTTSEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ public void Dispose()
{
}

/// <summary>
/// Returns a one-element array holding the placeholder voice name "None".
/// A new array is created on every call.
/// </summary>
public string[] GetVoices()
{
    return new[] { "None" };
}

/// <summary>
/// Intentionally does nothing: the requested voice name is ignored.
/// </summary>
/// <param name="voice">Ignored.</param>
public void SetVoice(string voice)
{
}

/// <summary>
/// Intentionally does nothing: the text is discarded and no speech is produced.
/// </summary>
/// <param name="text">Ignored.</param>
public void SpeechText(string text)
{
}
Expand Down
8 changes: 7 additions & 1 deletion src/Translumo.TTS/Engines/SileroTTSEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public class SileroTTSEngine : ITTSEngine
{
private dynamic _ipython;
private dynamic _model;
private string[] _voices;
private string _voice;
private readonly string _modelPath;
private readonly PythonEngineWrapper _pythonEngine;
Expand Down Expand Up @@ -51,7 +52,8 @@ private void Init()
_pyObjects.Add(_ipython);
});

_voice = ((string[])_model.speakers).First();
_voices = (string[])_model.speakers;
_voice = _voices.First();
}

public void SpeechText(string text)
Expand Down Expand Up @@ -146,5 +148,9 @@ private string GetModelFullPath(string langCode)
_ => null
};

/// <summary>
/// Returns the voice names held in <c>_voices</c>. The array is returned as-is, not copied.
/// </summary>
public string[] GetVoices()
{
    return _voices;
}

/// <summary>
/// Selects a voice by name. The name is matched case-insensitively (ordinal) against
/// <c>_voices</c>, and the matching entry from that list is stored in <c>_voice</c>.
/// </summary>
/// <param name="voice">Name of the voice to select.</param>
/// <exception cref="InvalidOperationException">
/// No entry of <c>_voices</c> matches <paramref name="voice"/>.
/// </exception>
public void SetVoice(string voice)
{
    var match = _voices.FirstOrDefault(x => x.Equals(voice, StringComparison.OrdinalIgnoreCase));
    if (match is null)
    {
        // Same exception type Enumerable.First would raise, but the message names the offending voice.
        throw new InvalidOperationException(
            $"Voice '{voice}' is not available. Available voices: {string.Join(", ", _voices)}");
    }

    _voice = match;
}

private sealed record ModelDescription(string FileUrl, string WarmUpText);
}
97 changes: 93 additions & 4 deletions src/Translumo.TTS/Engines/WindowsTTSEngine.cs
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
using System.Globalization;
using System.Collections;
using System.Collections.ObjectModel;
using System.Globalization;
using System.Reflection;
using System.Speech.Synthesis;

namespace Translumo.TTS.Engines;

public class WindowsTTSEngine : ITTSEngine
{
private readonly VoiceInfo _voiceInfo;
private VoiceInfo _voiceInfo;
private readonly SpeechSynthesizer _synthesizer;
private readonly ReadOnlyDictionary<string, VoiceInfo> _voices;

/// <summary>
/// Creates the engine for one language: sets up a <see cref="SpeechSynthesizer"/> that outputs to the
/// default audio device, injects the OneCore voices, then collects the voices installed for
/// <paramref name="languageCode"/> keyed by voice name.
/// </summary>
/// <param name="languageCode">Culture name passed to <see cref="CultureInfo"/>.</param>
public WindowsTTSEngine(string languageCode)
{
    _synthesizer = new SpeechSynthesizer();
    _synthesizer.SetOutputToDefaultAudioDevice();
    _synthesizer.Rate = 1;

    // Must run before GetInstalledVoices so the injected voices are part of the enumeration.
    SpeechApiReflectionHelper.InjectOneCoreVoices(_synthesizer);

    var voicesByName = new Dictionary<string, VoiceInfo>();
    foreach (var installedVoice in _synthesizer.GetInstalledVoices(new CultureInfo(languageCode)))
    {
        // TryAdd keeps the first voice for a given name; ToDictionary would throw on a duplicate name.
        voicesByName.TryAdd(installedVoice.VoiceInfo.Name, installedVoice.VoiceInfo);
    }

    _voices = new ReadOnlyDictionary<string, VoiceInfo>(voicesByName);

    // No installed voice for the language must not break construction: fall back to "no voice"
    // (null), as the earlier FirstOrDefault()?.VoiceInfo initialisation did.
    _voiceInfo = voicesByName.Values.FirstOrDefault();
}

public void SpeechText(string text)
Expand All @@ -36,4 +41,88 @@ public void Dispose()
{
_synthesizer.Dispose();
}

/// <summary>
/// Returns the keys of <c>_voices</c> (the voice names) copied into a new array.
/// </summary>
public string[] GetVoices()
{
    var voiceNames = _voices.Keys;
    return voiceNames.ToArray();
}

/// <summary>
/// Selects a voice by name. The name is matched case-insensitively (ordinal) against the keys of
/// <c>_voices</c>, and the matching <see cref="VoiceInfo"/> is stored in <c>_voiceInfo</c>.
/// </summary>
/// <param name="voice">Name of the voice to select.</param>
/// <exception cref="InvalidOperationException">
/// No key of <c>_voices</c> matches <paramref name="voice"/>.
/// </exception>
public void SetVoice(string voice)
{
    foreach (var entry in _voices)
    {
        if (entry.Key.Equals(voice, StringComparison.OrdinalIgnoreCase))
        {
            _voiceInfo = entry.Value;
            return;
        }
    }

    // Same exception type Enumerable.First would raise, but the message names the offending voice.
    throw new InvalidOperationException(
        $"Voice '{voice}' is not available. Available voices: {string.Join(", ", _voices.Keys)}");
}

// By default SpeechSynthesizer does not show all available voices.
// This helper appends the voices registered under the Speech_OneCore registry key to the
// synthesizer's private list of installed voices, using reflection over System.Speech internals.
// Approach: https://stackoverflow.com/a/71198211
private static class SpeechApiReflectionHelper
{
    private const string PROP_VOICE_SYNTHESIZER = "VoiceSynthesizer";
    private const string FIELD_INSTALLED_VOICES = "_installedVoices";

    private const string ONE_CORE_VOICES_REGISTRY = @"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech_OneCore\Voices";

    private const BindingFlags NON_PUBLIC_INSTANCE = BindingFlags.Instance | BindingFlags.NonPublic;

    private static readonly Type _objectTokenCategoryType = typeof(SpeechSynthesizer).Assembly
        .GetType("System.Speech.Internal.ObjectTokens.ObjectTokenCategory")!;

    private static readonly Type _voiceInfoType = typeof(SpeechSynthesizer).Assembly
        .GetType("System.Speech.Synthesis.VoiceInfo")!;

    private static readonly Type _installedVoiceType = typeof(SpeechSynthesizer).Assembly
        .GetType("System.Speech.Synthesis.InstalledVoice")!;

    /// <summary>
    /// Adds one InstalledVoice per usable OneCore voice token to the private installed-voices
    /// list of <paramref name="synthesizer"/>.
    /// </summary>
    /// <param name="synthesizer">Synthesizer whose private voice list is extended.</param>
    /// <exception cref="NotSupportedException">
    /// One of the non-public members or constructors this relies on could not be found or used.
    /// </exception>
    public static void InjectOneCoreVoices(SpeechSynthesizer synthesizer)
    {
        var innerSynthesizer = GetProperty(synthesizer, PROP_VOICE_SYNTHESIZER);
        if (innerSynthesizer is null)
        {
            throw new NotSupportedException($"Property not found: {PROP_VOICE_SYNTHESIZER}");
        }

        var voiceList = GetField(innerSynthesizer, FIELD_INSTALLED_VOICES) as IList;
        if (voiceList is null)
        {
            throw new NotSupportedException($"Field not found or null: {FIELD_INSTALLED_VOICES}");
        }

        // ObjectTokenCategory.Create is a non-public static factory; the result has to be disposed.
        var createMethod = _objectTokenCategoryType.GetMethod("Create", BindingFlags.Static | BindingFlags.NonPublic);
        var category = createMethod?.Invoke(null, new object?[] { ONE_CORE_VOICES_REGISTRY }) as IDisposable;
        if (category is null)
        {
            throw new NotSupportedException($"Failed to call Create on {_objectTokenCategoryType} instance");
        }

        try
        {
            var findTokensMethod = _objectTokenCategoryType.GetMethod("FindMatchingTokens", NON_PUBLIC_INSTANCE);
            var voiceTokens = findTokensMethod?.Invoke(category, new object?[] { null, null }) as IList;
            if (voiceTokens is null)
            {
                throw new NotSupportedException("Failed to list matching tokens");
            }

            foreach (var voiceToken in voiceTokens)
            {
                // Tokens without an Attributes value are skipped.
                if (voiceToken is null)
                {
                    continue;
                }

                if (GetProperty(voiceToken, "Attributes") is null)
                {
                    continue;
                }

                var voiceInfo = CreateNonPublicInstance(_voiceInfoType, new object[] { voiceToken });
                if (voiceInfo is null)
                {
                    throw new NotSupportedException($"Failed to instantiate {_voiceInfoType}");
                }

                var installedVoice = CreateNonPublicInstance(_installedVoiceType, new object[] { innerSynthesizer, voiceInfo });
                if (installedVoice is null)
                {
                    throw new NotSupportedException($"Failed to instantiate {_installedVoiceType}");
                }

                voiceList.Add(installedVoice);
            }
        }
        finally
        {
            category.Dispose();
        }
    }

    // Instantiates a System.Speech type through a non-public instance constructor (type name matched ignoring case).
    private static object? CreateNonPublicInstance(Type type, object[] args)
    {
        return typeof(SpeechSynthesizer).Assembly
            .CreateInstance(type.FullName!, true, NON_PUBLIC_INSTANCE, null, args, null, null);
    }

    // Reads a non-public instance property; null when the property does not exist.
    private static object? GetProperty(object target, string propName)
    {
        var property = target.GetType().GetProperty(propName, NON_PUBLIC_INSTANCE);
        return property?.GetValue(target);
    }

    // Reads a non-public instance field; null when the field does not exist.
    private static object? GetField(object target, string propName)
    {
        var field = target.GetType().GetField(propName, NON_PUBLIC_INSTANCE);
        return field?.GetValue(target);
    }
}
}
96 changes: 96 additions & 0 deletions src/Translumo.TTS/Engines/YandexTTSEngine.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
using NAudio.Wave;

namespace Translumo.TTS.Engines;

/// <summary>
/// Text-to-speech engine that downloads synthesized MP3 audio from the Yandex
/// <c>tts.voicetech.yandex.net</c> HTTP endpoint and plays it through an NAudio
/// <see cref="DirectSoundOut"/> device. Each call to <see cref="SpeechText"/> supersedes the previous one.
/// </summary>
public class YandexTTSEngine : ITTSEngine
{
    // One shared client for all requests: creating an HttpClient per request leaks sockets.
    private static readonly HttpClient _httpClient = new HttpClient();

    // Built once; the table never changes at runtime.
    private static readonly Dictionary<string, string[]> _supportedVoices = SupportedVoices();

    private readonly string _langCode;
    private string _voice;
    private CancellationTokenSource _tokenSource;
    private readonly DirectSoundOut _naudioPlayer;
    private readonly string _emotion = "neutral"; // neutral|good|evil
    private const float _speed = 1.0f;
    private const string _format = "mp3"; // ogg | mp3 | wav
    private const string _key = "1"; // Yandex does not validate the key; any non-empty value works
    private bool _disposed;

    /// <summary>
    /// Creates the engine for <paramref name="langCode"/> and selects the first voice of that language.
    /// </summary>
    /// <param name="langCode">Language code such as <c>ru-RU</c>, <c>en-US</c> or <c>tr-TR</c>.</param>
    /// <exception cref="InvalidDataException">The language is not in the supported-voices table.</exception>
    public YandexTTSEngine(string langCode)
    {
        _langCode = langCode;
        _voice = VoicesForLanguage().First();
        _naudioPlayer = new DirectSoundOut();
        _tokenSource = new CancellationTokenSource();
    }

    /// <summary>
    /// Cancels the pending request (if any) and starts a background download-and-play of
    /// <paramref name="text"/>. Returns immediately; failures of the background work are not reported
    /// to the caller.
    /// </summary>
    /// <param name="text">Text to speak. Null or whitespace only cancels the pending request.</param>
    public void SpeechText(string text)
    {
        if (_disposed)
        {
            return;
        }

        // Swap in a fresh source first, then cancel and release the one being replaced.
        var previousSource = _tokenSource;
        _tokenSource = new CancellationTokenSource();
        previousSource.Cancel();
        previousSource.Dispose();

        if (string.IsNullOrWhiteSpace(text))
        {
            // Nothing to synthesize; avoid a request that can only fail.
            return;
        }

        var currentToken = _tokenSource.Token;
        _ = Task.Run(() => SpeechTextInternalAsync(text, currentToken));
    }

    // Downloads the audio, starts playback and keeps the reader alive for the length of the clip.
    // NOTE(review): the player is shared by overlapping background requests without synchronization,
    // as before; the cancellation check below only narrows the window.
    private async Task SpeechTextInternalAsync(string text, CancellationToken token)
    {
        try
        {
            using var stream = await RequestAudioAsync(text, token).ConfigureAwait(false);
            using var audioFileReader = new Mp3FileReader(stream);

            // A request superseded (or an engine disposed) while downloading must not start playing.
            token.ThrowIfCancellationRequested();

            _naudioPlayer.Stop();
            _naudioPlayer.Init(audioFileReader);
            _naudioPlayer.Play();

            // Round up so the reader is never released before the clip has finished.
            var duration = Convert.ToInt32(Math.Round(audioFileReader.TotalTime.TotalMilliseconds, MidpointRounding.ToPositiveInfinity));
            await Task.Delay(duration, token).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (token.IsCancellationRequested)
        {
            // Expected when a newer SpeechText call or Dispose cancelled this request.
        }
    }

    // Fetches the synthesized audio fully into memory so the HTTP response can be released right away.
    private async Task<Stream> RequestAudioAsync(string text, CancellationToken token)
    {
        using var httpResponse = await _httpClient.GetAsync(BuildUrl(text), token).ConfigureAwait(false);
        if (httpResponse.IsSuccessStatusCode)
        {
            var audioBytes = await httpResponse.Content.ReadAsByteArrayAsync(token).ConfigureAwait(false);
            return new MemoryStream(audioBytes, writable: false);
        }

        var error = await httpResponse.Content.ReadAsStringAsync(token).ConfigureAwait(false);
        throw new InvalidOperationException($"Failed to get sound: '{httpResponse.StatusCode}, {error}'");
    }

    // Builds the request URL; the text is URL-escaped and the speed is formatted culture-independently.
    private string BuildUrl(string text)
    {
        var encodedText = Uri.EscapeDataString(text);
        // Invariant culture: a current culture with a decimal comma would corrupt the query value.
        var speed = _speed.ToString("#.##", System.Globalization.CultureInfo.InvariantCulture);
        return $"https://tts.voicetech.yandex.net/generate?text={encodedText}&lang={_langCode}&key={_key}&speaker={_voice}&format={_format}&speed={speed}&emotion={_emotion}&quality=hi";
    }

    /// <summary>
    /// Selects the voice used for subsequent requests; the name is matched case-insensitively (ordinal).
    /// </summary>
    /// <param name="voice">Name of the voice.</param>
    /// <exception cref="InvalidDataException">
    /// The voice is not listed for the engine's language, or the language itself is not supported.
    /// </exception>
    public void SetVoice(string voice) =>
        _voice = VoicesForLanguage().FirstOrDefault(x => x.Equals(voice, StringComparison.OrdinalIgnoreCase))
            ?? throw new InvalidDataException($"Voice '{voice}' doesnt support '{_langCode}' language ");

    /// <summary>
    /// Returns the voice names available for the engine's language, as a new array on every call.
    /// </summary>
    /// <exception cref="InvalidDataException">The engine's language is not supported.</exception>
    public string[] GetVoices() => (string[])VoicesForLanguage().Clone();

    /// <summary>
    /// Cancels the pending request, then stops and releases the audio player. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _tokenSource.Cancel();
        _tokenSource.Dispose();
        _naudioPlayer.Stop();
        _naudioPlayer.Dispose();
    }

    // Looks up the shared voice list of the engine's language without copying it.
    private string[] VoicesForLanguage() =>
        _supportedVoices.GetValueOrDefault(_langCode)
            ?? throw new InvalidDataException($"{nameof(YandexTTSEngine)} doesnt support '{_langCode}' language");

    // Builds the language -> voices table; every supported language maps to the same list.
    private static Dictionary<string, string[]> SupportedVoices()
    {
        var listOfVoices = new[] {
            // dictor dialog voices: zahar, ermil, filipp,| jane, omazh, alena
            // NOTE(review): the list below has "dude" and "oksana" where the line above names
            // "filipp" and "alena" — confirm which set is intended.
            "zahar", "ermil", "dude", "jane", "omazh", "oksana",
            // additional voices
            "alyss", "erkanyavas", "ermilov", "kolya", "kostya", "levitan", "nastya", "nick", "sasha", "silaerkan", "smoky", "tanya", "voicesearch", "zhenya", "zombie"
        };

        return new Dictionary<string, string[]>()
        {
            { "ru-RU", listOfVoices},
            { "en-US", listOfVoices},
            { "tr-TR", listOfVoices},
        };
    }
}
6 changes: 6 additions & 0 deletions src/Translumo.TTS/IObserverAvailableVoices.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace Translumo.TTS;

/// <summary>
/// Implemented by types that want to be handed a list of voice names.
/// NOTE(review): presumably notified when the voices offered by the active TTS engine change —
/// the notifying side is not visible here; confirm against the caller.
/// </summary>
public interface IObserverAvailableVoices
{
/// <summary>Receives a list of voice names.</summary>
/// <param name="currentVoices">
/// Voice names passed by the notifier (presumably the currently available ones — confirm).
/// </param>
void UpdateVoice(IList<string> currentVoices);
}
1 change: 1 addition & 0 deletions src/Translumo.TTS/TTSEngines.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ public enum TTSEngines
None = 0,
WindowsTTS = 1,
SileroTTS = 2,
YandexTTS = 3,
}
}
1 change: 1 addition & 0 deletions src/Translumo.TTS/Translumo.TTS.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="7.0.0" />
<PackageReference Include="NAudio" Version="2.2.1" />
<PackageReference Include="System.Speech" Version="7.0.0" />
</ItemGroup>

Expand Down
14 changes: 11 additions & 3 deletions src/Translumo.TTS/TtsConfiguration.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using Translumo.Infrastructure.Language;
using System.Collections.ObjectModel;
using Translumo.Infrastructure.Language;
using Translumo.Utils;
using Windows.Security.EnterpriseData;

namespace Translumo.TTS;

Expand All @@ -12,12 +12,14 @@ public class TtsConfiguration : BindableBase
{
TtsLanguage = Languages.English,
TtsSystem = TTSEngines.None,
InstalledWinTtsLanguages = new List<Languages>()
InstalledWinTtsLanguages = new List<Languages>(),
_currentVoice = string.Empty,
};

private TTSEngines _ttsSystem;
private Languages _ttsLanguage;
private List<Languages> _installedWinTtsLanguages;
private string _currentVoice;

public TTSEngines TtsSystem
{
Expand All @@ -37,6 +39,12 @@ public Languages TtsLanguage
}
}

/// <summary>
/// Voice name stored in this configuration. Reads return the backing field <c>_currentVoice</c>;
/// writes go through <c>SetProperty</c>.
/// </summary>
public string CurrentVoice
{
    get
    {
        return _currentVoice;
    }
    set
    {
        SetProperty(ref _currentVoice, value);
    }
}

public List<Languages> InstalledWinTtsLanguages
{
get => _installedWinTtsLanguages;
Expand Down
Loading