From f7389c596e60eddd3394b8bd8eb54e9e5826c39b Mon Sep 17 00:00:00 2001 From: Ivan Date: Sun, 8 Oct 2023 21:58:52 +0300 Subject: [PATCH 01/10] add voice switcher --- .../TranslationProcessingService.cs | 14 ++++++++---- src/Translumo.TTS/Engines/ITTSEngine.cs | 6 ++++- src/Translumo.TTS/Engines/NoneTTSEngine.cs | 6 +++++ src/Translumo.TTS/Engines/SileroTTSEngine.cs | 8 ++++++- src/Translumo.TTS/Engines/WindowsTTSEngine.cs | 14 ++++++++---- src/Translumo.TTS/TtsConfiguration.cs | 22 +++++++++++++++++++ src/Translumo.TTS/TtsFactory.cs | 18 +++++++++++++-- .../MVVM/Views/LanguagesSettingsView.xaml | 4 ++++ .../Resources/Localization/lang.en-US.xaml | 1 + .../Resources/Localization/lang.ru-RU.xaml | 1 + src/Translumo/Translumo.csproj | 1 - 11 files changed, 82 insertions(+), 13 deletions(-) diff --git a/src/Translumo.Processing/TranslationProcessingService.cs b/src/Translumo.Processing/TranslationProcessingService.cs index 11cb38b..e0ba352 100644 --- a/src/Translumo.Processing/TranslationProcessingService.cs +++ b/src/Translumo.Processing/TranslationProcessingService.cs @@ -52,10 +52,10 @@ public class TranslationProcessingService : IProcessingService, IDisposable private long _lastTranslatedTextTicks; private const float MIN_SCORE_THRESHOLD = 2.1f; - + public TranslationProcessingService(ICapturerFactory capturerFactory, IChatTextMediator chatTextMediator, OcrEnginesFactory ocrEnginesFactory, TranslatorFactory translationFactory, TtsFactory ttsFactory, TtsConfiguration ttsConfiguration, - TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration, + TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration, TextResultCacheService textResultCacheService, TextProcessingConfiguration textConfiguration, ILogger logger) { _logger = logger; @@ -236,7 +236,7 @@ void CapturerEnsureInitialized() continue; } - if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText, + if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText, bestDetected.Language.Asian, out iterationId)) { sequentialText = false; @@ -257,7 +257,7 @@ void CapturerEnsureInitialized() } _logger.LogError(ex, $"Screen capture failed (code: {ex.ErrorCode})"); - + _capturer.Dispose(); _capturer = null; CapturerEnsureInitialized(); @@ -408,8 +408,14 @@ private void TtsConfigurationOnPropertyChanged(object sender, PropertyChangedEve || e.PropertyName == nameof(_ttsConfiguration.TtsSystem)) { _ttsEngine.Dispose(); + _ttsEngine = null; _ttsEngine = _ttsFactory.CreateTtsEngine(_ttsConfiguration); } + else if (e.PropertyName == nameof(_ttsConfiguration.CurrentVoice) + && _ttsEngine != null) + { + _ttsEngine.SetVoice(_ttsConfiguration.CurrentVoice); + } } private void OcrGeneralConfigurationOnPropertyChanged(object sender, PropertyChangedEventArgs e) diff --git a/src/Translumo.TTS/Engines/ITTSEngine.cs b/src/Translumo.TTS/Engines/ITTSEngine.cs index e919f9d..191bf73 100644 --- a/src/Translumo.TTS/Engines/ITTSEngine.cs +++ b/src/Translumo.TTS/Engines/ITTSEngine.cs @@ -1,6 +1,10 @@ namespace Translumo.TTS.Engines; -public interface ITTSEngine: IDisposable +public interface ITTSEngine : IDisposable { void SpeechText(string text); + + string[] GetVoices(); + + void SetVoice(string voice); } diff --git a/src/Translumo.TTS/Engines/NoneTTSEngine.cs b/src/Translumo.TTS/Engines/NoneTTSEngine.cs index 6ef3e2a..e08424d 100644 --- a/src/Translumo.TTS/Engines/NoneTTSEngine.cs +++ b/src/Translumo.TTS/Engines/NoneTTSEngine.cs @@ -10,6 +10,12 @@ public void Dispose() { } + public string[] GetVoices() => new[] { "None" }; + + public void SetVoice(string voice) + { + } + public void SpeechText(string text) { } diff --git a/src/Translumo.TTS/Engines/SileroTTSEngine.cs b/src/Translumo.TTS/Engines/SileroTTSEngine.cs index 8c4bf5d..238dcb2 100644 --- a/src/Translumo.TTS/Engines/SileroTTSEngine.cs +++ b/src/Translumo.TTS/Engines/SileroTTSEngine.cs @@ -8,6 +8,7 @@ public class SileroTTSEngine : ITTSEngine { private dynamic _ipython; private dynamic _model; + private string[] _voices; private string _voice; private readonly string _modelPath; private readonly PythonEngineWrapper _pythonEngine; @@ -50,7 +51,8 @@ private void Init() _pyObjects.Add(_ipython); }); - _voice = ((string[])_model.speakers).First(); + _voices = (string[])_model.speakers; + _voice = _voices.First(); } public void SpeechText(string text) @@ -140,5 +142,9 @@ private string GetModelFullPath(string langCode) _ => null }; + public string[] GetVoices() => _voices; + + public void SetVoice(string voice) => _voice = _voices.First(x => x.Equals(voice, StringComparison.OrdinalIgnoreCase)); + private sealed record ModelDescription(string FileUrl, string WarmUpText); } \ No newline at end of file diff --git a/src/Translumo.TTS/Engines/WindowsTTSEngine.cs b/src/Translumo.TTS/Engines/WindowsTTSEngine.cs index ef8b2f4..c52a010 100644 --- a/src/Translumo.TTS/Engines/WindowsTTSEngine.cs +++ b/src/Translumo.TTS/Engines/WindowsTTSEngine.cs @@ -1,20 +1,22 @@ -using System.Globalization; +using System.Collections.ObjectModel; +using System.Globalization; using System.Speech.Synthesis; namespace Translumo.TTS.Engines; public class WindowsTTSEngine : ITTSEngine { - private readonly VoiceInfo _voiceInfo; + private VoiceInfo _voiceInfo; private readonly SpeechSynthesizer _synthesizer; + private readonly ReadOnlyDictionary _voices; public WindowsTTSEngine(string languageCode) { _synthesizer = new SpeechSynthesizer(); _synthesizer.SetOutputToDefaultAudioDevice(); _synthesizer.Rate = 1; - - _voiceInfo = _synthesizer.GetInstalledVoices(new CultureInfo(languageCode)).FirstOrDefault()?.VoiceInfo; + _voices = _synthesizer.GetInstalledVoices(new CultureInfo(languageCode)).ToDictionary(x => x.VoiceInfo.Name, x => x.VoiceInfo).AsReadOnly(); + _voiceInfo = _voices.First().Value; } public void SpeechText(string text) @@ -36,4 +38,8 @@ public void Dispose() { _synthesizer.Dispose(); } + + public string[] GetVoices() => _voices.Keys.ToArray(); + + public void SetVoice(string voice) => _voiceInfo = _voices.First(x => x.Key.Equals(voice, StringComparison.OrdinalIgnoreCase)).Value; } \ No newline at end of file diff --git a/src/Translumo.TTS/TtsConfiguration.cs b/src/Translumo.TTS/TtsConfiguration.cs index 334a3c5..fc76e1f 100644 --- a/src/Translumo.TTS/TtsConfiguration.cs +++ b/src/Translumo.TTS/TtsConfiguration.cs @@ -18,11 +18,15 @@ public TtsConfiguration(LanguageService languageService) TtsSystem = TTSEngines.None; InstalledWinTtsLanguages = new List(); _languageService = languageService; + _availableVoices = new(); + _currentVoice = string.Empty; } private TTSEngines _ttsSystem; private Languages _ttsLanguage; private List _installedWinTtsLanguages; + private List _availableVoices; + private string _currentVoice; private readonly LanguageService _languageService; public bool IsLanguageSupportedInTtsEngine(TTSEngines engine, Languages lang) => @@ -46,6 +50,24 @@ public Languages TtsLanguage } } + public List AvailableVoices + { + get => _availableVoices; + set + { + SetProperty(ref _availableVoices, value); + } + } + + public string CurrentVoice + { + get => _currentVoice; + set + { + SetProperty(ref _currentVoice, value); + } + } + public List InstalledWinTtsLanguages { get => _installedWinTtsLanguages; diff --git a/src/Translumo.TTS/TtsFactory.cs b/src/Translumo.TTS/TtsFactory.cs index 789bd95..01e8b32 100644 --- a/src/Translumo.TTS/TtsFactory.cs +++ b/src/Translumo.TTS/TtsFactory.cs @@ -18,8 +18,9 @@ public TtsFactory(LanguageService languageService, PythonEngineWrapper pythonEng _logger = logger; } - public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration) => - ttsConfiguration.TtsSystem switch + public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration) + { + ITTSEngine ttsEngine = ttsConfiguration.TtsSystem switch { TTSEngines.None => new NoneTTSEngine(), TTSEngines.WindowsTTS => new WindowsTTSEngine(GetLangCode(ttsConfiguration)), @@ -27,6 +28,19 @@ public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration) => _ => throw new NotSupportedException() }; + var voices = ttsEngine.GetVoices(); + var currentVoice = voices.Contains(ttsConfiguration.CurrentVoice) + ? ttsConfiguration.CurrentVoice + : voices.First(); + + ttsConfiguration.AvailableVoices.Clear(); + ttsConfiguration.AvailableVoices.AddRange(voices); + + ttsConfiguration.CurrentVoice = currentVoice; + ttsEngine.SetVoice(currentVoice); + + return ttsEngine; + } public static bool IsLanguageSupported(TTSEngines engine, Languages language, LanguageService languageService) => engine switch diff --git a/src/Translumo/MVVM/Views/LanguagesSettingsView.xaml b/src/Translumo/MVVM/Views/LanguagesSettingsView.xaml index 82bfc8c..6610454 100644 --- a/src/Translumo/MVVM/Views/LanguagesSettingsView.xaml +++ b/src/Translumo/MVVM/Views/LanguagesSettingsView.xaml @@ -54,6 +54,10 @@