-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtts.py
More file actions
55 lines (46 loc) · 1.86 KB
/
tts.py
File metadata and controls
55 lines (46 loc) · 1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# tts.py
from google.cloud import texttospeech
import numpy as np
import sounddevice as sd
# ====== Audio I/O config (match your working output test) ======
# These constants configure sounddevice below and are reused by
# text_to_speech(): SAMPLE_RATE is both the rate requested from Google TTS
# and the playback rate, and OUTPUT_DEV is passed explicitly to sd.play().
SAMPLE_RATE = 48000 # 48 kHz to match USB device
CHANNELS_OUT = 2 # play as stereo (duplicate mono to L/R)
DTYPE = "int16" # sample format installed as the sounddevice default
OUTPUT_DEV = "hw:3,0" # set to your USB output device (e.g., "plughw:3,0")
# Pre-configure sounddevice defaults.
# NOTE: these assignments run at import time and change sounddevice's
# module-wide defaults for every other user of `sd` in the process.
sd.default.samplerate = SAMPLE_RATE
sd.default.channels = CHANNELS_OUT
sd.default.dtype = DTYPE
# sounddevice treats a 2-tuple here as (input, output); None presumably
# leaves the input device at the library default - confirm against the docs.
sd.default.device = (None, OUTPUT_DEV)
# ====== Google TTS client ======
# Created once at import time and shared by every text_to_speech() call.
# NOTE(review): constructing the client presumably requires Google Cloud
# credentials to be configured in the environment - verify for your deployment.
tts_client = texttospeech.TextToSpeechClient()
def _mono_pcm16_to_stereo_int16(raw_bytes: bytes) -> np.ndarray:
    """Convert mono PCM16 little-endian bytes to a stereo int16 numpy array.

    Google TTS delivers LINEAR16 audio inside a RIFF/WAVE container. If the
    buffer starts with such a header, it is skipped so that only the samples
    of the ``data`` chunk are decoded; decoding the header as audio would be
    heard as a click at the start of playback. Header-less PCM is accepted
    unchanged.

    Args:
        raw_bytes: Mono 16-bit little-endian PCM, optionally preceded by a
            WAV header.

    Returns:
        An array of shape ``(N, 2)`` and dtype ``int16`` holding the mono
        signal duplicated into the left and right columns. ``N`` is 0 for
        empty input.
    """
    start, end = 0, len(raw_bytes)

    if raw_bytes[:4] == b"RIFF" and raw_bytes[8:12] == b"WAVE":
        # Assume no audio until a "data" chunk is actually found.
        start = end
        pos = 12
        # Walk the RIFF chunks: 4-byte id followed by a 4-byte LE length.
        while pos + 8 <= end:
            chunk_id = raw_bytes[pos:pos + 4]
            chunk_size = int.from_bytes(raw_bytes[pos + 4:pos + 8], "little")
            pos += 8
            if chunk_id == b"data":
                start = pos
                # A zero or oversized length is treated as "until the end
                # of the buffer" so a sloppy header never drops the audio.
                if 0 < chunk_size < end - pos:
                    end = pos + chunk_size
                break
            # Chunks are padded to an even number of bytes.
            pos += chunk_size + (chunk_size & 1)

    # Drop a stray trailing byte; np.frombuffer rejects partial samples.
    end -= (end - start) % 2

    mono = np.frombuffer(raw_bytes[start:end], dtype="<i2")
    # Normalise to native int16 (a no-op on little-endian hosts).
    mono = mono.astype(np.int16, copy=False)
    # Two identical columns -> shape (N, 2): left and right channel.
    return np.stack([mono, mono], axis=1)
def text_to_speech(text: str) -> None:
    """Speak *text* aloud in Korean through the configured output device.

    The text is synthesized by Google TTS (voice ``ko-KR-Standard-A``) as
    LINEAR16 audio at ``SAMPLE_RATE``, expanded from mono to stereo and
    played with sounddevice. The call returns only after playback has
    finished.

    Args:
        text: The text to speak. A falsy value (empty string or ``None``)
            makes the function return immediately without any API call.
    """
    # Nothing to say: skip both the network request and the audio device.
    if not text:
        return

    # Request LINEAR16 at the same rate that is used for playback below.
    reply = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=texttospeech.VoiceSelectionParams(
            language_code="ko-KR",
            name="ko-KR-Standard-A",
        ),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.LINEAR16,
            sample_rate_hertz=SAMPLE_RATE,
        ),
    )

    # Duplicate the mono signal into left/right channels for the stereo device.
    frames = _mono_pcm16_to_stereo_int16(reply.audio_content)

    # blocking=True keeps the caller waiting until the clip has been played.
    sd.play(frames, SAMPLE_RATE, blocking=True, device=OUTPUT_DEV)