-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
135 lines (105 loc) · 4.83 KB
/
main.py
File metadata and controls
135 lines (105 loc) · 4.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
from pathlib import Path
from typing import Optional
import typer
from dotenv import load_dotenv
from gtts import gTTS
# Run python-dotenv's loader at import time, before any command executes;
# the azure command below reads SPEECH_ENDPOINT and SPEECH_RESOURCE_ID from
# os.environ.
load_dotenv()
# Root Typer application; each @app.command() function below is registered
# on it as a sub-command.
app = typer.Typer(help="Language flashcard generator with TTS and AI enhancement.")
@app.command()
def google(
    text: str = typer.Argument(help="Text to convert to speech."),
    output: Path = typer.Option("output.mp3", "--output", "-o", help="Output file path."),
    lang: str = typer.Option("en", "--lang", "-l", help="Language code (e.g. en, es, fr, ja)."),
    slow: bool = typer.Option(False, "--slow", "-s", help="Speak slowly."),
) -> None:
    """Convert text to speech using Google TTS."""
    # gTTS.save() is handed a plain string path, so convert the Path once.
    destination = str(output)

    # Build the gTTS request from the CLI options and write the audio in a
    # single step; any exception raised by gTTS propagates to the caller.
    gTTS(text=text, lang=lang, slow=slow).save(destination)

    # Report where the audio ended up.
    typer.echo(f"Saved audio to {output}")
@app.command()
def azure(
    text: str = typer.Argument(help="Text to convert to speech."),
    output: Path = typer.Option("output.mp3", "--output", "-o", help="Output file path."),
    voice: Optional[str] = typer.Option(None, "--voice", "-v", help="Azure neural voice name (e.g. zu-ZA-ThandoNeural)."),
    lang: str = typer.Option("en-US", "--lang", "-l", help="Language code (e.g. en-US, zu-ZA, ja-JP)."),
    rate: str = typer.Option("medium", "--rate", "-r", help="Speech rate: x-slow, slow, medium, fast, x-fast, or percentage (e.g. +50%, -30%)."),
) -> None:
    """Convert text to speech using Azure TTS."""
    # Overview:
    #   1. Read SPEECH_ENDPOINT / SPEECH_RESOURCE_ID from the environment and
    #      exit with status 1 if either is missing.
    #   2. Obtain an AAD token through DefaultAzureCredential and install it
    #      on the speech config as "aad#<resource id>#<token>".
    #   3. Wrap the text in an SSML document (voice + prosody rate) and
    #      synthesize it to `output` as 16 kHz / 32 kbit/s mono MP3.
    #   4. Print the saved path on success; otherwise print the cancellation
    #      reason and details to stderr and exit with status 1.
    from xml.sax.saxutils import escape

    # Imported lazily so the other commands work without the Azure SDKs.
    import azure.cognitiveservices.speech as speechsdk
    from azure.identity import DefaultAzureCredential

    endpoint = os.environ.get("SPEECH_ENDPOINT")
    resource_id = os.environ.get("SPEECH_RESOURCE_ID")
    if not endpoint or not resource_id:
        typer.echo("Error: SPEECH_ENDPOINT and SPEECH_RESOURCE_ID environment variables are required.", err=True)
        raise typer.Exit(1)

    speech_config = speechsdk.SpeechConfig(endpoint=endpoint)

    # Authenticate via az cli / DefaultAzureCredential
    aad_token = (
        DefaultAzureCredential()
        .get_token("https://cognitiveservices.azure.com/.default")
        .token
    )
    speech_config.authorization_token = f"aad#{resource_id}#{aad_token}"

    # An explicit voice takes precedence; otherwise only the language is set.
    if voice:
        speech_config.speech_synthesis_voice_name = voice
    else:
        speech_config.speech_synthesis_language = lang

    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
    )

    audio_config = speechsdk.audio.AudioOutputConfig(filename=str(output))
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    # NOTE(review): when --voice is omitted this reads back the config's voice
    # name, which this function never set -- confirm the service accepts that
    # value inside <voice name="...">.
    voice_name = voice or speech_config.speech_synthesis_voice_name

    # User-supplied values are XML-escaped before being placed in the SSML:
    # a raw "&", "<" or '"' in the text or options would otherwise produce a
    # malformed document (or let the input inject its own markup). Attribute
    # values additionally need the double quote escaped.
    attr_entities = {'"': "&quot;"}
    ssml_lang = escape(lang, attr_entities)
    ssml_voice = escape(voice_name, attr_entities)
    ssml_rate = escape(rate, attr_entities)
    ssml_text = escape(text)

    ssml = (
        f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{ssml_lang}">'
        f'<voice name="{ssml_voice}">'
        f'<prosody rate="{ssml_rate}">{ssml_text}</prosody>'
        f'</voice></speak>'
    )

    # Block until the asynchronous synthesis call has finished.
    result = synthesizer.speak_ssml_async(ssml).get()

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        typer.echo(f"Saved audio to {output}")
    else:
        details = result.cancellation_details
        typer.echo(f"Synthesis failed: {details.reason} — {details.error_details}", err=True)
        raise typer.Exit(1)
@app.command()
def build_deck(
    config: Path = typer.Argument(help="Path to YAML config file (e.g. configs/zulu_common.yaml)."),
    size: Optional[int] = typer.Option(None, "--size", "-s", help="Limit to N cards."),
    render_only: bool = typer.Option(False, "--render-only", help="Re-render .mochi from cached cards only."),
    start: int = typer.Option(0, "--start", help="Start from card index (for resume)."),
) -> None:
    """Build a Mochi flashcard deck from frequency lists using Claude AI and Azure TTS."""
    # Loads the YAML config, turns it into an EnhancementConfig, and hands
    # the work to FlashcardEnhancer.build(); exits with status 1 when the
    # config is missing or unusable, or when build() returns a falsy result.
    #
    # Imported lazily so the plain TTS commands do not need these modules.
    import yaml

    from enhancer import FlashcardEnhancer
    from models import EnhancementConfig

    if not config.exists():
        typer.echo(f"Config file not found: {config}", err=True)
        raise typer.Exit(1)

    # Decode explicitly as UTF-8: the locale default encoding would
    # mis-decode non-ASCII config content on non-UTF-8 platforms.
    with open(config, "r", encoding="utf-8") as f:
        config_data = yaml.safe_load(f)

    # safe_load() yields None for an empty file and may yield a list or a
    # scalar; only a mapping can be expanded into keyword arguments below.
    if not isinstance(config_data, dict):
        typer.echo(f"Config file must contain a YAML mapping: {config}", err=True)
        raise typer.Exit(1)

    enhancement_config = EnhancementConfig(**config_data)
    enhancer = FlashcardEnhancer(enhancement_config)

    success = enhancer.build(size=size, render_only=render_only, start=start)
    if not success:
        raise typer.Exit(1)
@app.command()
def clean(
    config: Path = typer.Argument(help="Path to YAML config file."),
) -> None:
    """Remove empty (0-byte) audio files left by failed TTS generation."""
    # Loads the YAML config, turns it into an EnhancementConfig, and calls
    # FlashcardEnhancer.clean_empty_audio(); exits with status 1 when the
    # config is missing or unusable.
    #
    # Imported lazily so the plain TTS commands do not need these modules.
    import yaml

    from enhancer import FlashcardEnhancer
    from models import EnhancementConfig

    if not config.exists():
        typer.echo(f"Config file not found: {config}", err=True)
        raise typer.Exit(1)

    # Decode explicitly as UTF-8: the locale default encoding would
    # mis-decode non-ASCII config content on non-UTF-8 platforms.
    with open(config, "r", encoding="utf-8") as f:
        config_data = yaml.safe_load(f)

    # safe_load() yields None for an empty file and may yield a list or a
    # scalar; only a mapping can be expanded into keyword arguments below.
    if not isinstance(config_data, dict):
        typer.echo(f"Config file must contain a YAML mapping: {config}", err=True)
        raise typer.Exit(1)

    enhancement_config = EnhancementConfig(**config_data)
    enhancer = FlashcardEnhancer(enhancement_config)
    enhancer.clean_empty_audio()
# Run the Typer CLI only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    app()