-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeak_hook.py
More file actions
111 lines (90 loc) · 2.94 KB
/
speak_hook.py
File metadata and controls
111 lines (90 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python3
# Copyright (c) 2026 Nardo. AGPL-3.0 — see LICENSE
"""
Claude Code Stop hook — reads assistant response from stdin and speaks it.
Checks /tmp/tts_muted IMMEDIATELY before speaking. No background races.
"""
import sys
import json
import re
import subprocess
import os
# Voice name handed to edge_tts.Communicate by the background speaker script.
EDGE_VOICE = "zh-HK-HiuMaanNeural"
# clean_text() cuts the cleaned text to this many characters before appending "……".
MAX_CHARS = 300
# Speech is skipped while this file exists (checked in main() and in the background script).
MUTE_FLAG = "/tmp/tts_muted"
# The background speaker writes its PID here; speak() reads it to kill the previous speaker.
SPEAK_PID_FILE = "/tmp/speak_hook_bg.pid"
def clean_text(text, max_chars=None):
    """Strip Markdown noise from *text* and cap its length for speech.

    Removes, in this order: fenced code blocks, inline code spans, heading
    markers, emphasis asterisks (the emphasised words are kept), URLs and
    pipe-delimited table rows. Runs of three or more newlines are then
    collapsed to two and surrounding whitespace is stripped.

    Args:
        text: Raw message text, possibly containing Markdown.
        max_chars: Maximum number of characters kept before the "……"
            ellipsis is appended. ``None`` (the default) uses the
            module-level ``MAX_CHARS``.

    Returns:
        The cleaned text; an empty string when nothing remains.

    Raises:
        ValueError: If *max_chars* is negative.
    """
    if max_chars is None:
        max_chars = MAX_CHARS
    if max_chars < 0:
        # A negative cap would silently slice from the end of the text.
        raise ValueError("max_chars must be non-negative")

    # Order matters: fenced blocks must go before inline code spans, otherwise
    # the backtick fences themselves would be consumed as inline code.
    substitutions = (
        (r"```.*?```", "", re.DOTALL),        # fenced code blocks
        (r"`[^`]+`", "", 0),                  # inline code
        (r"^#{1,6}\s+", "", re.MULTILINE),    # heading markers
        (r"\*+([^*]+)\*+", r"\1", 0),         # bold / italic asterisks
        (r"https?://\S+", "", 0),             # URLs
        (r"\|[^\n]+\|", "", 0),               # table rows
        (r"\n{3,}", "\n\n", 0),               # excess blank lines
    )
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)

    text = text.strip()
    if len(text) > max_chars:
        text = text[:max_chars] + "……"
    return text
def speak(text):
    """Speak *text* in a detached background process, replacing earlier speech.

    Any previous background speaker recorded in ``SPEAK_PID_FILE`` is killed
    first. A new Python child process is then started which:

    1. writes its own PID to ``SPEAK_PID_FILE``;
    2. waits (up to 60 polls of 0.1 s) while ``/tmp/recording_active`` or
       ``/tmp/transcribing_active`` exists;
    3. exits silently if another speaker has taken over the PID file or the
       ``MUTE_FLAG`` file exists;
    4. synthesises *text* with ``edge_tts`` using ``EDGE_VOICE`` and plays the
       resulting mp3 with ``afplay``.

    Args:
        text: Already-cleaned text to speak.

    Returns:
        None. The function does not wait for playback to finish.
    """
    import signal  # function-scope: not part of the module's import block

    # Stop any running `say` process (kept from the original behaviour;
    # playback below goes through afplay).
    subprocess.run(["pkill", "-x", "say"], capture_output=True)

    try:
        with open(SPEAK_PID_FILE) as pid_file:
            previous_pid = int(pid_file.read())
    except (OSError, ValueError):
        previous_pid = None  # no earlier speaker, or an unreadable PID file

    # Never signal PID 0 (our own process group), negative values (-1 means
    # "every process"), PID 1, or ourselves / our own group via a stale file.
    own_ids = {os.getpid(), os.getpgrp()}
    if previous_pid is not None and previous_pid > 1 and previous_pid not in own_ids:
        try:
            # The speaker is started as a session leader (see Popen below), so
            # its PID is also its process-group id. Killing the group also
            # stops its afplay child, which a plain kill of the parent would
            # leave playing over the new speech.
            os.killpg(previous_pid, signal.SIGKILL)
        except (OSError, OverflowError):
            # Not a group leader (or no such group): fall back to the
            # single-process kill.
            try:
                os.kill(previous_pid, signal.SIGKILL)
            except (OSError, OverflowError):
                pass  # already gone, or not ours to kill: best effort only

    # Background script — edge-tts generates mp3, afplay plays it.
    # Every interpolated value goes through repr() so it is a valid literal.
    script = f"""
import asyncio, os, subprocess, sys, time

PID_FILE = {SPEAK_PID_FILE!r}
MUTE_FLAG = {MUTE_FLAG!r}

my_pid = os.getpid()
with open(PID_FILE, "w") as fh:
    fh.write(str(my_pid))

# Wait for any active recording/transcription to finish
for _ in range(60):
    if not os.path.exists("/tmp/recording_active") and not os.path.exists("/tmp/transcribing_active"):
        break
    time.sleep(0.1)

# Check we are still the active speak process
try:
    with open(PID_FILE) as fh:
        current = int(fh.read())
except (OSError, ValueError):
    current = None
if current != my_pid:
    sys.exit(0)

# Mute check before spending time on synthesis
if os.path.exists(MUTE_FLAG):
    sys.exit(0)

async def _speak():
    import edge_tts
    mp3 = "/tmp/tts_output.mp3"
    communicate = edge_tts.Communicate({text!r}, {EDGE_VOICE!r})
    try:
        await communicate.save(mp3)
        # Final mute check — RIGHT before speaking; synthesis above takes time
        if not os.path.exists(MUTE_FLAG):
            subprocess.run(["afplay", mp3])
    finally:
        try:
            os.unlink(mp3)
        except OSError:
            pass

asyncio.run(_speak())
"""
    subprocess.Popen(
        # Re-use the interpreter running this hook so the child sees the same
        # site-packages (edge_tts); fall back to PATH lookup if it is unknown.
        [sys.executable or "python3", "-c", script],
        # Own session => own process group, which the next speak() can kill
        # as a unit (speaker + afplay).
        start_new_session=True,
    )
def main():
    """Hook entry point: read the JSON payload on stdin and speak its message.

    Reads the whole of stdin, parses it as JSON and takes the
    ``last_assistant_message`` field. Every invocation appends one line to
    ``/tmp/speak_hook_debug.log``. Nothing is spoken when the ``MUTE_FLAG``
    file exists or when the message is empty after cleaning.

    Any exception is appended to ``/tmp/speak_hook_error.log`` together with
    the first 200 characters of the raw input instead of being raised.

    Returns:
        None.
    """
    import time  # function-scope: not part of the module's import block

    raw = ""  # bound before the try so the error handler can always quote it
    try:
        raw = sys.stdin.read()
        data = json.loads(raw)
        text = data.get("last_assistant_message", "")
        if not isinstance(text, str):
            # A null / non-string message means there is nothing to speak;
            # without this, len(text) below would raise and spam the error log.
            text = ""

        # Debug log EVERY invocation
        muted = os.path.exists(MUTE_FLAG)
        with open("/tmp/speak_hook_debug.log", "a") as f:
            f.write(f"{time.strftime('%H:%M:%S')} muted={muted} text_len={len(text)} text={text[:50]!r}\n")

        # First gate: check mute before doing anything
        if muted:
            return

        cleaned = clean_text(text) if text else ""
        if cleaned:
            speak(cleaned)
    except Exception as e:
        # Top-level boundary: record the failure rather than raising.
        try:
            with open("/tmp/speak_hook_error.log", "a") as f:
                f.write(f"{e}\n{raw[:200]}\n---\n")
        except OSError:
            pass  # the error log itself is unwritable; nowhere left to report
# Run the hook only when executed as a script, not when imported.
if __name__ == "__main__":
    main()