Skip to content

Commit 0726093

Browse files
authored
feat(speech-gateway): saner defaults (#32)
1 parent 753c53d commit 0726093

File tree

4 files changed

+78
-13
lines changed

4 files changed

+78
-13
lines changed

examples/speech-gateway/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
gateway

examples/speech-gateway/Makefile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SPDX-FileCopyrightText: © 2025 StreamKit Contributors
2+
#
3+
# SPDX-License-Identifier: MPL-2.0
4+
5+
.PHONY: build lint run clean
6+
7+
build:
8+
go build -o gateway ./cmd/gateway
9+
10+
lint:
11+
golangci-lint run
12+
13+
run:
14+
go run ./cmd/gateway
15+
16+
clean:
17+
rm -f gateway

examples/speech-gateway/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ Environment equivalents:
2626
- `SKIT_URL` (default `http://127.0.0.1:4545`)
2727
- `SKIT_TOKEN` (optional bearer sent to Skit)
2828
- `GATEWAY_MAX_CONCURRENCY` (default 10)
29-
- `GATEWAY_MAX_BODY_BYTES` (default 10MB)
29+
- `GATEWAY_MAX_BODY_BYTES` (default 1 MiB, i.e. 1048576 bytes)
30+
- `GATEWAY_MAX_TTS_TEXT_SIZE` (default 1000 characters, counted as UTF-8 runes rather than bytes)
3031

3132
## STT via curl (Ogg/Opus)
3233

examples/speech-gateway/cmd/gateway/main.go

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"strconv"
2222
"strings"
2323
"time"
24+
"unicode/utf8"
2425
)
2526

2627
const (
@@ -96,11 +97,12 @@ steps:
9697
)
9798

9899
type gateway struct {
99-
client *http.Client
100-
skitURL string
101-
authToken string
102-
maxBodySize int64
103-
sem chan struct{}
100+
client *http.Client
101+
skitURL string
102+
authToken string
103+
maxBodySize int64
104+
maxTTSTextSize int64
105+
sem chan struct{}
104106
}
105107

106108
type config struct {
@@ -109,16 +111,18 @@ type config struct {
109111
listenAddr string
110112
maxConcurrency int
111113
maxBodySize int64
114+
maxTTSTextSize int64
112115
}
113116

114117
func main() {
115118
cfg := loadConfig()
116119
gw := &gateway{
117-
client: newHTTPClient(),
118-
skitURL: cfg.skitURL,
119-
authToken: cfg.authToken,
120-
maxBodySize: cfg.maxBodySize,
121-
sem: make(chan struct{}, cfg.maxConcurrency),
120+
client: newHTTPClient(),
121+
skitURL: cfg.skitURL,
122+
authToken: cfg.authToken,
123+
maxBodySize: cfg.maxBodySize,
124+
maxTTSTextSize: cfg.maxTTSTextSize,
125+
sem: make(chan struct{}, cfg.maxConcurrency),
122126
}
123127

124128
mux := http.NewServeMux()
@@ -142,7 +146,8 @@ func loadConfig() config {
142146
skit := flagString("skit-url", getEnvDefault("SKIT_URL", defaultSkitURL), "Skit backend URL")
143147
token := flagString("token", os.Getenv("SKIT_TOKEN"), "Bearer token for Skit (overrides SKIT_TOKEN env)")
144148
maxConc := flagInt("max-concurrency", envInt("GATEWAY_MAX_CONCURRENCY", 10), "Maximum concurrent in-flight requests")
145-
maxBody := flagInt64("max-body-bytes", envInt64("GATEWAY_MAX_BODY_BYTES", 10*1024*1024), "Maximum request body size")
149+
maxBody := flagInt64("max-body-bytes", envInt64("GATEWAY_MAX_BODY_BYTES", 1*1024*1024), "Maximum request body size")
150+
maxTTSText := flagInt64("max-tts-text-size", envInt64("GATEWAY_MAX_TTS_TEXT_SIZE", 1000), "Maximum TTS text size in characters")
146151

147152
flag.Parse()
148153

@@ -152,6 +157,7 @@ func loadConfig() config {
152157
listenAddr: *listen,
153158
maxConcurrency: *maxConc,
154159
maxBodySize: *maxBody,
160+
maxTTSTextSize: *maxTTSText,
155161
}
156162
}
157163

@@ -267,8 +273,48 @@ func (gw *gateway) handleTTS(w http.ResponseWriter, r *http.Request) {
267273
}
268274
release := gw.acquire()
269275
defer release()
276+
277+
// Read and validate text size
270278
r.Body = http.MaxBytesReader(w, r.Body, gw.maxBodySize)
271-
useBuffer := r.ContentLength > 0 && r.ContentLength <= gw.maxBodySize
279+
280+
// A UTF-8 encoded character occupies at most 4 bytes, so reading up to 4x the character limit
281+
// is enough to count every permitted character and to detect input that exceeds the limit
282+
maxReadBytes := gw.maxTTSTextSize * 4
283+
textBytes, err := io.ReadAll(io.LimitReader(r.Body, maxReadBytes))
284+
if err != nil {
285+
log.Printf("tts read error: %v", err)
286+
http.Error(w, "failed to read request body", http.StatusBadRequest)
287+
return
288+
}
289+
290+
// Count UTF-8 runes (characters) instead of bytes
291+
runeCount := int64(utf8.RuneCount(textBytes))
292+
293+
// If we read the full buffer, check if there's more data
294+
if int64(len(textBytes)) == maxReadBytes {
295+
// Try to read one more byte to see if there's more
296+
extra := make([]byte, 1)
297+
n, _ := r.Body.Read(extra)
298+
if n > 0 {
299+
// There's more data, so we definitely exceeded the limit
300+
log.Printf("tts text too large: >%d chars (max: %d)", runeCount, gw.maxTTSTextSize)
301+
http.Error(w, fmt.Sprintf("text too large: exceeds %d characters", gw.maxTTSTextSize), http.StatusRequestEntityTooLarge)
302+
return
303+
}
304+
}
305+
306+
if runeCount > gw.maxTTSTextSize {
307+
log.Printf("tts text too large: %d chars (max: %d)", runeCount, gw.maxTTSTextSize)
308+
http.Error(w, fmt.Sprintf("text too large: %d characters (max: %d)", runeCount, gw.maxTTSTextSize), http.StatusRequestEntityTooLarge)
309+
return
310+
}
311+
312+
log.Printf("tts text length: %d chars (%d bytes)", runeCount, len(textBytes))
313+
314+
// Replace body with buffered content
315+
r.Body = io.NopCloser(bytes.NewReader(textBytes))
316+
317+
useBuffer := true // We've already buffered it
272318
if err := gw.proxyMultipart(w, r, ttsPipelineYAML, "media", "text/plain", useBuffer); err != nil {
273319
log.Printf("tts error: %v", err)
274320
if !errors.Is(err, context.Canceled) {

0 commit comments

Comments
 (0)