diff --git a/.env.example b/.env.example
index 62cf8ea..6853656 100644
--- a/.env.example
+++ b/.env.example
@@ -15,11 +15,30 @@ FEED_LINK=http://localhost:8080
FEED_AUTHOR=Your Name
# TTS (Text-to-Speech) Configuration
+# Provider: "openai" or "elevenlabs" (default: openai; any other value is also treated as openai)
+TTS_PROVIDER=openai
+# Directory for storing generated audio files
+AUDIO_DIR=/data/audio
+
+# OpenAI TTS (when TTS_PROVIDER=openai)
# Set your OpenAI API key to enable TTS. Without it, TTS features are disabled.
OPENAI_API_KEY=your_openai_api_key
# Available models: tts-1 (faster), tts-1-hd (higher quality)
TTS_MODEL=tts-1
# Available voices: alloy, echo, fable, onyx, nova, shimmer
TTS_VOICE=alloy
-# Directory for storing generated audio files
-AUDIO_DIR=/data/audio
+
+# ElevenLabs TTS (when TTS_PROVIDER=elevenlabs)
+# For custom voice cloning - clone your voice at https://elevenlabs.io
+# ELEVENLABS_API_KEY=your_elevenlabs_api_key
+# ELEVENLABS_VOICE_ID=your_cloned_voice_id
+# Model ID (default: eleven_multilingual_v2, which is recommended for Swedish)
+# ELEVENLABS_MODEL=eleven_multilingual_v2
+# Display name shown for your custom voice (default: Custom Voice)
+# ELEVENLABS_VOICE_NAME=My Voice
+
+# Podcast Configuration
+# Language code for podcast feed (default: sv for Swedish)
+PODCAST_LANGUAGE=sv
+# Cover image URL for podcast apps (optional)
+# PODCAST_IMAGE_URL=https://example.com/podcast-cover.jpg
diff --git a/cmd/kiln/main.go b/cmd/kiln/main.go
index a10e1dc..7f54e0c 100644
--- a/cmd/kiln/main.go
+++ b/cmd/kiln/main.go
@@ -52,16 +52,38 @@ func run() error {
defer scraper.Close()
log.Println("Initialized scraper")
- // Initialize TTS service (optional - only if API key is configured)
+ // Initialize TTS service (optional - skipped unless the selected provider's credentials are configured)
var ttsSvc *tts.Service
- if cfg.OpenAIAPIKey != "" {
- ttsSvc, err = tts.New(cfg.OpenAIAPIKey, cfg.TTSModel, cfg.TTSVoice, cfg.AudioDir)
+ var ttsProvider tts.Provider
+
+ switch cfg.TTSProvider {
+ case "elevenlabs":
+ if cfg.ElevenLabsAPIKey != "" && cfg.ElevenLabsVoiceID != "" {
+ ttsProvider = tts.NewElevenLabsProvider(
+ cfg.ElevenLabsAPIKey,
+ cfg.ElevenLabsModel,
+ cfg.ElevenLabsVoiceID,
+ cfg.ElevenLabsVoiceName,
+ )
+ log.Printf("Using ElevenLabs TTS provider (model=%s, voice=%s)", cfg.ElevenLabsModel, cfg.ElevenLabsVoiceName)
+ } else {
+ log.Println("TTS disabled (ELEVENLABS_API_KEY and ELEVENLABS_VOICE_ID required for elevenlabs provider)")
+ }
+ default: // "openai"
+ if cfg.OpenAIAPIKey != "" {
+ ttsProvider = tts.NewOpenAIProvider(cfg.OpenAIAPIKey, cfg.TTSModel, cfg.TTSVoice)
+ log.Printf("Using OpenAI TTS provider (model=%s, voice=%s)", cfg.TTSModel, cfg.TTSVoice)
+ } else {
+ log.Println("TTS disabled (OPENAI_API_KEY not set)")
+ }
+ }
+
+ if ttsProvider != nil {
+ ttsSvc, err = tts.New(ttsProvider, cfg.AudioDir)
if err != nil {
return fmt.Errorf("failed to initialize TTS: %w", err)
}
- log.Printf("Initialized TTS service (model=%s, voice=%s, dir=%s)", cfg.TTSModel, cfg.TTSVoice, cfg.AudioDir)
- } else {
- log.Println("TTS disabled (OPENAI_API_KEY not set)")
+ log.Printf("Initialized TTS service (provider=%s, dir=%s)", ttsProvider.Name(), cfg.AudioDir)
}
// Create server
diff --git a/docker-compose.yml b/docker-compose.yml
index 337ea39..84b41ff 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -14,10 +14,17 @@ services:
- FEED_DESCRIPTION=${FEED_DESCRIPTION:-Articles from Gasetten}
- FEED_LINK=${FEED_LINK:-http://localhost:8080}
- FEED_AUTHOR=${FEED_AUTHOR:-Kiln User}
+ - TTS_PROVIDER=${TTS_PROVIDER:-openai}
- OPENAI_API_KEY=${OPENAI_API_KEY}
- TTS_MODEL=${TTS_MODEL:-tts-1}
- TTS_VOICE=${TTS_VOICE:-alloy}
+ - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
+ - ELEVENLABS_VOICE_ID=${ELEVENLABS_VOICE_ID}
+ - ELEVENLABS_MODEL=${ELEVENLABS_MODEL:-eleven_multilingual_v2}
+ - ELEVENLABS_VOICE_NAME=${ELEVENLABS_VOICE_NAME:-Custom Voice}
- AUDIO_DIR=/data/audio
+ - PODCAST_LANGUAGE=${PODCAST_LANGUAGE:-sv}
+ - PODCAST_IMAGE_URL=${PODCAST_IMAGE_URL}
depends_on:
db:
condition: service_healthy
diff --git a/internal/config/config.go b/internal/config/config.go
index 2b8111d..43e3c77 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -28,10 +28,23 @@ type Config struct {
ScraperHeadless bool
// TTS (Text-to-Speech)
+ TTSProvider string // "openai" or "elevenlabs"
+ AudioDir string
+
+ // OpenAI TTS
OpenAIAPIKey string
TTSModel string
TTSVoice string
- AudioDir string
+
+ // ElevenLabs TTS (for custom voice cloning)
+ ElevenLabsAPIKey string
+ ElevenLabsVoiceID string
+ ElevenLabsModel string
+ ElevenLabsVoiceName string
+
+ // Podcast
+ PodcastLanguage string
+ PodcastImageURL string
}
// Load reads configuration from environment variables
@@ -46,10 +59,17 @@ func Load() (*Config, error) {
FeedLink: getEnv("FEED_LINK", "http://localhost:8080"),
FeedAuthor: getEnv("FEED_AUTHOR", "Kiln User"),
ScraperHeadless: getEnvAsBool("SCRAPER_HEADLESS", true),
- OpenAIAPIKey: getEnv("OPENAI_API_KEY", ""),
- TTSModel: getEnv("TTS_MODEL", "tts-1"),
- TTSVoice: getEnv("TTS_VOICE", "alloy"),
- AudioDir: getEnv("AUDIO_DIR", "/data/audio"),
+ TTSProvider: getEnv("TTS_PROVIDER", "openai"),
+ AudioDir: getEnv("AUDIO_DIR", "/data/audio"),
+ OpenAIAPIKey: getEnv("OPENAI_API_KEY", ""),
+ TTSModel: getEnv("TTS_MODEL", "tts-1"),
+ TTSVoice: getEnv("TTS_VOICE", "alloy"),
+ ElevenLabsAPIKey: getEnv("ELEVENLABS_API_KEY", ""),
+ ElevenLabsVoiceID: getEnv("ELEVENLABS_VOICE_ID", ""),
+ ElevenLabsModel: getEnv("ELEVENLABS_MODEL", "eleven_multilingual_v2"),
+ ElevenLabsVoiceName: getEnv("ELEVENLABS_VOICE_NAME", "Custom Voice"),
+ PodcastLanguage: getEnv("PODCAST_LANGUAGE", "sv"),
+ PodcastImageURL: getEnv("PODCAST_IMAGE_URL", ""),
}
// Validate required fields
diff --git a/internal/server/rss.go b/internal/server/rss.go
index f24ad34..da8d1f3 100644
--- a/internal/server/rss.go
+++ b/internal/server/rss.go
@@ -1,6 +1,7 @@
package server
import (
+ "encoding/xml"
"fmt"
"time"
@@ -66,64 +67,141 @@ func GenerateRSSFeed(articles []*database.Article, cfg *config.Config) (string,
return rss, nil
}
-// GeneratePodcastFeed creates a podcast-compatible RSS feed with audio enclosures
+// The types below model the podcast RSS document (including iTunes-namespace elements) that encoding/xml marshals for podcast apps such as Pocket Casts.
+
+type podcastRSS struct { // root <rss> element of the podcast feed
+ XMLName xml.Name `xml:"rss"`
+ Version string `xml:"version,attr"` // version attribute on <rss>
+ ITunes string `xml:"xmlns:itunes,attr"` // written as the xmlns:itunes attribute, declaring the itunes: prefix
+ Channel podcastChannel `xml:"channel"` // the <channel> child element
+}
+// podcastChannel is the <channel> element: feed-level metadata followed by one <item> per included article.
+type podcastChannel struct {
+ Title string `xml:"title"`
+ Link string `xml:"link"`
+ Description string `xml:"description"`
+ Language string `xml:"language"` // filled from cfg.PodcastLanguage by GeneratePodcastFeed
+ LastBuildDate string `xml:"lastBuildDate"` // pre-formatted by the caller (GeneratePodcastFeed uses time.RFC1123Z)
+ ITunesAuthor string `xml:"itunes:author"`
+ ITunesSummary string `xml:"itunes:summary"`
+ ITunesExplicit string `xml:"itunes:explicit"` // GeneratePodcastFeed always writes "false"
+ ITunesType string `xml:"itunes:type"` // GeneratePodcastFeed always writes "episodic"
+ ITunesImage *podcastITunesImage `xml:"itunes:image,omitempty"` // nil pointer means the element is omitted
+ ITunesCategory podcastCategory `xml:"itunes:category"` // GeneratePodcastFeed always writes "News"
+ Items []podcastItem `xml:"item"` // each entry becomes one <item> element
+}
+// podcastITunesImage is the <itunes:image> element; the image URL is carried in its href attribute.
+type podcastITunesImage struct {
+ Href string `xml:"href,attr"`
+}
+// podcastCategory is the <itunes:category> element; the category name is carried in its text attribute.
+type podcastCategory struct {
+ Text string `xml:"text,attr"`
+}
+// podcastItem is one <item> element: a single article together with its audio enclosure.
+type podcastItem struct {
+ Title string `xml:"title"`
+ Link string `xml:"link"`
+ GUID podcastGUID `xml:"guid"`
+ Description string `xml:"description"`
+ Author string `xml:"itunes:author,omitempty"` // serialized as <itunes:author>; omitted when empty
+ PubDate string `xml:"pubDate"` // pre-formatted by the caller (GeneratePodcastFeed uses time.RFC1123Z)
+ Enclosure podcastEnclosure `xml:"enclosure"`
+ ITunesDuration string `xml:"itunes:duration,omitempty"` // not set by GeneratePodcastFeed, so it is omitted from that feed
+ ITunesExplicit string `xml:"itunes:explicit"` // GeneratePodcastFeed always writes "false"
+}
+// podcastGUID is the <guid> element: the identifier as character data plus an isPermaLink attribute.
+type podcastGUID struct {
+ IsPermaLink string `xml:"isPermaLink,attr"` // kept as a string; GeneratePodcastFeed writes "false"
+ Value string `xml:",chardata"` // element text content
+}
+// podcastEnclosure is the <enclosure> element pointing at the item's audio; all values are attributes.
+type podcastEnclosure struct {
+ URL string `xml:"url,attr"`
+ Length string `xml:"length,attr"` // kept as a string; GeneratePodcastFeed formats audio.FileSize with %d
+ Type string `xml:"type,attr"` // MIME type; GeneratePodcastFeed writes "audio/mpeg"
+}
+
+// GeneratePodcastFeed renders an RSS 2.0 podcast feed (with iTunes namespace tags for Pocket Casts and other
+// podcast apps) as an XML string. Only articles whose audioMap entry has Status "completed" become items.
func GeneratePodcastFeed(articles []*database.Article, audioMap map[int]*database.AudioFile, cfg *config.Config) (string, error) {
now := time.Now()
- feed := &feeds.Feed{
- Title: cfg.FeedTitle + " (Podcast)",
- Link: &feeds.Link{Href: cfg.FeedLink + "/podcast.xml"},
- Description: cfg.FeedDescription + " - Audio versions of articles",
- Author: &feeds.Author{Name: cfg.FeedAuthor},
- Created: now,
+ channel := podcastChannel{
+ Title: cfg.FeedTitle + " (Podcast)",
+ Link: cfg.FeedLink + "/podcast.xml",
+ Description: cfg.FeedDescription + " - Audio versions of articles",
+ Language: cfg.PodcastLanguage,
+ LastBuildDate: now.Format(time.RFC1123Z), // build time is the moment the feed is rendered
+ ITunesAuthor: cfg.FeedAuthor,
+ ITunesSummary: cfg.FeedDescription + " - Audio versions of articles", // same text as Description
+ ITunesExplicit: "false",
+ ITunesType: "episodic",
+ ITunesCategory: podcastCategory{Text: "News"},
+ }
+
+ if cfg.PodcastImageURL != "" { // cover image is optional; a nil ITunesImage is omitted from the XML
+ channel.ITunesImage = &podcastITunesImage{Href: cfg.PodcastImageURL}
}
// Only include articles that have completed audio
- feed.Items = make([]*feeds.Item, 0)
for _, article := range articles {
audio, hasAudio := audioMap[article.ID]
if !hasAudio || audio.Status != "completed" {
continue
}
- item := &feeds.Item{
- Title: getArticleTitle(article),
- Link: &feeds.Link{Href: fmt.Sprintf("%s/articles/%d", cfg.FeedLink, article.ID)},
- Id: fmt.Sprintf("%s/articles/%d/audio", cfg.FeedLink, article.ID),
- Enclosure: &feeds.Enclosure{
- Url: fmt.Sprintf("%s/articles/%d/audio?voice=%s", cfg.FeedLink, article.ID, audio.Voice),
- Length: fmt.Sprintf("%d", audio.FileSize),
- Type: "audio/mpeg",
- },
- }
-
+ description := "" // stays empty when the article has no text content
if article.ContentText != nil {
- description := *article.ContentText
+ description = *article.ContentText // NOTE(review): the 500-byte cut below slices by byte index and may split a multi-byte UTF-8 character
if len(description) > 500 {
description = description[:500] + "..."
}
- item.Description = description
}
+ author := cfg.FeedAuthor // feed author is the fallback when the article has none
if article.Author != nil {
- item.Author = &feeds.Author{Name: *article.Author}
+ author = *article.Author
}
+ pubDate := article.CreatedAt // fallback when PublishedAt is nil
if article.PublishedAt != nil {
- item.Created = *article.PublishedAt
- } else {
- item.Created = article.CreatedAt
+ pubDate = *article.PublishedAt
}
- feed.Items = append(feed.Items, item)
+ item := podcastItem{
+ Title: getArticleTitle(article),
+ Link: fmt.Sprintf("%s/articles/%d", cfg.FeedLink, article.ID),
+ Description: description,
+ Author: author,
+ PubDate: pubDate.Format(time.RFC1123Z),
+ GUID: podcastGUID{
+ IsPermaLink: "false",
+ Value: fmt.Sprintf("%s/articles/%d/audio", cfg.FeedLink, article.ID),
+ },
+ Enclosure: podcastEnclosure{
+ URL: fmt.Sprintf("%s/articles/%d/audio?voice=%s", cfg.FeedLink, article.ID, audio.Voice), // NOTE(review): audio.Voice is interpolated into the query string without URL escaping
+ Length: fmt.Sprintf("%d", audio.FileSize),
+ Type: "audio/mpeg",
+ },
+ ITunesExplicit: "false",
+ }
+
+ channel.Items = append(channel.Items, item)
}
- rss, err := feed.ToRss()
+ rss := podcastRSS{
+ Version: "2.0",
+ ITunes: "http://www.itunes.com/dtds/podcast-1.0.dtd",
+ Channel: channel,
+ }
+
+ output, err := xml.MarshalIndent(rss, "", " ")
if err != nil {
return "", fmt.Errorf("failed to generate podcast RSS: %w", err)
}
- return rss, nil
+ return xml.Header + string(output), nil // prepend the XML declaration, which the xml package does not emit on its own
}
func getArticleTitle(article *database.Article) string {
diff --git a/internal/server/server.go b/internal/server/server.go
index 28e9503..f3e59c0 100644
--- a/internal/server/server.go
+++ b/internal/server/server.go
@@ -110,7 +110,7 @@ func (s *Server) handleArticleList(w http.ResponseWriter, r *http.Request) {
audioMap, _ := s.db.GetCompletedAudioForArticles(ctx, articleIDs)
// Render template
- ArticleListPage(articles, audioMap, s.ttsEnabled()).Render(ctx, w)
+ ArticleListPage(articles, audioMap, s.ttsInfo()).Render(ctx, w)
}
// handleArticleDetail renders a single article
@@ -134,7 +134,7 @@ func (s *Server) handleArticleDetail(w http.ResponseWriter, r *http.Request) {
audioFiles, _ := s.db.GetAudioFilesByArticle(ctx, id)
// Render template
- ArticleDetailPage(article, audioFiles, s.ttsEnabled()).Render(ctx, w)
+ ArticleDetailPage(article, audioFiles, s.ttsInfo()).Render(ctx, w)
}
// handleScrape triggers a manual scrape operation
@@ -380,6 +380,17 @@ func (s *Server) ttsEnabled() bool {
return s.tts != nil
}
+// ttsInfo builds the TTSInfo handed to templates: disabled when s.tts is nil, otherwise enabled with the service's voices.
+func (s *Server) ttsInfo() TTSInfo {
+ if s.tts == nil { // no TTS service configured
+ return TTSInfo{Enabled: false}
+ }
+ return TTSInfo{
+ Enabled: true,
+ Voices: s.tts.AvailableVoices(), // voice list comes from the service on every call
+ }
+}
+
// handleGenerateTTS triggers TTS generation for an article
func (s *Server) handleGenerateTTS(w http.ResponseWriter, r *http.Request) {
if !s.ttsEnabled() {
@@ -399,7 +410,7 @@ func (s *Server) handleGenerateTTS(w http.ResponseWriter, r *http.Request) {
// Get the voice from form data or query param
voice := r.FormValue("voice")
if voice == "" {
- voice = s.config.TTSVoice
+ voice = s.tts.DefaultVoice()
}
// Check if article exists
@@ -508,8 +519,11 @@ func (s *Server) handleServeAudio(w http.ResponseWriter, r *http.Request) {
}
voice := r.URL.Query().Get("voice")
+ if voice == "" && s.tts != nil {
+ voice = s.tts.DefaultVoice()
+ }
if voice == "" {
- voice = s.config.TTSVoice
+ voice = "alloy" // fallback
}
audio, err := s.db.GetAudioFileByArticle(ctx, id, voice)
@@ -555,12 +569,17 @@ func (s *Server) handleServeAudio(w http.ResponseWriter, r *http.Request) {
// handleTTSVoices returns the list of available TTS voices
func (s *Server) handleTTSVoices(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
- voices := tts.AvailableVoices()
+ if s.tts == nil { // TTS disabled: reply with an empty voice list
+ fmt.Fprint(w, `{"voices":[],"default":""}`) // note: this reply carries no "provider" key
+ return
+ }
+ voices := s.tts.AvailableVoices()
var items []string
for _, v := range voices {
- items = append(items, fmt.Sprintf(`"%s"`, v))
+ items = append(items, fmt.Sprintf(`{"id":"%s","name":"%s"}`, v.ID, v.Name)) // NOTE(review): values are interpolated without JSON escaping; a quote or backslash in a voice name would produce invalid JSON
}
- fmt.Fprintf(w, `{"voices":[%s],"default":"%s"}`, strings.Join(items, ","), s.config.TTSVoice)
+ fmt.Fprintf(w, `{"voices":[%s],"default":"%s","provider":"%s"}`,
+ strings.Join(items, ","), s.tts.DefaultVoice(), s.tts.ProviderName()) // default and provider are likewise written unescaped
}
// handleRSS generates and serves the RSS feed
diff --git a/internal/server/templates.templ b/internal/server/templates.templ
index 8a3472e..4a62818 100644
--- a/internal/server/templates.templ
+++ b/internal/server/templates.templ
@@ -6,6 +6,12 @@ import (
"fmt"
)
+// TTSInfo holds the TTS state passed to page templates.
+type TTSInfo struct {
+ Enabled bool // whether the TTS section is rendered
+ Voices []tts.Voice // voices handed to the VoiceSelector component
+}
+
// Layout is the base HTML template
templ Layout(title string) {
@@ -45,7 +51,7 @@ templ Layout(title string) {
}
// ArticleListPage renders the list of articles
-templ ArticleListPage(articles []*database.Article, audioMap map[int]*database.AudioFile, ttsEnabled bool) {
+templ ArticleListPage(articles []*database.Article, audioMap map[int]*database.AudioFile, ttsInfo TTSInfo) {
@Layout("Articles") {
Articles
@@ -89,7 +95,7 @@ templ ArticleListPage(articles []*database.Article, audioMap map[int]*database.A
} else {
for _, article := range articles {
- @ArticleCardWithAudio(article, audioMap[article.ID], ttsEnabled)
+ @ArticleCardWithAudio(article, audioMap[article.ID], ttsInfo.Enabled)
}
}
@@ -152,7 +158,7 @@ templ ArticleCard(article *database.Article) {
}
// ArticleDetailPage renders a single article in detail
-templ ArticleDetailPage(article *database.Article, audioFiles []*database.AudioFile, ttsEnabled bool) {
+templ ArticleDetailPage(article *database.Article, audioFiles []*database.AudioFile, ttsInfo TTSInfo) {
@Layout(getTitle(article)) {
@@ -183,7 +189,7 @@ templ ArticleDetailPage(article *database.Article, audioFiles []*database.AudioF
// TTS Audio Section
- if ttsEnabled {
+ if ttsInfo.Enabled {
Listen to this article
if hasCompletedAudio(audioFiles) {
@@ -192,16 +198,18 @@ templ ArticleDetailPage(article *database.Article, audioFiles []*database.AudioF
@AudioPlayer(article.ID, af)
}
}
-
-
- Generate with a different voice
-
- @VoiceSelector(article.ID)
-
-
-
+ if len(ttsInfo.Voices) > 1 {
+
+
+ Generate with a different voice
+
+ @VoiceSelector(article.ID, ttsInfo.Voices)
+
+
+
+ }
} else {
- @VoiceSelector(article.ID)
+ @VoiceSelector(article.ID, ttsInfo.Voices)
}
}
@@ -229,7 +237,7 @@ templ AudioPlayer(articleID int, audio *database.AudioFile) {
}
// VoiceSelector renders a voice selection form for TTS generation
-templ VoiceSelector(articleID int) {
+templ VoiceSelector(articleID int, voices []tts.Voice) {
")
- if templ_7745c5c3_Err != nil {
- return templ_7745c5c3_Err
+ if len(ttsInfo.Voices) > 1 {
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 42, "Generate with a different voice
")
+ if templ_7745c5c3_Err != nil {
+ return templ_7745c5c3_Err
+ }
+ templ_7745c5c3_Err = VoiceSelector(article.ID, ttsInfo.Voices).Render(ctx, templ_7745c5c3_Buffer)
+ if templ_7745c5c3_Err != nil {
+ return templ_7745c5c3_Err
+ }
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 43, "
")
+ if templ_7745c5c3_Err != nil {
+ return templ_7745c5c3_Err
+ }
}
} else {
- templ_7745c5c3_Err = VoiceSelector(article.ID).Render(ctx, templ_7745c5c3_Buffer)
+ templ_7745c5c3_Err = VoiceSelector(article.ID, ttsInfo.Voices).Render(ctx, templ_7745c5c3_Buffer)
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 43, " ")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 44, "")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 44, "")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 45, "
")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
@@ -556,30 +568,30 @@ func ArticleDetailPage(article *database.Article, audioFiles []*database.AudioFi
return templ_7745c5c3_Err
}
} else if article.ContentText != nil {
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 45, "
")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 46, "
")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var24 string
templ_7745c5c3_Var24, templ_7745c5c3_Err = templ.JoinStringErrs(*article.ContentText)
if templ_7745c5c3_Err != nil {
- return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/server/templates.templ`, Line: 212, Col: 30}
+ return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/server/templates.templ`, Line: 220, Col: 30}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var24))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 46, "
")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 47, "")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
} else {
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 47, "
No content available
")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 48, "
No content available
")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 48, "
")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 49, "
")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
@@ -615,46 +627,46 @@ func AudioPlayer(articleID int, audio *database.AudioFile) templ.Component {
templ_7745c5c3_Var25 = templ.NopComponent
}
ctx = templ.ClearChildren(ctx)
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 49, "")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 51, "\">")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var27 string
templ_7745c5c3_Var27, templ_7745c5c3_Err = templ.JoinStringErrs(audio.Voice)
if templ_7745c5c3_Err != nil {
- return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/server/templates.templ`, Line: 224, Col: 83}
+ return templ.Error{Err: templ_7745c5c3_Err, FileName: `internal/server/templates.templ`, Line: 232, Col: 83}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var27))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 51, " ")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 53, "\" type=\"audio/mpeg\">")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
@@ -663,7 +675,7 @@ func AudioPlayer(articleID int, audio *database.AudioFile) templ.Component {
}
// VoiceSelector renders a voice selection form for TTS generation
-func VoiceSelector(articleID int) templ.Component {
+func VoiceSelector(articleID int, voices []tts.Voice) templ.Component {
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
@@ -684,95 +696,95 @@ func VoiceSelector(articleID int) templ.Component {
templ_7745c5c3_Var29 = templ.NopComponent
}
ctx = templ.ClearChildren(ctx)
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 53, "")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 62, "\" class=\"bg-green-600 hover:bg-green-700 text-white px-3 py-1 rounded text-sm font-medium\">Generate Audio")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
diff --git a/internal/tts/elevenlabs.go b/internal/tts/elevenlabs.go
new file mode 100644
index 0000000..0078a60
--- /dev/null
+++ b/internal/tts/elevenlabs.go
@@ -0,0 +1,124 @@
+package tts
+
+import (
+ "bytes"
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+)
+
+const (
+ elevenLabsBaseURL = "https://api.elevenlabs.io/v1/text-to-speech" // the voice ID is appended as a path segment per request
+ elevenLabsMaxChunk = 5000 // ElevenLabs supports up to 5000 chars per request; returned by MaxChunkSize
+ elevenLabsDefaultModel = "eleven_multilingual_v2" // Best for non-English (Swedish); used when no model is passed to the constructor
+)
+
+// ElevenLabsProvider implements the Provider interface for the ElevenLabs TTS API.
+// It serves one configured voice, identified by voiceID and exposed to callers as "custom".
+type ElevenLabsProvider struct {
+ apiKey string // sent in the xi-api-key request header
+ model string // sent as model_id in the request body
+ voiceID string // The cloned voice ID from ElevenLabs; used when the caller passes "" or "custom"
+ voiceName string // Display name for the custom voice
+ client *http.Client // HTTP client used for every API call
+}
+
+// NewElevenLabsProvider creates a new ElevenLabs TTS provider.
+// voiceID is the ID of the cloned voice from the ElevenLabs dashboard; an empty model
+// falls back to eleven_multilingual_v2 and an empty voiceName to "Custom Voice".
+func NewElevenLabsProvider(apiKey, model, voiceID, voiceName string) *ElevenLabsProvider {
+ if model == "" {
+ model = elevenLabsDefaultModel
+ }
+ if voiceName == "" {
+ voiceName = "Custom Voice"
+ }
+ return &ElevenLabsProvider{
+ apiKey: apiKey, // not validated here
+ model: model,
+ voiceID: voiceID, // not validated here
+ voiceName: voiceName,
+ client: &http.Client{}, // NOTE(review): no client Timeout is set; requests are bounded only by the caller's ctx
+ }
+}
+
+// elevenLabsRequest is the JSON request body sent to the ElevenLabs TTS API.
+type elevenLabsRequest struct {
+ Text string `json:"text"` // the chunk of text to synthesize
+ ModelID string `json:"model_id"` // filled from the provider's configured model
+ VoiceSettings elevenLabsVoiceSettings `json:"voice_settings"`
+}
+// elevenLabsVoiceSettings is the voice_settings object of the request body.
+type elevenLabsVoiceSettings struct {
+ Stability float64 `json:"stability"`
+ SimilarityBoost float64 `json:"similarity_boost"`
+ Style float64 `json:"style,omitempty"` // dropped from the JSON when zero; GenerateChunkAudio never sets it
+}
+// GenerateChunkAudio posts text to the ElevenLabs endpoint for the given voice and returns the raw response body on HTTP 200.
+func (p *ElevenLabsProvider) GenerateChunkAudio(ctx context.Context, text, voiceID string) ([]byte, error) {
+ if voiceID == "" || voiceID == "custom" { // "custom" is the placeholder ID advertised by AvailableVoices and DefaultVoice
+ voiceID = p.voiceID
+ }
+
+ reqBody := elevenLabsRequest{
+ Text: text,
+ ModelID: p.model,
+ VoiceSettings: elevenLabsVoiceSettings{ // hard-coded for every request
+ Stability: 0.5,
+ SimilarityBoost: 0.75,
+ },
+ }
+
+ jsonBody, err := json.Marshal(reqBody)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal request: %w", err)
+ }
+
+ url := fmt.Sprintf("%s/%s?output_format=mp3_44100_128", elevenLabsBaseURL, voiceID) // NOTE(review): voiceID is placed in the path without escaping
+
+ req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(jsonBody))
+ if err != nil {
+ return nil, fmt.Errorf("failed to create request: %w", err)
+ }
+
+ req.Header.Set("xi-api-key", p.apiKey)
+ req.Header.Set("Content-Type", "application/json")
+ req.Header.Set("Accept", "audio/mpeg")
+
+ resp, err := p.client.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("ElevenLabs API request failed: %w", err)
+ }
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body) // read before the status check so an error response can be quoted below
+ if err != nil {
+ return nil, fmt.Errorf("failed to read response body: %w", err)
+ }
+
+ if resp.StatusCode != http.StatusOK { // any status other than 200 is reported as an error carrying the response body
+ return nil, fmt.Errorf("ElevenLabs API returned status %d: %s", resp.StatusCode, string(body))
+ }
+
+ return body, nil
+}
+// AvailableVoices returns a single entry: the configured voice under the placeholder ID "custom" with its display name.
+func (p *ElevenLabsProvider) AvailableVoices() []Voice {
+ return []Voice{
+ {ID: "custom", Name: p.voiceName},
+ }
+}
+// DefaultVoice returns the placeholder ID "custom", which GenerateChunkAudio maps to the configured voice ID.
+func (p *ElevenLabsProvider) DefaultVoice() string {
+ return "custom"
+}
+// MaxChunkSize returns elevenLabsMaxChunk (5000).
+func (p *ElevenLabsProvider) MaxChunkSize() int {
+ return elevenLabsMaxChunk
+}
+// Name returns the provider identifier "elevenlabs".
+func (p *ElevenLabsProvider) Name() string {
+ return "elevenlabs"
+}
diff --git a/internal/tts/openai.go b/internal/tts/openai.go
new file mode 100644
index 0000000..a2f983e
--- /dev/null
+++ b/internal/tts/openai.go
@@ -0,0 +1,111 @@
+package tts
+
+import (
+ "bytes"
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+)
+
+const (
+ openAITTSURL = "https://api.openai.com/v1/audio/speech" // endpoint every GenerateChunkAudio request is posted to
+ openAIMaxChunkSize = 4000 // OpenAI TTS max is 4096 chars, leave some margin; returned by MaxChunkSize
+)
+
+// OpenAIProvider implements the Provider interface for OpenAI's TTS API.
+type OpenAIProvider struct {
+ apiKey string // sent as a Bearer token in the Authorization header
+ model string // sent as "model" in the request body
+ voice string // default voice, used when the caller passes an empty voice ID
+ client *http.Client // HTTP client used for every API call
+}
+
+// NewOpenAIProvider creates a new OpenAI TTS provider; an empty model becomes "tts-1" and an empty voice "alloy".
+func NewOpenAIProvider(apiKey, model, voice string) *OpenAIProvider {
+ if model == "" {
+ model = "tts-1"
+ }
+ if voice == "" {
+ voice = "alloy"
+ }
+ return &OpenAIProvider{
+ apiKey: apiKey, // not validated here
+ model: model,
+ voice: voice,
+ client: &http.Client{}, // NOTE(review): no client Timeout is set; requests are bounded only by the caller's ctx
+ }
+}
+
+// openAITTSRequest is the JSON request body sent to OpenAI's TTS API.
+type openAITTSRequest struct {
+ Model string `json:"model"` // filled from the provider's configured model
+ Input string `json:"input"` // the chunk of text to synthesize
+ Voice string `json:"voice"` // voice ID for this request
+}
+// GenerateChunkAudio posts text to OpenAI's speech endpoint and returns the raw response body on HTTP 200.
+func (p *OpenAIProvider) GenerateChunkAudio(ctx context.Context, text, voiceID string) ([]byte, error) {
+ if voiceID == "" { // fall back to the provider's configured default voice
+ voiceID = p.voice
+ }
+
+ reqBody := openAITTSRequest{
+ Model: p.model,
+ Input: text,
+ Voice: voiceID, // passed through as given; not checked against AvailableVoices
+ }
+
+ jsonBody, err := json.Marshal(reqBody)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal request: %w", err)
+ }
+
+ req, err := http.NewRequestWithContext(ctx, http.MethodPost, openAITTSURL, bytes.NewReader(jsonBody))
+ if err != nil {
+ return nil, fmt.Errorf("failed to create request: %w", err)
+ }
+
+ req.Header.Set("Authorization", "Bearer "+p.apiKey)
+ req.Header.Set("Content-Type", "application/json")
+
+ resp, err := p.client.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("TTS API request failed: %w", err)
+ }
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body) // read before the status check so an error response can be quoted below
+ if err != nil {
+ return nil, fmt.Errorf("failed to read response body: %w", err)
+ }
+
+ if resp.StatusCode != http.StatusOK { // any status other than 200 is reported as an error carrying the response body
+ return nil, fmt.Errorf("TTS API returned status %d: %s", resp.StatusCode, string(body))
+ }
+
+ return body, nil
+}
+// AvailableVoices returns a fixed list of six voices; it does not query the API.
+func (p *OpenAIProvider) AvailableVoices() []Voice {
+ return []Voice{
+ {ID: "alloy", Name: "Alloy"},
+ {ID: "echo", Name: "Echo"},
+ {ID: "fable", Name: "Fable"},
+ {ID: "onyx", Name: "Onyx"},
+ {ID: "nova", Name: "Nova"},
+ {ID: "shimmer", Name: "Shimmer"},
+ }
+}
+// DefaultVoice returns the voice the provider was constructed with ("alloy" if none was given).
+func (p *OpenAIProvider) DefaultVoice() string {
+ return p.voice
+}
+// MaxChunkSize returns openAIMaxChunkSize (4000).
+func (p *OpenAIProvider) MaxChunkSize() int {
+ return openAIMaxChunkSize
+}
+// Name returns the provider identifier "openai".
+func (p *OpenAIProvider) Name() string {
+ return "openai"
+}
diff --git a/internal/tts/provider.go b/internal/tts/provider.go
new file mode 100644
index 0000000..d82d4e7
--- /dev/null
+++ b/internal/tts/provider.go
@@ -0,0 +1,27 @@
+package tts
+
+import "context"
+
+// Voice describes one selectable TTS voice as reported by a Provider.
+type Voice struct {
+ ID string // Provider-specific voice identifier; this is the value passed back as voiceID
+ Name string // Human-readable display name
+}
+
+// Provider is the interface Service uses to talk to a TTS backend (OpenAI, ElevenLabs, etc.).
+type Provider interface {
+ // GenerateChunkAudio converts one chunk of text to audio bytes using voiceID (Service chunks text by MaxChunkSize first).
+ GenerateChunkAudio(ctx context.Context, text, voiceID string) ([]byte, error)
+
+ // AvailableVoices returns the voices this provider offers for selection.
+ AvailableVoices() []Voice
+
+ // DefaultVoice returns the voice ID Service substitutes when the caller supplies none.
+ DefaultVoice() string
+
+ // MaxChunkSize returns the size limit Service passes to chunkText when splitting text for this provider.
+ MaxChunkSize() int
+
+ // Name returns the provider name (e.g., "openai", "elevenlabs"); Service uses it in log lines.
+ Name() string
+}
diff --git a/internal/tts/tts.go b/internal/tts/tts.go
index d3037e4..b3da6b0 100644
--- a/internal/tts/tts.go
+++ b/internal/tts/tts.go
@@ -3,63 +3,37 @@ package tts
import (
"bytes"
"context"
- "encoding/json"
"fmt"
- "io"
"log"
- "net/http"
"os"
"path/filepath"
"strings"
"unicode"
)
-const (
- openAITTSURL = "https://api.openai.com/v1/audio/speech"
- maxChunkSize = 4000 // OpenAI TTS max is 4096 chars, leave some margin
-)
-
-// Service handles text-to-speech conversion using OpenAI's API
+// Service converts text to audio through a pluggable Provider and works with files under audioDir.
type Service struct {
- apiKey string
- model string
- voice string
+ provider Provider // backend that performs the actual synthesis
audioDir string
- client *http.Client
}
-// New creates a new TTS service
-func New(apiKey, model, voice, audioDir string) (*Service, error) {
- if apiKey == "" {
- return nil, fmt.Errorf("OPENAI_API_KEY is required for TTS")
- }
-
- // Ensure audio directory exists
+// New creates a TTS service backed by provider, creating audioDir (mode 0755) if it does not already exist.
+func New(provider Provider, audioDir string) (*Service, error) {
if err := os.MkdirAll(audioDir, 0755); err != nil {
return nil, fmt.Errorf("failed to create audio directory %s: %w", audioDir, err)
}
return &Service{
- apiKey: apiKey,
- model: model,
- voice: voice,
+ provider: provider, // stored as-is; not checked for nil here
audioDir: audioDir,
- client: &http.Client{},
}, nil
}
-// ttsRequest is the request body for OpenAI's TTS API
-type ttsRequest struct {
- Model string `json:"model"`
- Input string `json:"input"`
- Voice string `json:"voice"`
-}
-
// GenerateAudio converts article text to an MP3 file.
// Returns the file path and file size.
func (s *Service) GenerateAudio(ctx context.Context, articleID int, text, voice string) (string, int64, error) {
if voice == "" {
- voice = s.voice
+ voice = s.provider.DefaultVoice()
}
// Clean up text for TTS
@@ -68,16 +42,19 @@ func (s *Service) GenerateAudio(ctx context.Context, articleID int, text, voice
return "", 0, fmt.Errorf("no text content to convert")
}
- // Chunk the text if it exceeds the max size
- chunks := chunkText(text, maxChunkSize)
- log.Printf("TTS: article %d - %d characters, %d chunk(s), voice=%s", articleID, len(text), len(chunks), voice)
+ // Chunk the text based on provider's limit
+ maxSize := s.provider.MaxChunkSize()
+ chunks := chunkText(text, maxSize)
+ log.Printf("TTS [%s]: article %d - %d characters, %d chunk(s), voice=%s",
+ s.provider.Name(), articleID, len(text), len(chunks), voice)
// Generate audio for each chunk
var audioData bytes.Buffer
for i, chunk := range chunks {
- log.Printf("TTS: article %d - generating chunk %d/%d", articleID, i+1, len(chunks))
+ log.Printf("TTS [%s]: article %d - generating chunk %d/%d",
+ s.provider.Name(), articleID, i+1, len(chunks))
- data, err := s.callTTSAPI(ctx, chunk, voice)
+ data, err := s.provider.GenerateChunkAudio(ctx, chunk, voice)
if err != nil {
return "", 0, fmt.Errorf("failed to generate audio for chunk %d: %w", i+1, err)
}
@@ -93,7 +70,8 @@ func (s *Service) GenerateAudio(ctx context.Context, articleID int, text, voice
}
fileSize := int64(audioData.Len())
- log.Printf("TTS: article %d - audio saved to %s (%d bytes)", articleID, filePath, fileSize)
+ log.Printf("TTS [%s]: article %d - audio saved to %s (%d bytes)",
+ s.provider.Name(), articleID, filePath, fileSize)
return filePath, fileSize, nil
}
@@ -101,7 +79,7 @@ func (s *Service) GenerateAudio(ctx context.Context, articleID int, text, voice
// GetAudioPath returns the expected file path for an article's audio
func (s *Service) GetAudioPath(articleID int, voice string) string {
if voice == "" {
- voice = s.voice
+ voice = s.provider.DefaultVoice()
}
filename := fmt.Sprintf("%d_%s.mp3", articleID, voice)
return filepath.Join(s.audioDir, filename)
@@ -112,48 +90,19 @@ func (s *Service) AudioDir() string {
return s.audioDir
}
-// AvailableVoices returns the list of available OpenAI TTS voices
-func AvailableVoices() []string {
- return []string{"alloy", "echo", "fable", "onyx", "nova", "shimmer"}
+// AvailableVoices returns the voices reported by the configured provider.
+func (s *Service) AvailableVoices() []Voice {
+ return s.provider.AvailableVoices() // straight delegation; the list is not cached here
}
-// callTTSAPI makes a single request to OpenAI's TTS API
-func (s *Service) callTTSAPI(ctx context.Context, text, voice string) ([]byte, error) {
- reqBody := ttsRequest{
- Model: s.model,
- Input: text,
- Voice: voice,
- }
-
- jsonBody, err := json.Marshal(reqBody)
- if err != nil {
- return nil, fmt.Errorf("failed to marshal request: %w", err)
- }
-
- req, err := http.NewRequestWithContext(ctx, http.MethodPost, openAITTSURL, bytes.NewReader(jsonBody))
- if err != nil {
- return nil, fmt.Errorf("failed to create request: %w", err)
- }
-
- req.Header.Set("Authorization", "Bearer "+s.apiKey)
- req.Header.Set("Content-Type", "application/json")
-
- resp, err := s.client.Do(req)
- if err != nil {
- return nil, fmt.Errorf("TTS API request failed: %w", err)
- }
- defer resp.Body.Close()
-
- body, err := io.ReadAll(resp.Body)
- if err != nil {
- return nil, fmt.Errorf("failed to read response body: %w", err)
- }
-
- if resp.StatusCode != http.StatusOK {
- return nil, fmt.Errorf("TTS API returned status %d: %s", resp.StatusCode, string(body))
- }
+// DefaultVoice returns the configured provider's default voice ID.
+func (s *Service) DefaultVoice() string {
+ return s.provider.DefaultVoice()
+}
- return body, nil
+// ProviderName returns the configured provider's Name() value (e.g. "openai" or "elevenlabs").
+func (s *Service) ProviderName() string {
+ return s.provider.Name()
}
// cleanTextForTTS prepares text for TTS conversion