From b4fc89e32ee0faaad11d489b2ea869363bd60aaa Mon Sep 17 00:00:00 2001 From: Alireza Eliaderani <172368+cubny@users.noreply.github.com> Date: Tue, 13 Jan 2026 22:47:19 +0100 Subject: [PATCH 1/2] fix: proactive session rotation --- src/focus/cli.py | 19 +- src/focus/generation/lyria_client.py | 365 ++++++++++++++++++--------- 2 files changed, 264 insertions(+), 120 deletions(-) diff --git a/src/focus/cli.py b/src/focus/cli.py index b0db8f1..759e3f8 100644 --- a/src/focus/cli.py +++ b/src/focus/cli.py @@ -126,6 +126,12 @@ def show_profiles(): default=False, help="Show detailed debug output", ) +@click.option( + "--track-duration", + type=int, + default=9, + help="Duration of each track before rotation (1-9 minutes, default: 9)", +) def start_session( profile: str, frequency: float | None, @@ -138,6 +144,7 @@ def start_session( stereo_width: float, limiter: bool, verbose: bool, + track_duration: int, ): """Start a focus music session. @@ -207,6 +214,7 @@ def start_session( stereo_width=stereo_width, limiter=limiter, verbose=verbose, + track_duration=track_duration, ) ) except KeyboardInterrupt: @@ -226,6 +234,7 @@ async def _run_session( stereo_width: float = 1.2, limiter: bool = True, verbose: bool = False, + track_duration: int = 9, ): """Run the audio generation session.""" try: @@ -242,7 +251,15 @@ async def _run_session( brightness=profile.brightness or 0.5, ) - client = create_client(config, use_mock=use_mock, verbose=verbose) + # Clamp track duration to valid range (1-9 minutes) + track_duration_seconds = max(60, min(9 * 60, track_duration * 60)) + + client = create_client( + config, + use_mock=use_mock, + verbose=verbose, + session_duration=track_duration_seconds, + ) if not AUDIO_AVAILABLE: click.echo("⚠️ sounddevice not available, running in test mode") diff --git a/src/focus/generation/lyria_client.py b/src/focus/generation/lyria_client.py index aa37451..c03a434 100644 --- a/src/focus/generation/lyria_client.py +++ b/src/focus/generation/lyria_client.py @@ -2,16 +2,67 @@ This module provides a WebSocket client for the Lyria RealTime API, enabling real-time streaming of AI-generated instrumental music. + +Implements proactive session rotation to avoid the 10-minute API limit. """ import asyncio import os +import time import warnings from collections.abc import AsyncIterator from dataclasses import dataclass, field import numpy as np +# Session rotation constants +# Lyria has a 10-minute session limit; rotate proactively at 9 minutes +SESSION_MAX_DURATION_SECONDS = 9 * 60 # 9 minutes +CROSSFADE_DURATION_SECONDS = 3.0 # Seconds to buffer for seamless transition + + +def _apply_crossfade( + old_audio: np.ndarray, new_audio: np.ndarray, sample_rate: int +) -> np.ndarray: + """Apply crossfade between two audio chunks for seamless transition. + + Args: + old_audio: Tail of the previous session's audio (stereo, float32) + new_audio: Head of the new session's audio (stereo, float32) + sample_rate: Audio sample rate (e.g., 48000) + + Returns: + Crossfaded audio chunk. 
+ """ + crossfade_samples = int(CROSSFADE_DURATION_SECONDS * sample_rate) + + # Ensure we have enough samples for crossfade + old_samples = min(len(old_audio), crossfade_samples) + new_samples = min(len(new_audio), crossfade_samples) + overlap_samples = min(old_samples, new_samples) + + if overlap_samples == 0: + return new_audio + + # Create fade curves (linear for simplicity and low CPU) + fade_out = np.linspace(1.0, 0.0, overlap_samples, dtype=np.float32) + fade_in = np.linspace(0.0, 1.0, overlap_samples, dtype=np.float32) + + # Apply fades to stereo audio + if old_audio.ndim == 2: + fade_out = fade_out[:, np.newaxis] + fade_in = fade_in[:, np.newaxis] + + # Blend the overlapping region + old_tail = old_audio[-overlap_samples:] * fade_out + new_head = new_audio[:overlap_samples] * fade_in + blended = old_tail + new_head + + # Return: blended overlap + rest of new audio + if overlap_samples < len(new_audio): + return np.concatenate([blended, new_audio[overlap_samples:]], axis=0) + return blended + try: from google import genai from google.genai import types @@ -42,12 +93,19 @@ class LyriaClient: Uses WebSocket connection for real-time streaming music generation. Audio is streamed as 16-bit PCM at 48kHz. + + Implements proactive session rotation to avoid the 10-minute + API limit which causes audio degradation. """ config: LyriaConfig + session_duration: int = SESSION_MAX_DURATION_SECONDS # Configurable rotation time _client: object = field(default=None, init=False, repr=False) _session: object = field(default=None, init=False, repr=False) _running: bool = field(default=False, init=False) + _session_start_time: float = field(default=0.0, init=False, repr=False) + _crossfade_buffer: np.ndarray | None = field(default=None, init=False, repr=False) + _session_count: int = field(default=0, init=False, repr=False) verbose: bool = field(default=False, init=True) def __post_init__(self): @@ -73,144 +131,209 @@ async def connect(self, api_key: str | None = None) -> None: self._running = True async def generate_stream(self) -> AsyncIterator[np.ndarray]: - """Generate continuous music stream. + """Generate continuous music stream with automatic session rotation. Yields: Audio chunks as numpy arrays, shape (samples, 2) for stereo, dtype float32, normalized to [-1, 1]. Note: - Implements retry logic with exponential backoff for transient - WebSocket errors (e.g., 1011 "service unavailable"). After - exhausting retries, falls back to EnhancedSynthClient. + Proactively rotates sessions at 9 minutes to avoid the 10-minute + API limit which causes audio degradation. Crossfades between + sessions for seamless transitions. """ if not self._client: raise RuntimeError("Client not connected. 
Call connect() first.") max_retries = 3 - retry_count = 0 - base_delay = 1.0 # seconds + use_fallback = False - while retry_count <= max_retries and self._running: - try: - if self.verbose: - if retry_count > 0: - print(f" [Lyria] Retry attempt {retry_count}/{max_retries}...") - else: - print(" [Lyria] Connecting to live session...") - - # Connect to Lyria music model - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", message="Realtime music generation is experimental" - ) - async with self._client.aio.live.music.connect( - model="models/lyria-realtime-exp", - ) as session: - self._session = session - # Reset retry count on successful connection - retry_count = 0 + # Main loop: rotates sessions when approaching time limit + while self._running and not use_fallback: + retry_count = 0 + base_delay = 1.0 + session_needs_rotation = False - if self.verbose: - print(" [Lyria] Session connected") - - # Configure music generation parameters - await session.set_music_generation_config( - config=types.LiveMusicGenerationConfig( - bpm=self.config.bpm, - temperature=self.config.temperature, - guidance=self.config.guidance, - density=self.config.density, - brightness=self.config.brightness, + while retry_count <= max_retries and self._running and not session_needs_rotation: + try: + self._session_count += 1 + if self.verbose: + if retry_count > 0: + print(f" [Lyria] Retry attempt {retry_count}/{max_retries}...") + elif self._session_count > 1: + print( + f" [Lyria] Starting session #{self._session_count} (rotation)..." ) - ) + else: + print(" [Lyria] Connecting to live session...") - # Set the music style prompt - await session.set_weighted_prompts( - prompts=[types.WeightedPrompt(text=self.config.prompt, weight=1.0)] + # Connect to Lyria music model + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", message="Realtime music generation is experimental" ) + async with self._client.aio.live.music.connect( + model="models/lyria-realtime-exp", + ) as session: + self._session = session + self._session_start_time = time.monotonic() + retry_count = 0 # Reset on successful connection + + if self.verbose: + print(" [Lyria] Session connected") + + # Configure music generation parameters + await session.set_music_generation_config( + config=types.LiveMusicGenerationConfig( + bpm=self.config.bpm, + temperature=self.config.temperature, + guidance=self.config.guidance, + density=self.config.density, + brightness=self.config.brightness, + ) + ) - # Start playback - await session.play() + # Set the music style prompt + await session.set_weighted_prompts( + prompts=[types.WeightedPrompt(text=self.config.prompt, weight=1.0)] + ) + + # Start playback + await session.play() + if self.verbose: + print(" [Lyria] Playback started, receiving audio...") + + # Receive audio chunks + async for message in session.receive(): + if not self._running: + return # Clean exit + + # Check session duration for rotation + elapsed = time.monotonic() - self._session_start_time + if elapsed >= self.session_duration: + if self.verbose: + print( + f" [Lyria] Session #{self._session_count} " + f"reached {elapsed:.0f}s, rotating..." 
+ ) + session_needs_rotation = True + break # Exit receive loop to rotate + + # Check for audio data in the message + if ( + hasattr(message, "server_content") + and message.server_content + and hasattr(message.server_content, "audio_chunks") + and message.server_content.audio_chunks + ): + # Get raw audio data from the chunk + audio_data = message.server_content.audio_chunks[0].data + + # Raw 16-bit PCM audio + audio_int16 = np.frombuffer(audio_data, dtype=np.int16) + audio_float = audio_int16.astype(np.float32) / 32768.0 + + # Reshape to stereo (interleaved L/R samples) + if self.config.channels == 2 and len(audio_float) >= 2: + audio_float = audio_float.reshape(-1, 2) + + # Apply crossfade from previous session + if self._crossfade_buffer is not None: + audio_float = _apply_crossfade( + self._crossfade_buffer, + audio_float, + self.config.sample_rate, + ) + self._crossfade_buffer = None # Clear buffer + + # Buffer tail ONLY when approaching rotation + # to avoid crossfading continuous playback + time_until_rotation = ( + self.session_duration - elapsed + ) + if time_until_rotation <= 5.0: # Last 5 seconds + crossfade_samples = int( + CROSSFADE_DURATION_SECONDS + * self.config.sample_rate + ) + if len(audio_float) >= crossfade_samples: + tail = audio_float[-crossfade_samples:] + self._crossfade_buffer = tail.copy() + + yield audio_float + + # If session ended cleanly without rotation, we're done + if not session_needs_rotation: + return + + except Exception as e: + error_msg = str(e) + + # Check for non-retryable errors (model not found) + if "404" in error_msg or "not found" in error_msg.lower(): if self.verbose: - print(" [Lyria] Playback started, receiving audio...") - - # Receive audio chunks - async for message in session.receive(): - if not self._running: - return # Clean exit - - # Check for audio data in the message - # The Lyria API returns audio in server_content.audio_chunks - if ( - hasattr(message, "server_content") - and message.server_content - and hasattr(message.server_content, "audio_chunks") - and message.server_content.audio_chunks - ): - # Get raw audio data from the chunk - audio_data = message.server_content.audio_chunks[0].data - - # Raw 16-bit PCM audio - audio_int16 = np.frombuffer(audio_data, dtype=np.int16) - audio_float = audio_int16.astype(np.float32) / 32768.0 - - # Reshape to stereo (interleaved L/R samples) - if self.config.channels == 2 and len(audio_float) >= 2: - audio_float = audio_float.reshape(-1, 2) - - yield audio_float - - # Normal completion (session.receive() ended cleanly) - return - - except Exception as e: - error_msg = str(e) - - # Check for non-retryable errors (model not found) - if "404" in error_msg or "not found" in error_msg.lower(): - if self.verbose: - print(f" ⚠️ Lyria model unreachable ({error_msg})") - print(" 🔄 Falling back to Enhanced Synth engine...") - break # Go straight to fallback - - # Check for retryable errors (WebSocket issues, service unavailable) - is_retryable = any( - indicator in error_msg.lower() - for indicator in [ - "1011", # WebSocket internal error - "service", # "service unavailable" - "connection", # Connection errors - "websocket", # WebSocket errors - "timeout", # Timeouts - "closed", # Connection closed - ] - ) + print(f" ⚠️ Lyria model unreachable ({error_msg})") + print(" 🔄 Falling back to Enhanced Synth engine...") + use_fallback = True + break + + # Check for retryable errors + is_retryable = any( + indicator in error_msg.lower() + for indicator in [ + "1011", + "service", + "connection", + 
"websocket", + "timeout", + "closed", + ] + ) - if is_retryable and retry_count < max_retries: - retry_count += 1 - delay = base_delay * (2 ** (retry_count - 1)) # Exponential backoff - if self.verbose: - print( - f" ⚠️ Lyria connection error: {error_msg[:80]}..." - if len(error_msg) > 80 - else f" ⚠️ Lyria connection error: {error_msg}" - ) - print(f" 🔄 Retrying in {delay:.1f}s ({retry_count}/{max_retries})...") - await asyncio.sleep(delay) - continue - else: - # Non-retryable or retries exhausted - if self.verbose: - if retry_count >= max_retries: - print(f" ❌ Lyria retries exhausted after {max_retries} attempts") - else: - print(f" ❌ Lyria error (non-retryable): {error_msg}") - print(" 🔄 Falling back to Enhanced Synth engine...") - break # Go to fallback + if is_retryable and retry_count < max_retries: + retry_count += 1 + delay = base_delay * (2 ** (retry_count - 1)) + if self.verbose: + print( + f" ⚠️ Lyria connection error: {error_msg[:80]}..." + if len(error_msg) > 80 + else f" ⚠️ Lyria connection error: {error_msg}" + ) + print( + f" 🔄 Retrying in {delay:.1f}s " + f"({retry_count}/{max_retries})..." + ) + await asyncio.sleep(delay) + continue + else: + if self.verbose: + if retry_count >= max_retries: + print(f" ❌ Lyria retries exhausted after {max_retries} attempts") + else: + print(f" ❌ Lyria error (non-retryable): {error_msg}") + print(" 🔄 Falling back to Enhanced Synth engine...") + use_fallback = True + break + + # Yield transition audio during session rotation gap + # This fills the silence while the new session connects + if session_needs_rotation and self._crossfade_buffer is not None: + if self.verbose: + print(" [Lyria] Playing transition audio...") + # Apply fade-out to transition buffer and yield it + # This provides audio continuity during connection + fade_samples = len(self._crossfade_buffer) + fade_out = np.linspace(1.0, 0.3, fade_samples, dtype=np.float32) + if self._crossfade_buffer.ndim == 2: + fade_out = fade_out[:, np.newaxis] + transition_audio = self._crossfade_buffer * fade_out + yield transition_audio.astype(np.float32) + # Keep buffer for crossfade with new session's first chunk + # (but attenuated to avoid double-loud join) + self._crossfade_buffer = (self._crossfade_buffer * 0.3).astype(np.float32) # Fallback to EnhancedSynthClient - if self._running: + if self._running and use_fallback: synth = EnhancedSynthClient(self.config, verbose=self.verbose) await synth.connect() async for chunk in synth.generate_stream(): @@ -334,7 +457,10 @@ async def set_prompt(self, new_prompt: str) -> None: def create_client( - config: LyriaConfig, use_mock: bool = False, verbose: bool = False + config: LyriaConfig, + use_mock: bool = False, + verbose: bool = False, + session_duration: int = SESSION_MAX_DURATION_SECONDS, ) -> LyriaClient | EnhancedSynthClient: """Create a Lyria client or fallback to Synth. @@ -342,6 +468,7 @@ def create_client( config: Configuration for music generation. use_mock: If True, force usage of Synth client. verbose: If True, enable verbose debug output. + session_duration: Duration in seconds before session rotation (max 540). Returns: LyriaClient or EnhancedSynthClient. 
@@ -349,4 +476,4 @@ def create_client( if use_mock or not GENAI_AVAILABLE: return EnhancedSynthClient(config, verbose=verbose) - return LyriaClient(config, verbose=verbose) + return LyriaClient(config, session_duration=session_duration, verbose=verbose) From 910db587509f43e637dee2341a505a6d8ed61890 Mon Sep 17 00:00:00 2001 From: Alireza Eliaderani <172368+cubny@users.noreply.github.com> Date: Tue, 13 Jan 2026 22:53:34 +0100 Subject: [PATCH 2/2] make lint happy --- src/focus/generation/lyria_client.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/focus/generation/lyria_client.py b/src/focus/generation/lyria_client.py index c03a434..0868c5a 100644 --- a/src/focus/generation/lyria_client.py +++ b/src/focus/generation/lyria_client.py @@ -21,9 +21,7 @@ CROSSFADE_DURATION_SECONDS = 3.0 # Seconds to buffer for seamless transition -def _apply_crossfade( - old_audio: np.ndarray, new_audio: np.ndarray, sample_rate: int -) -> np.ndarray: +def _apply_crossfade(old_audio: np.ndarray, new_audio: np.ndarray, sample_rate: int) -> np.ndarray: """Apply crossfade between two audio chunks for seamless transition. Args: @@ -63,6 +61,7 @@ def _apply_crossfade( return np.concatenate([blended, new_audio[overlap_samples:]], axis=0) return blended + try: from google import genai from google.genai import types @@ -248,13 +247,10 @@ async def generate_stream(self) -> AsyncIterator[np.ndarray]: # Buffer tail ONLY when approaching rotation # to avoid crossfading continuous playback - time_until_rotation = ( - self.session_duration - elapsed - ) + time_until_rotation = self.session_duration - elapsed if time_until_rotation <= 5.0: # Last 5 seconds crossfade_samples = int( - CROSSFADE_DURATION_SECONDS - * self.config.sample_rate + CROSSFADE_DURATION_SECONDS * self.config.sample_rate ) if len(audio_float) >= crossfade_samples: tail = audio_float[-crossfade_samples:] @@ -300,8 +296,7 @@ async def generate_stream(self) -> AsyncIterator[np.ndarray]: else f" ⚠️ Lyria connection error: {error_msg}" ) print( - f" 🔄 Retrying in {delay:.1f}s " - f"({retry_count}/{max_retries})..." + f" 🔄 Retrying in {delay:.1f}s ({retry_count}/{max_retries})..." ) await asyncio.sleep(delay) continue