From 9bc9adbc6bf225aa25052b464f343a519411a96d Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 4 Mar 2026 13:12:26 -0600 Subject: [PATCH] fix: prevent buffer overruns from destroying stream start position MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The server's initial audio burst can far exceed the client's PCM buffer capacity, especially for compact codecs like OPUS (~43s of audio in seconds on LAN). The old "drop oldest" overrun behavior destroyed the beginning of the stream, causing the player to start from the wrong position — up to 22+ seconds into the song while other players started from the beginning. Three-layer fix: 1. Pre-playback overrun safety net: discard incoming audio instead of dropping oldest when the buffer fills before playback starts. This preserves the stream's starting position. 2. SkipStaleAudio: when playback starts and the first segment's timestamp is in the past (late pipeline init), skip forward to near-current audio so all players start at the same position. 3. Buffer capacity: increase default from 30s to 120s (~44MB) with 60s minimum enforcement to prevent stale user configs from causing undersized buffers. Also derives BufferCapacity (compressed bytes advertised to server in client/hello) from the actual PCM buffer duration and worst-case codec bitrate, instead of hardcoding 32MB. Co-Authored-By: Claude Opus 4.6 --- src/Sendspin.SDK/Audio/TimedAudioBuffer.cs | 107 +++++++++++++++++- src/Sendspin.SDK/Client/ClientCapabilities.cs | 4 +- src/SendspinClient/App.xaml.cs | 33 +++++- src/SendspinClient/appsettings.json | 2 +- 4 files changed, 133 insertions(+), 13 deletions(-) diff --git a/src/Sendspin.SDK/Audio/TimedAudioBuffer.cs b/src/Sendspin.SDK/Audio/TimedAudioBuffer.cs index 5c37c44..e8b129c 100644 --- a/src/Sendspin.SDK/Audio/TimedAudioBuffer.cs +++ b/src/Sendspin.SDK/Audio/TimedAudioBuffer.cs @@ -194,7 +194,7 @@ public double SmoothedSyncErrorMicroseconds /// /// Audio format for samples. /// Clock synchronizer for timestamp conversion. - /// Maximum buffer capacity in milliseconds. Should be large enough to absorb the server's initial burst (typically 30s). + /// Maximum buffer capacity in milliseconds. Should be large enough to absorb the server's initial burst. Compact codecs like OPUS can burst 40-60+ seconds; 120s recommended. /// Optional sync correction options. Uses if not provided. /// Optional logger for diagnostics (uses NullLogger if not provided). public TimedAudioBuffer( @@ -241,14 +241,35 @@ public void Write(ReadOnlySpan samples, long serverTimestamp) // Convert server timestamp to local playback time var localPlaybackTime = _clockSync.ServerToClientTime(serverTimestamp); - // Check for overrun - drop oldest if needed + // Check for overrun if (_count + samples.Length > _buffer.Length) { + _overrunCount++; + + if (!_playbackStarted) + { + // Before playback starts, discard INCOMING audio to preserve the stream's + // starting position. The server's initial burst can far exceed buffer capacity + // (especially for compact codecs like OPUS), and dropping the oldest audio + // would destroy the beginning of the stream — causing the player to start + // from the wrong position (potentially tens of seconds into the song). + if (_overrunCount <= 3 || _overrunCount % 500 == 0) + { + _logger.LogDebug( + "[Buffer] Pre-playback overrun #{Count}: discarding incoming {ChunkMs:F1}ms to preserve stream start (buffer full at {CapacityMs}ms)", + _overrunCount, + samples.Length / (double)_samplesPerMs, + _buffer.Length / (double)_samplesPerMs); + } + + return; // Discard incoming chunk — do NOT drop oldest + } + + // During playback, drop oldest to make room (normal overrun behavior) var toDrop = (_count + samples.Length) - _buffer.Length; DropOldestSamples(toDrop); - _overrunCount++; - _logger.LogWarning( - "[Buffer] Overrun #{Count}: dropped {DroppedMs:F1}ms of audio to make room (buffer full at {CapacityMs}ms)", + _logger.LogDebug( + "[Buffer] Overrun #{Count}: dropped {DroppedMs:F1}ms of oldest audio (buffer full at {CapacityMs}ms)", _overrunCount, toDrop / (double)_samplesPerMs, _buffer.Length / (double)_samplesPerMs); @@ -315,6 +336,18 @@ public int Read(Span buffer, long currentLocalTime) return 0; } + // If scheduled time is significantly in the past, skip forward to near-current audio. + if (timeUntilStart < -_syncOptions.ScheduledStartGraceWindowMicroseconds) + { + SkipStaleAudio(currentLocalTime); + } + + _logger.LogInformation( + "[Buffer] Playback starting (Read): timeUntilStart={TimeUntilStart:F1}ms, " + + "buffered={BufferedMs:F0}ms, segments={Segments}, scheduledStart={Scheduled}", + timeUntilStart / 1000.0, _count / (double)_samplesPerMs, + _segments.Count, _scheduledStartLocalTime); + // Scheduled time arrived - start playback! _playbackStarted = true; _waitingForScheduledStart = false; @@ -470,6 +503,18 @@ public int ReadRaw(Span buffer, long currentLocalTime) return 0; } + // Skip stale audio if scheduled time is in the past + if (timeUntilStart < -_syncOptions.ScheduledStartGraceWindowMicroseconds) + { + SkipStaleAudio(currentLocalTime); + } + + _logger.LogInformation( + "[Buffer] Playback starting: timeUntilStart={TimeUntilStart:F1}ms, " + + "buffered={BufferedMs:F0}ms, segments={Segments}, scheduledStart={Scheduled}", + timeUntilStart / 1000.0, _count / (double)_samplesPerMs, + _segments.Count, _scheduledStartLocalTime); + _playbackStarted = true; _waitingForScheduledStart = false; _playbackStartLocalTime = currentLocalTime - CalibratedStartupLatencyMicroseconds; @@ -866,6 +911,58 @@ private int PeekSamplesFromBufferAtOffset(Span destination, int count, in return peeked; } + /// + /// Skips forward through the buffer to discard stale audio whose playback time has already passed. + /// Called when playback starts and the buffer contains audio with timestamps in the past. + /// Without this, a large buffer holding a server burst would start playing from audio that's + /// 20+ seconds old, causing massive sync offset vs other players (even though sync error reads 0). + /// Must be called under lock. + /// + /// Current local time in microseconds. + /// Number of samples skipped. + private int SkipStaleAudio(long currentLocalTime) + { + var totalSkipped = 0; + + // Skip segments whose playback time is in the past, but keep at least + // the target buffer depth worth of audio so we have something to play + while (_segments.Count > 1 && _count > _samplesPerMs * (int)TargetBufferMilliseconds) + { + var segment = _segments.Peek(); + + // Stop skipping once we reach audio that's near-current or in the future. + // Use the grace window as tolerance (default 10ms). + if (segment.LocalPlaybackTime >= currentLocalTime - _syncOptions.ScheduledStartGraceWindowMicroseconds) + break; + + // This segment is stale — skip it + var toSkip = Math.Min(segment.SampleCount, _count); + _readPos = (_readPos + toSkip) % _buffer.Length; + _count -= toSkip; + _droppedSamples += toSkip; + totalSkipped += toSkip; + ConsumeSegments(toSkip); + } + + if (totalSkipped > 0) + { + var skippedMs = totalSkipped / (double)_samplesPerMs; + _logger.LogInformation( + "[Buffer] Skipped {SkippedMs:F0}ms of stale audio on playback start (buffer had audio from the past)", + skippedMs); + + // Re-anchor scheduled start to the new first segment + if (_segments.Count > 0) + { + var newFirst = _segments.Peek(); + _scheduledStartLocalTime = newFirst.LocalPlaybackTime; + _nextExpectedPlaybackTime = newFirst.LocalPlaybackTime; + } + } + + return totalSkipped; + } + /// /// Drops the oldest samples to make room for new data. /// Must be called under lock. diff --git a/src/Sendspin.SDK/Client/ClientCapabilities.cs b/src/Sendspin.SDK/Client/ClientCapabilities.cs index 601732d..5f14e58 100644 --- a/src/Sendspin.SDK/Client/ClientCapabilities.cs +++ b/src/Sendspin.SDK/Client/ClientCapabilities.cs @@ -42,7 +42,9 @@ public sealed class ClientCapabilities }; /// - /// Audio buffer capacity in bytes (32MB like reference implementation). + /// Audio buffer capacity in compressed bytes. The server uses this to limit how much + /// audio it sends ahead. Should be derived from your PCM buffer duration and the + /// highest-bitrate codec you support. Default is 32MB (reference implementation fallback). /// public int BufferCapacity { get; set; } = 32_000_000; diff --git a/src/SendspinClient/App.xaml.cs b/src/SendspinClient/App.xaml.cs index 50e593d..c79f1c6 100644 --- a/src/SendspinClient/App.xaml.cs +++ b/src/SendspinClient/App.xaml.cs @@ -210,6 +210,24 @@ private void ConfigureServices(IServiceCollection services) Log.Information("Player volume: {Volume}%, Muted: {Muted}", playerVolume, playerMuted); + // Read buffer capacity early — needed for both ClientCapabilities and AudioPipeline + const int minBufferCapacityMs = 60000; // 60s minimum — OPUS bursts can exceed 40s + var configuredCapacityMs = _configuration!.GetValue("Audio:Buffer:CapacityMs", 120000); + var bufferCapacityMs = Math.Max(configuredCapacityMs, minBufferCapacityMs); + + // Derive compressed-byte buffer capacity from our PCM buffer duration. + // The server uses this to limit how much audio it sends ahead. + // Use worst-case bitrate (PCM uncompressed) so no codec can overflow our buffer. + var maxBytesPerSecond = audioFormats + .Select(f => f.Codec == "opus" && f.Bitrate > 0 + ? f.Bitrate * 1000 / 8 // OPUS: use declared bitrate (kbps → bytes/sec) + : f.SampleRate * f.Channels * Math.Max(f.BitDepth ?? 16, 16) / 8) // PCM/FLAC: raw sample rate + .Max(); + var bufferCapacityBytes = (int)((long)bufferCapacityMs * maxBytesPerSecond / 1000); + Log.Information( + "Buffer: {CapacityMs}ms PCM → {CapacityMB:F1}MB advertised to server (worst-case {MaxKbps}kbps)", + bufferCapacityMs, bufferCapacityBytes / 1024.0 / 1024.0, maxBytesPerSecond * 8 / 1000); + services.AddSingleton(new ClientCapabilities { ClientId = clientId, @@ -219,7 +237,8 @@ private void ConfigureServices(IServiceCollection services) SoftwareVersion = appVersion, AudioFormats = audioFormats, InitialVolume = playerVolume, - InitialMuted = playerMuted + InitialMuted = playerMuted, + BufferCapacity = bufferCapacityBytes }); // Clock synchronization for multi-room audio sync @@ -284,12 +303,14 @@ private void ConfigureServices(IServiceCollection services) var decoderFactory = sp.GetRequiredService(); var clockSync = sp.GetRequiredService(); - // Read buffer configuration - // Capacity must hold the full server burst without overflowing. - // We advertise 32MB BufferCapacity (compressed bytes) — for FLAC with ~4:1 - // compression, that decodes to 30+ seconds of audio. Default 30s matches this. + // Buffer capacity (bufferCapacityMs) was computed earlier for ClientCapabilities var bufferTargetMs = _configuration!.GetValue("Audio:Buffer:TargetMs", 250); - var bufferCapacityMs = _configuration!.GetValue("Audio:Buffer:CapacityMs", 30000); + if (configuredCapacityMs < minBufferCapacityMs) + { + logger.LogWarning( + "Buffer capacity {ConfiguredMs}ms is below minimum {MinMs}ms (stale user config?), using {ActualMs}ms", + configuredCapacityMs, minBufferCapacityMs, bufferCapacityMs); + } // Read clock sync wait configuration var waitForConvergence = _configuration!.GetValue("Audio:ClockSync:WaitForConvergence", true); diff --git a/src/SendspinClient/appsettings.json b/src/SendspinClient/appsettings.json index 06acc4b..a314b4c 100644 --- a/src/SendspinClient/appsettings.json +++ b/src/SendspinClient/appsettings.json @@ -18,7 +18,7 @@ "StaticDelayMs": 200, "Buffer": { "TargetMs": 250, - "CapacityMs": 30000 + "CapacityMs": 120000 }, "ClockSync": { "ForgetFactor": 1.001,