12 changes: 12 additions & 0 deletions internal/audio/reader.go
@@ -4,6 +4,7 @@ package audio
import (
"errors"
"fmt"
"math"

ffmpeg "github.com/linuxmatters/ffmpeg-statigo"
)
@@ -183,6 +184,17 @@ func (r *Reader) GetDecoderContext() *ffmpeg.AVCodecContext {
return r.decCtx
}

// Seek seeks to the specified timestamp in AV_TIME_BASE units.
// Use 0 to seek to the beginning of the file. After seeking, the decoder
// buffers are flushed so that subsequent ReadFrame calls return fresh data.
func (r *Reader) Seek(timestamp int64) error {
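// Note on the arguments below: stream_index -1 selects the default stream, so the
// timestamps are in AV_TIME_BASE units, and the MinInt64/MaxInt64 bounds let
// avformat_seek_file land on the nearest seekable point around the target.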
if _, err := ffmpeg.AVFormatSeekFile(r.fmtCtx, -1, math.MinInt64, timestamp, math.MaxInt64, 0); err != nil {
return fmt.Errorf("failed to seek: %w", err)
}
ffmpeg.AVCodecFlushBuffers(r.decCtx)
return nil
}

// Close releases all resources
func (r *Reader) Close() {
if r.frame != nil {
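Below is a minimal usage sketch of the new Seek method. It is illustrative only: the twoPass wrapper and the idea of two analysis passes are assumptions, the import path for the internal audio package is not shown in this diff, and ReadFrame appears only in comments because its exact signature is outside the changed lines.

// Sketch: rewind a Reader between two analysis passes.
// Assumes: import "fmt" plus the repository's internal audio package.
func twoPass(path string) error {
	reader, _, err := audio.OpenAudioFile(path)
	if err != nil {
		return fmt.Errorf("open: %w", err)
	}
	defer reader.Close()

	// ... first pass: call ReadFrame until EOF ...

	// Rewind to the start; Seek also flushes the decoder buffers,
	// so the second pass reads fresh frames from t=0.
	if err := reader.Seek(0); err != nil {
		return fmt.Errorf("rewind: %w", err)
	}

	// ... second pass ...
	return nil
}
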
217 changes: 154 additions & 63 deletions internal/processor/analyzer.go
@@ -792,15 +792,12 @@ func getIntervalsInRange(intervals []IntervalSample, start, end time.Duration) [
return nil
}

// Find first interval at or after start time
startIdx := -1
for i, interval := range intervals {
if interval.Timestamp >= start {
startIdx = i
break
}
}
if startIdx < 0 {
// Find first interval at or after start time using binary search
// (intervals are sorted by timestamp from the collection loop in AnalyzeAudio)
startIdx := sort.Search(len(intervals), func(i int) bool {
return intervals[i].Timestamp >= start
})
if startIdx >= len(intervals) {
return nil
}
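
For context on the new guard: sort.Search returns the smallest index in [0, n) for which the predicate reports true, or n when no element matches, which is why the code above checks startIdx >= len(intervals). A tiny self-contained illustration (the timestamps are made up):

package main

import (
	"fmt"
	"sort"
	"time"
)

func main() {
	timestamps := []time.Duration{0, 2 * time.Second, 4 * time.Second, 6 * time.Second}
	start := 3 * time.Second
	idx := sort.Search(len(timestamps), func(i int) bool {
		return timestamps[i] >= start
	})
	fmt.Println(idx) // 2: the first timestamp at or after 3s
}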

@@ -1153,6 +1150,36 @@ func roomToneScore(interval IntervalSample, rmsP50, fluxP50 float64) float64 {
return roomToneAmplitudeWeight*amplitudeScore + roomToneFluxWeight*fluxScore
}

// silenceMedians holds pre-computed median values for silence/room-tone detection.
// Avoids redundant O(n log n) sorts when the same interval data is used by
// multiple detection functions.
type silenceMedians struct {
rmsP50 float64
fluxP50 float64
}

// computeSilenceMedians calculates RMS and spectral flux medians from the
// search interval slice used for silence/room-tone detection. The caller
// passes the already-sliced searchIntervals (first silenceSearchPercent% of intervals).
func computeSilenceMedians(searchIntervals []IntervalSample) silenceMedians {
if len(searchIntervals) == 0 {
return silenceMedians{}
}
rmsLevels := make([]float64, len(searchIntervals))
fluxValues := make([]float64, len(searchIntervals))
for i, interval := range searchIntervals {
rmsLevels[i] = interval.RMSLevel
fluxValues[i] = interval.SpectralFlux
}
sort.Float64s(rmsLevels)
sort.Float64s(fluxValues)

return silenceMedians{
rmsP50: rmsLevels[len(rmsLevels)/2],
fluxP50: fluxValues[len(fluxValues)/2],
}
}
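
One behavioural note carried over from the code this helper replaces: indexing the sorted slice at len/2 picks the upper of the two middle values when the length is even, rather than averaging them. A quick illustration:

package main

import (
	"fmt"
	"sort"
)

func main() {
	rms := []float64{0.4, 0.1, 0.3, 0.2}
	sort.Float64s(rms)           // [0.1 0.2 0.3 0.4]
	fmt.Println(rms[len(rms)/2]) // 0.3 (upper middle), not the 0.25 midpoint average
}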

// estimateNoiseFloorAndThreshold analyses interval data to estimate noise floor and silence threshold.
// Returns (noiseFloor, silenceThreshold, ok). If ok is false, fallback values should be used.
//
@@ -1163,7 +1190,7 @@ func roomToneScore(interval IntervalSample, rmsP50, fluxP50 float64) float64 {
//
// The noise floor is the max RMS of high-confidence room tone intervals.
// The silence threshold adds headroom to the noise floor for detection margin.
func estimateNoiseFloorAndThreshold(intervals []IntervalSample) (noiseFloor, silenceThreshold float64, ok bool) {
func estimateNoiseFloorAndThreshold(intervals []IntervalSample, medians silenceMedians) (noiseFloor, silenceThreshold float64, ok bool) {
if len(intervals) < silenceThresholdMinIntervals {
return 0, 0, false
}
@@ -1175,18 +1202,9 @@ func estimateNoiseFloorAndThreshold(intervals []IntervalSample) (noiseFloor, sil
}
searchIntervals := intervals[:searchLimit]

// Calculate medians for scoring reference
rmsLevels := make([]float64, len(searchIntervals))
fluxValues := make([]float64, len(searchIntervals))
for i, interval := range searchIntervals {
rmsLevels[i] = interval.RMSLevel
fluxValues[i] = interval.SpectralFlux
}
sort.Float64s(rmsLevels)
sort.Float64s(fluxValues)

rmsP50 := rmsLevels[len(rmsLevels)/2]
fluxP50 := fluxValues[len(fluxValues)/2]
// Use pre-computed medians for scoring reference
rmsP50 := medians.rmsP50
fluxP50 := medians.fluxP50

// Score each interval for room tone likelihood
type scoredInterval struct {
@@ -1233,15 +1251,15 @@ func estimateNoiseFloorAndThreshold(intervals []IntervalSample) (noiseFloor, sil
// Uses a room tone score approach that considers both amplitude and spectral stability.
//
// Detection algorithm:
// 1. Calculate reference values (medians) for room tone scoring
// 1. Use pre-computed reference values (medians) for room tone scoring
// 2. Score each interval for "room tone likelihood"
// 3. Use a score threshold (0.5) to identify room tone intervals
// 4. Find consecutive runs that meet minimum duration (8 seconds)
//
// The RMS threshold parameter is used as a hard ceiling - intervals above it
// cannot be silence regardless of spectral characteristics.
// Candidates in the first 15 seconds are excluded (typically contains intro).
func findSilenceCandidatesFromIntervals(intervals []IntervalSample, threshold float64, _ float64) []SilenceRegion {
func findSilenceCandidatesFromIntervals(intervals []IntervalSample, threshold float64, medians silenceMedians) []SilenceRegion {
if len(intervals) < minimumSilenceIntervals {
return nil
}
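
To make steps 3 and 4 of the doc comment above concrete, here is a hedged, stand-alone sketch of the consecutive-run scan. The 0.5 score threshold and 8-second minimum come from that comment; everything else, including the 2-second interval length and the region type, is illustrative rather than the exact analyzer.go implementation.

package main

import (
	"fmt"
	"time"
)

type region struct{ Start, Duration time.Duration }

func main() {
	const (
		scoreThreshold = 0.5
		minRun         = 8 * time.Second
		intervalLen    = 2 * time.Second // assumed interval size for the sketch
	)
	scores := []float64{0.2, 0.7, 0.8, 0.9, 0.6, 0.1, 0.7}

	var regions []region
	runStart := -1
	flush := func(end int) {
		if runStart < 0 {
			return
		}
		if d := time.Duration(end-runStart) * intervalLen; d >= minRun {
			regions = append(regions, region{time.Duration(runStart) * intervalLen, d})
		}
		runStart = -1
	}
	for i, s := range scores {
		if s >= scoreThreshold {
			if runStart < 0 {
				runStart = i
			}
			continue
		}
		flush(i)
	}
	flush(len(scores))
	fmt.Println(regions) // [{2s 8s}]: one run long enough to qualify
}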
@@ -1251,20 +1269,10 @@ func findSilenceCandidatesFromIntervals(intervals []IntervalSample, threshold fl
if searchLimit < minimumSilenceIntervals {
searchLimit = minimumSilenceIntervals
}
searchIntervals := intervals[:searchLimit]

// Calculate medians for room tone scoring
rmsLevels := make([]float64, len(searchIntervals))
fluxValues := make([]float64, len(searchIntervals))
for i, interval := range searchIntervals {
rmsLevels[i] = interval.RMSLevel
fluxValues[i] = interval.SpectralFlux
}
sort.Float64s(rmsLevels)
sort.Float64s(fluxValues)

rmsP50 := rmsLevels[len(rmsLevels)/2]
fluxP50 := fluxValues[len(fluxValues)/2]
// Use pre-computed medians for room tone scoring
rmsP50 := medians.rmsP50
fluxP50 := medians.fluxP50

var candidates []SilenceRegion
var silenceStart time.Duration
@@ -1750,14 +1758,13 @@ func extractSpectralMetrics(metadata *ffmpeg.AVDictionary) spectralMetrics {
// extractIntervalFrameMetrics extracts per-frame metrics for interval accumulation.
// Only collects metrics that are valid per-window (aspectralstats, ebur128 windowed).
// Excludes astats which provides cumulative values, not per-interval.
func extractIntervalFrameMetrics(metadata *ffmpeg.AVDictionary) intervalFrameMetrics {
func extractIntervalFrameMetrics(metadata *ffmpeg.AVDictionary, spectral spectralMetrics) intervalFrameMetrics {
var m intervalFrameMetrics

// Peak level from astats (used for max tracking, which is valid per-interval)
m.PeakLevel, _ = getFloatMetadata(metadata, metaKeyPeakLevel)

// aspectralstats metrics (valid per-window measurements)
spectral := extractSpectralMetrics(metadata)
// aspectralstats metrics (valid per-window measurements, pre-extracted by caller)
m.SpectralMean = spectral.Mean
m.SpectralVariance = spectral.Variance
m.SpectralCentroid = spectral.Centroid
@@ -1790,14 +1797,14 @@ func extractIntervalFrameMetrics(metadata *ffmpeg.AVDictionary) intervalFrameMet
// extractFrameMetadata extracts audio analysis metadata from a filtered frame.
// Updates accumulators with spectral, astats, and ebur128 measurements.
// Called from both the main processing loop and the flush loop.
func extractFrameMetadata(metadata *ffmpeg.AVDictionary, acc *metadataAccumulators) {
func extractFrameMetadata(metadata *ffmpeg.AVDictionary, acc *metadataAccumulators, spectral spectralMetrics) {
if metadata == nil {
return
}

// Extract all aspectralstats measurements (averaged across frames)
// Accumulate pre-extracted spectral metrics (averaged across frames)
// For mono audio, spectral stats are under channel .1
acc.accumulateSpectral(extractSpectralMetrics(metadata))
acc.accumulateSpectral(spectral)

// Extract astats measurements (cumulative, so we keep the latest)
// For mono audio, stats are under channel .1
@@ -2299,12 +2306,16 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f
return nil, fmt.Errorf("failed to get filtered frame: %w", err)
}

// Extract spectral metrics once, reuse for both whole-file and interval accumulators
metadata := filteredFrame.Metadata()
spectral := extractSpectralMetrics(metadata)

// Extract measurements from frame metadata (whole-file accumulators)
extractFrameMetadata(filteredFrame.Metadata(), acc)
extractFrameMetadata(metadata, acc, spectral)

// Also accumulate into current interval for per-interval spectral data
// Filtered frames roughly correspond to input timing (just at higher sample rate)
intervalAcc.add(extractIntervalFrameMetrics(filteredFrame.Metadata()))
intervalAcc.add(extractIntervalFrameMetrics(metadata, spectral))

ffmpeg.AVFrameUnref(filteredFrame)
}
@@ -2324,11 +2335,15 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f
return nil, fmt.Errorf("failed to get filtered frame: %w", err)
}

// Extract spectral metrics once, reuse for both whole-file and interval accumulators
metadata := filteredFrame.Metadata()
spectral := extractSpectralMetrics(metadata)

// Extract measurements from remaining frames
extractFrameMetadata(filteredFrame.Metadata(), acc)
extractFrameMetadata(metadata, acc, spectral)

// Also accumulate into current interval for per-interval spectral data
intervalAcc.add(extractIntervalFrameMetrics(filteredFrame.Metadata()))
intervalAcc.add(extractIntervalFrameMetrics(metadata, spectral))

ffmpeg.AVFrameUnref(filteredFrame)
}
@@ -2347,7 +2362,18 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f

// Estimate noise floor and silence threshold from interval data
// This replaces the previous separate pre-scan pass
noiseFloorEstimate, silenceThreshold, ok := estimateNoiseFloorAndThreshold(intervals)

// Pre-compute silence detection medians (shared by noise estimation and candidate detection)
silSearchLimit := len(intervals) * silenceSearchPercent / 100
if silSearchLimit < silenceThresholdMinIntervals {
silSearchLimit = silenceThresholdMinIntervals
}
if silSearchLimit > len(intervals) {
silSearchLimit = len(intervals)
}
silMedians := computeSilenceMedians(intervals[:silSearchLimit])

noiseFloorEstimate, silenceThreshold, ok := estimateNoiseFloorAndThreshold(intervals, silMedians)
if !ok {
// Fallback if insufficient interval data (very short recordings)
noiseFloorEstimate = defaultNoiseFloor
@@ -2473,7 +2499,7 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f

// Detect silence regions using threshold already computed from interval distribution
// The silenceThreshold was calculated above via estimateNoiseFloorAndThreshold()
measurements.SilenceRegions = findSilenceCandidatesFromIntervals(intervals, silenceThreshold, 0)
measurements.SilenceRegions = findSilenceCandidatesFromIntervals(intervals, silenceThreshold, silMedians)

// Extract noise profile from best silence region (if available)
// Uses interval data for all measurements - no file re-reading required
@@ -3478,6 +3504,20 @@ func scoreSpeechCandidate(m *SpeechCandidateMetrics) float64 {
//
// Returns full SilenceCandidateMetrics with all amplitude, spectral, and loudness measurements.
func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*SilenceCandidateMetrics, error) {
// Open the processed audio file
reader, _, err := audio.OpenAudioFile(outputPath)
if err != nil {
return nil, fmt.Errorf("failed to open output file: %w", err)
}
defer reader.Close()

return measureOutputSilenceRegionFromReader(reader, region)
}

// measureOutputSilenceRegionFromReader performs the silence region measurement
// using an already-opened audio reader. This enables the combined
// MeasureOutputRegions function to share a single file open/close cycle.
func measureOutputSilenceRegionFromReader(reader *audio.Reader, region SilenceRegion) (*SilenceCandidateMetrics, error) {
// Diagnostic logging: function entry with region details
debugLog("=== MeasureOutputSilenceRegion: start=%.3fs, duration=%.3fs ===",
region.Start.Seconds(), region.Duration.Seconds())
@@ -3490,13 +3530,6 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen
return nil, fmt.Errorf("invalid region: non-positive duration")
}

// Open the processed audio file
reader, _, err := audio.OpenAudioFile(outputPath)
if err != nil {
return nil, fmt.Errorf("failed to open output file: %w", err)
}
defer reader.Close()

// Build filter spec to extract and analyze the silence region
// Filter chain captures all measurements for comprehensive analysis:
// 1. atrim: extract the specific time region (start/duration format)
@@ -3836,6 +3869,57 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen
return metrics, nil
}

// MeasureOutputRegions measures both silence and speech regions from the same
// output file in a single open/close cycle. This avoids redundant file opens,
// demuxing, and decoding that would occur when calling MeasureOutputSilenceRegion
// and MeasureOutputSpeechRegion independently.
//
// Either region parameter may be nil to skip that measurement. Returns nil for
// any skipped or failed measurement (non-fatal — matches existing behaviour).
func MeasureOutputRegions(outputPath string, silenceRegion *SilenceRegion, speechRegion *SpeechRegion) (*SilenceCandidateMetrics, *SpeechCandidateMetrics) {
if silenceRegion == nil && speechRegion == nil {
return nil, nil
}

// Open the output file once for both measurements
reader, _, err := audio.OpenAudioFile(outputPath)
if err != nil {
debugLog("Warning: Failed to open output file for region measurements: %v", err)
return nil, nil
}
defer reader.Close()

// Measure silence region first (if requested)
var silenceMetrics *SilenceCandidateMetrics
if silenceRegion != nil {
silenceMetrics, err = measureOutputSilenceRegionFromReader(reader, *silenceRegion)
if err != nil {
debugLog("Warning: Failed to measure silence region: %v", err)
// Non-fatal — continue to speech measurement
}
}

// Seek back to the beginning before measuring the speech region
if speechRegion != nil {
if silenceRegion != nil {
// Only need to seek if we already read through the file for silence
if err := reader.Seek(0); err != nil {
debugLog("Warning: Failed to seek for speech region measurement: %v", err)
return silenceMetrics, nil
}
}

speechMetrics, err := measureOutputSpeechRegionFromReader(reader, *speechRegion)
if err != nil {
debugLog("Warning: Failed to measure speech region: %v", err)
return silenceMetrics, nil
}
return silenceMetrics, speechMetrics
}

return silenceMetrics, nil
}
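
A short caller-side sketch of the combined entry point. The processor package name, the validateOutput wrapper, and its parameters are assumptions for illustration; nil metrics simply mean that a measurement was skipped or failed.

// Sketch only: drive both measurements through a single file open.
func validateOutput(outputPath string, silence *processor.SilenceRegion, speech *processor.SpeechRegion) {
	silenceMetrics, speechMetrics := processor.MeasureOutputRegions(outputPath, silence, speech)
	if silenceMetrics != nil {
		// use the silence amplitude/spectral/loudness measurements for validation
	}
	if speechMetrics != nil {
		// compare against the input speech metrics for adaptive tuning
	}
}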

// MeasureOutputSpeechRegion analyses a speech region in the output file
// to capture comprehensive metrics for adaptive filter tuning and validation.
//
@@ -3844,6 +3928,20 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen
//
// Returns full SpeechCandidateMetrics with all amplitude, spectral, and loudness measurements.
func MeasureOutputSpeechRegion(outputPath string, region SpeechRegion) (*SpeechCandidateMetrics, error) {
// Open the processed audio file
reader, _, err := audio.OpenAudioFile(outputPath)
if err != nil {
return nil, fmt.Errorf("failed to open output file: %w", err)
}
defer reader.Close()

return measureOutputSpeechRegionFromReader(reader, region)
}

// measureOutputSpeechRegionFromReader performs the speech region measurement
// using an already-opened audio reader. This enables the combined
// MeasureOutputRegions function to share a single file open/close cycle.
func measureOutputSpeechRegionFromReader(reader *audio.Reader, region SpeechRegion) (*SpeechCandidateMetrics, error) {
// Diagnostic logging: function entry with region details
debugLog("=== MeasureOutputSpeechRegion: start=%.3fs, duration=%.3fs ===",
region.Start.Seconds(), region.Duration.Seconds())
@@ -3856,13 +3954,6 @@ func MeasureOutputSpeechRegion(outputPath string, region SpeechRegion) (*SpeechC
return nil, fmt.Errorf("invalid region: non-positive duration")
}

// Open the processed audio file
reader, _, err := audio.OpenAudioFile(outputPath)
if err != nil {
return nil, fmt.Errorf("failed to open output file: %w", err)
}
defer reader.Close()

// Build filter spec to extract and analyze the speech region
// Filter chain captures all measurements for comprehensive analysis:
// 1. atrim: extract the specific time region (start/duration format)