diff --git a/internal/audio/reader.go b/internal/audio/reader.go index 3dd05be..31f149c 100644 --- a/internal/audio/reader.go +++ b/internal/audio/reader.go @@ -4,6 +4,7 @@ package audio import ( "errors" "fmt" + "math" ffmpeg "github.com/linuxmatters/ffmpeg-statigo" ) @@ -183,6 +184,17 @@ func (r *Reader) GetDecoderContext() *ffmpeg.AVCodecContext { return r.decCtx } +// Seek seeks to the specified timestamp in AV_TIME_BASE units (use 0 to rewind to the +// beginning of the file). On success the decoder buffers are flushed so that subsequent +// ReadFrame calls return fresh data; on failure the wrapped error is returned without flushing. +func (r *Reader) Seek(timestamp int64) error { + if _, err := ffmpeg.AVFormatSeekFile(r.fmtCtx, -1, math.MinInt64, timestamp, math.MaxInt64, 0); err != nil { + return fmt.Errorf("failed to seek: %w", err) + } + ffmpeg.AVCodecFlushBuffers(r.decCtx) + return nil +} + // Close releases all resources func (r *Reader) Close() { if r.frame != nil { diff --git a/internal/processor/analyzer.go b/internal/processor/analyzer.go index cb1ec57..a7e7b0d 100644 --- a/internal/processor/analyzer.go +++ b/internal/processor/analyzer.go @@ -792,15 +792,12 @@ func getIntervalsInRange(intervals []IntervalSample, start, end time.Duration) [ return nil } - // Find first interval at or after start time - startIdx := -1 - for i, interval := range intervals { - if interval.Timestamp >= start { - startIdx = i - break - } - } - if startIdx < 0 { + // Find first interval at or after start time using binary search + // (intervals are sorted by timestamp from the collection loop in AnalyzeAudio) + startIdx := sort.Search(len(intervals), func(i int) bool { + return intervals[i].Timestamp >= start + }) + if startIdx >= len(intervals) { return nil } @@ -1153,6 +1150,36 @@ func roomToneScore(interval IntervalSample, rmsP50, fluxP50 float64) float64 { return roomToneAmplitudeWeight*amplitudeScore + roomToneFluxWeight*fluxScore } +// silenceMedians holds pre-computed median values for silence/room-tone detection. 
+// Avoids redundant O(n log n) sorts when the same interval data is used by +// multiple detection functions. +type silenceMedians struct { + rmsP50 float64 + fluxP50 float64 +} + +// computeSilenceMedians calculates RMS and spectral flux medians from the search interval slice used for +// silence/room-tone detection; the caller passes the already-sliced searchIntervals (first silenceSearchPercent% of intervals). +// Values are copied before sorting, so the input keeps its order. Each median is element len/2 of the sorted copy (the upper middle for even lengths); empty input yields the zero silenceMedians. +func computeSilenceMedians(searchIntervals []IntervalSample) silenceMedians { + if len(searchIntervals) == 0 { + return silenceMedians{} + } + rmsLevels := make([]float64, len(searchIntervals)) + fluxValues := make([]float64, len(searchIntervals)) + for i, interval := range searchIntervals { + rmsLevels[i] = interval.RMSLevel + fluxValues[i] = interval.SpectralFlux + } + sort.Float64s(rmsLevels) + sort.Float64s(fluxValues) + + return silenceMedians{ + rmsP50: rmsLevels[len(rmsLevels)/2], + fluxP50: fluxValues[len(fluxValues)/2], + } +} + // estimateNoiseFloorAndThreshold analyses interval data to estimate noise floor and silence threshold. // Returns (noiseFloor, silenceThreshold, ok). If ok is false, fallback values should be used. // @@ -1163,7 +1190,7 @@ func roomToneScore(interval IntervalSample, rmsP50, fluxP50 float64) float64 { // // The noise floor is the max RMS of high-confidence room tone intervals. // The silence threshold adds headroom to the noise floor for detection margin. 
-func estimateNoiseFloorAndThreshold(intervals []IntervalSample) (noiseFloor, silenceThreshold float64, ok bool) { +func estimateNoiseFloorAndThreshold(intervals []IntervalSample, medians silenceMedians) (noiseFloor, silenceThreshold float64, ok bool) { if len(intervals) < silenceThresholdMinIntervals { return 0, 0, false } @@ -1175,18 +1202,9 @@ func estimateNoiseFloorAndThreshold(intervals []IntervalSample) (noiseFloor, sil } searchIntervals := intervals[:searchLimit] - // Calculate medians for scoring reference - rmsLevels := make([]float64, len(searchIntervals)) - fluxValues := make([]float64, len(searchIntervals)) - for i, interval := range searchIntervals { - rmsLevels[i] = interval.RMSLevel - fluxValues[i] = interval.SpectralFlux - } - sort.Float64s(rmsLevels) - sort.Float64s(fluxValues) - - rmsP50 := rmsLevels[len(rmsLevels)/2] - fluxP50 := fluxValues[len(fluxValues)/2] + // Use pre-computed medians for scoring reference + rmsP50 := medians.rmsP50 + fluxP50 := medians.fluxP50 // Score each interval for room tone likelihood type scoredInterval struct { @@ -1233,7 +1251,7 @@ func estimateNoiseFloorAndThreshold(intervals []IntervalSample) (noiseFloor, sil // Uses a room tone score approach that considers both amplitude and spectral stability. // // Detection algorithm: -// 1. Calculate reference values (medians) for room tone scoring +// 1. Use pre-computed reference values (medians) for room tone scoring // 2. Score each interval for "room tone likelihood" // 3. Use a score threshold (0.5) to identify room tone intervals // 4. Find consecutive runs that meet minimum duration (8 seconds) @@ -1241,7 +1259,7 @@ func estimateNoiseFloorAndThreshold(intervals []IntervalSample) (noiseFloor, sil // The RMS threshold parameter is used as a hard ceiling - intervals above it // cannot be silence regardless of spectral characteristics. // Candidates in the first 15 seconds are excluded (typically contains intro). 
-func findSilenceCandidatesFromIntervals(intervals []IntervalSample, threshold float64, _ float64) []SilenceRegion { +func findSilenceCandidatesFromIntervals(intervals []IntervalSample, threshold float64, medians silenceMedians) []SilenceRegion { if len(intervals) < minimumSilenceIntervals { return nil } @@ -1251,20 +1269,10 @@ func findSilenceCandidatesFromIntervals(intervals []IntervalSample, threshold fl if searchLimit < minimumSilenceIntervals { searchLimit = minimumSilenceIntervals } - searchIntervals := intervals[:searchLimit] - - // Calculate medians for room tone scoring - rmsLevels := make([]float64, len(searchIntervals)) - fluxValues := make([]float64, len(searchIntervals)) - for i, interval := range searchIntervals { - rmsLevels[i] = interval.RMSLevel - fluxValues[i] = interval.SpectralFlux - } - sort.Float64s(rmsLevels) - sort.Float64s(fluxValues) - rmsP50 := rmsLevels[len(rmsLevels)/2] - fluxP50 := fluxValues[len(fluxValues)/2] + // Use pre-computed medians for room tone scoring + rmsP50 := medians.rmsP50 + fluxP50 := medians.fluxP50 var candidates []SilenceRegion var silenceStart time.Duration @@ -1750,14 +1758,13 @@ func extractSpectralMetrics(metadata *ffmpeg.AVDictionary) spectralMetrics { // extractIntervalFrameMetrics extracts per-frame metrics for interval accumulation. // Only collects metrics that are valid per-window (aspectralstats, ebur128 windowed). // Excludes astats which provides cumulative values, not per-interval. 
-func extractIntervalFrameMetrics(metadata *ffmpeg.AVDictionary) intervalFrameMetrics { +func extractIntervalFrameMetrics(metadata *ffmpeg.AVDictionary, spectral spectralMetrics) intervalFrameMetrics { var m intervalFrameMetrics // Peak level from astats (used for max tracking, which is valid per-interval) m.PeakLevel, _ = getFloatMetadata(metadata, metaKeyPeakLevel) - // aspectralstats metrics (valid per-window measurements) - spectral := extractSpectralMetrics(metadata) + // aspectralstats metrics (valid per-window measurements, pre-extracted by caller) m.SpectralMean = spectral.Mean m.SpectralVariance = spectral.Variance m.SpectralCentroid = spectral.Centroid @@ -1790,14 +1797,14 @@ func extractIntervalFrameMetrics(metadata *ffmpeg.AVDictionary) intervalFrameMet // extractFrameMetadata extracts audio analysis metadata from a filtered frame. // Updates accumulators with spectral, astats, and ebur128 measurements. // Called from both the main processing loop and the flush loop. -func extractFrameMetadata(metadata *ffmpeg.AVDictionary, acc *metadataAccumulators) { +func extractFrameMetadata(metadata *ffmpeg.AVDictionary, acc *metadataAccumulators, spectral spectralMetrics) { if metadata == nil { return } - // Extract all aspectralstats measurements (averaged across frames) + // Accumulate pre-extracted spectral metrics (averaged across frames) // For mono audio, spectral stats are under channel .1 - acc.accumulateSpectral(extractSpectralMetrics(metadata)) + acc.accumulateSpectral(spectral) // Extract astats measurements (cumulative, so we keep the latest) // For mono audio, stats are under channel .1 @@ -2299,12 +2306,16 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f return nil, fmt.Errorf("failed to get filtered frame: %w", err) } + // Extract spectral metrics once, reuse for both whole-file and interval accumulators + metadata := filteredFrame.Metadata() + spectral := extractSpectralMetrics(metadata) + // Extract measurements 
from frame metadata (whole-file accumulators) - extractFrameMetadata(filteredFrame.Metadata(), acc) + extractFrameMetadata(metadata, acc, spectral) // Also accumulate into current interval for per-interval spectral data // Filtered frames roughly correspond to input timing (just at higher sample rate) - intervalAcc.add(extractIntervalFrameMetrics(filteredFrame.Metadata())) + intervalAcc.add(extractIntervalFrameMetrics(metadata, spectral)) ffmpeg.AVFrameUnref(filteredFrame) } @@ -2324,11 +2335,15 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f return nil, fmt.Errorf("failed to get filtered frame: %w", err) } + // Extract spectral metrics once, reuse for both whole-file and interval accumulators + metadata := filteredFrame.Metadata() + spectral := extractSpectralMetrics(metadata) + // Extract measurements from remaining frames - extractFrameMetadata(filteredFrame.Metadata(), acc) + extractFrameMetadata(metadata, acc, spectral) // Also accumulate into current interval for per-interval spectral data - intervalAcc.add(extractIntervalFrameMetrics(filteredFrame.Metadata())) + intervalAcc.add(extractIntervalFrameMetrics(metadata, spectral)) ffmpeg.AVFrameUnref(filteredFrame) } @@ -2347,7 +2362,18 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f // Estimate noise floor and silence threshold from interval data // This replaces the previous separate pre-scan pass - noiseFloorEstimate, silenceThreshold, ok := estimateNoiseFloorAndThreshold(intervals) + + // Pre-compute silence detection medians (shared by noise estimation and candidate detection) + silSearchLimit := len(intervals) * silenceSearchPercent / 100 + if silSearchLimit < silenceThresholdMinIntervals { + silSearchLimit = silenceThresholdMinIntervals + } + if silSearchLimit > len(intervals) { + silSearchLimit = len(intervals) + } + silMedians := computeSilenceMedians(intervals[:silSearchLimit]) + + noiseFloorEstimate, silenceThreshold, ok := 
estimateNoiseFloorAndThreshold(intervals, silMedians) if !ok { // Fallback if insufficient interval data (very short recordings) noiseFloorEstimate = defaultNoiseFloor @@ -2473,7 +2499,7 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f // Detect silence regions using threshold already computed from interval distribution // The silenceThreshold was calculated above via estimateNoiseFloorAndThreshold() - measurements.SilenceRegions = findSilenceCandidatesFromIntervals(intervals, silenceThreshold, 0) + measurements.SilenceRegions = findSilenceCandidatesFromIntervals(intervals, silenceThreshold, silMedians) // Extract noise profile from best silence region (if available) // Uses interval data for all measurements - no file re-reading required @@ -3478,6 +3504,20 @@ func scoreSpeechCandidate(m *SpeechCandidateMetrics) float64 { // // Returns full SilenceCandidateMetrics with all amplitude, spectral, and loudness measurements. func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*SilenceCandidateMetrics, error) { + // Open the processed audio file + reader, _, err := audio.OpenAudioFile(outputPath) + if err != nil { + return nil, fmt.Errorf("failed to open output file: %w", err) + } + defer reader.Close() + + return measureOutputSilenceRegionFromReader(reader, region) +} + +// measureOutputSilenceRegionFromReader performs the silence region measurement +// using an already-opened audio reader. This enables the combined +// MeasureOutputRegions function to share a single file open/close cycle. 
+func measureOutputSilenceRegionFromReader(reader *audio.Reader, region SilenceRegion) (*SilenceCandidateMetrics, error) { // Diagnostic logging: function entry with region details debugLog("=== MeasureOutputSilenceRegion: start=%.3fs, duration=%.3fs ===", region.Start.Seconds(), region.Duration.Seconds()) @@ -3490,13 +3530,6 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen return nil, fmt.Errorf("invalid region: non-positive duration") } - // Open the processed audio file - reader, _, err := audio.OpenAudioFile(outputPath) - if err != nil { - return nil, fmt.Errorf("failed to open output file: %w", err) - } - defer reader.Close() - // Build filter spec to extract and analyze the silence region // Filter chain captures all measurements for comprehensive analysis: // 1. atrim: extract the specific time region (start/duration format) @@ -3836,6 +3869,70 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen return metrics, nil } +// MeasureOutputRegions measures both silence and speech regions from the same +// output file, sharing a single open/close cycle. This avoids the redundant +// file open that would occur when calling MeasureOutputSilenceRegion and +// MeasureOutputSpeechRegion independently. Each region is still measured in +// its own pass over the shared reader, which is rewound in between. +// +// Either region parameter may be nil to skip that measurement. Returns nil for +// any skipped or failed measurement (non-fatal — matches existing behaviour). +// If the rewind fails, the speech region is measured via MeasureOutputSpeechRegion +// (a fresh open) instead of being dropped. 
+func MeasureOutputRegions(outputPath string, silenceRegion *SilenceRegion, speechRegion *SpeechRegion) (*SilenceCandidateMetrics, *SpeechCandidateMetrics) { + if silenceRegion == nil && speechRegion == nil { + return nil, nil + } + + // Open the output file once for both measurements + reader, _, err := audio.OpenAudioFile(outputPath) + if err != nil { + debugLog("Warning: Failed to open output file for region measurements: %v", err) + return nil, nil + } + defer reader.Close() + + // Measure silence region first (if requested) + var silenceMetrics *SilenceCandidateMetrics + if silenceRegion != nil { + silenceMetrics, err = measureOutputSilenceRegionFromReader(reader, *silenceRegion) + if err != nil { + debugLog("Warning: Failed to measure silence region: %v", err) + // Non-fatal — continue to speech measurement, but discard whatever the + // failed call returned so callers only ever see complete metrics + silenceMetrics = nil + } + } + + if speechRegion == nil { + return silenceMetrics, nil + } + + // Seek back to the beginning before measuring the speech region + // (only needed if we already read through the file for silence) + rewound := true + if silenceRegion != nil { + if seekErr := reader.Seek(0); seekErr != nil { + debugLog("Warning: Failed to seek for speech region measurement: %v", seekErr) + rewound = false + } + } + + var speechMetrics *SpeechCandidateMetrics + if rewound { + speechMetrics, err = measureOutputSpeechRegionFromReader(reader, *speechRegion) + } else { + // Rewind failed: measure through a fresh open rather than dropping the + // speech metrics (the shared reader stays open until this function returns) + speechMetrics, err = MeasureOutputSpeechRegion(outputPath, *speechRegion) + } + if err != nil { + debugLog("Warning: Failed to measure speech region: %v", err) + return silenceMetrics, nil + } + return silenceMetrics, speechMetrics +} + // MeasureOutputSpeechRegion analyses a speech region in the output file // to capture comprehensive metrics for adaptive filter tuning and validation. // @@ -3844,6 +3941,20 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen // // Returns full SpeechCandidateMetrics with all amplitude, spectral, and loudness measurements. 
func MeasureOutputSpeechRegion(outputPath string, region SpeechRegion) (*SpeechCandidateMetrics, error) { + // Open the processed audio file + reader, _, err := audio.OpenAudioFile(outputPath) + if err != nil { + return nil, fmt.Errorf("failed to open output file: %w", err) + } + defer reader.Close() + + return measureOutputSpeechRegionFromReader(reader, region) +} + +// measureOutputSpeechRegionFromReader performs the speech region measurement +// using an already-opened audio reader. This enables the combined +// MeasureOutputRegions function to share a single file open/close cycle. +func measureOutputSpeechRegionFromReader(reader *audio.Reader, region SpeechRegion) (*SpeechCandidateMetrics, error) { // Diagnostic logging: function entry with region details debugLog("=== MeasureOutputSpeechRegion: start=%.3fs, duration=%.3fs ===", region.Start.Seconds(), region.Duration.Seconds()) @@ -3856,13 +3954,6 @@ func MeasureOutputSpeechRegion(outputPath string, region SpeechRegion) (*SpeechC return nil, fmt.Errorf("invalid region: non-positive duration") } - // Open the processed audio file - reader, _, err := audio.OpenAudioFile(outputPath) - if err != nil { - return nil, fmt.Errorf("failed to open output file: %w", err) - } - defer reader.Close() - // Build filter spec to extract and analyze the speech region // Filter chain captures all measurements for comprehensive analysis: // 1. 
atrim: extract the specific time region (start/duration format) diff --git a/internal/processor/normalise.go b/internal/processor/normalise.go index 28769fa..c4537df 100644 --- a/internal/processor/normalise.go +++ b/internal/processor/normalise.go @@ -165,6 +165,10 @@ func measureWithLoudnorm(inputPath string, config *FilterChainConfig, progressCa } // Note: We free the filter graph explicitly to trigger loudnorm JSON output + // Allocate frame for pulling filtered output (reused across all iterations) + filteredFrame := ffmpeg.AVFrameAlloc() + defer ffmpeg.AVFrameFree(&filteredFrame) + // Process all frames through loudnorm (no encoding - just measurement) for { frame, err := reader.ReadFrame() @@ -181,14 +185,12 @@ func measureWithLoudnorm(inputPath string, config *FilterChainConfig, progressCa } // Pull filtered frames (discard - we only want the measurements) - filteredFrame := ffmpeg.AVFrameAlloc() for { if _, err := ffmpeg.AVBuffersinkGetFrame(bufferSinkCtx, filteredFrame); err != nil { break } ffmpeg.AVFrameUnref(filteredFrame) } - ffmpeg.AVFrameFree(&filteredFrame) // Progress update periodically (every N frames for smooth updates) frameCount++ @@ -200,14 +202,12 @@ func measureWithLoudnorm(inputPath string, config *FilterChainConfig, progressCa // Flush filter graph if _, err := ffmpeg.AVBuffersrcAddFrameFlags(bufferSrcCtx, nil, 0); err == nil { - filteredFrame := ffmpeg.AVFrameAlloc() for { if _, err := ffmpeg.AVBuffersinkGetFrame(bufferSinkCtx, filteredFrame); err != nil { break } ffmpeg.AVFrameUnref(filteredFrame) } - ffmpeg.AVFrameFree(&filteredFrame) } // Free filter graph to trigger loudnorm JSON output @@ -559,6 +559,10 @@ func applyLoudnormAndMeasure( var samplesProcessed int64 const progressUpdateInterval = 100 // Send progress update every N frames + // Allocate frame for pulling filtered output (reused across all iterations) + filteredFrame := ffmpeg.AVFrameAlloc() + defer ffmpeg.AVFrameFree(&filteredFrame) + for { frame, err := 
reader.ReadFrame() if err != nil { @@ -577,7 +581,6 @@ func applyLoudnormAndMeasure( } // Pull filtered frames - filteredFrame := ffmpeg.AVFrameAlloc() for { if _, err := ffmpeg.AVBuffersinkGetFrame(bufferSinkCtx, filteredFrame); err != nil { if errors.Is(err, ffmpeg.EAgain) || errors.Is(err, ffmpeg.AVErrorEOF) { @@ -593,7 +596,6 @@ func applyLoudnormAndMeasure( // Encode frame if err := encoder.WriteFrame(filteredFrame); err != nil { ffmpeg.AVFrameUnref(filteredFrame) - ffmpeg.AVFrameFree(&filteredFrame) ffmpeg.AVFilterGraphFree(&filterGraph) return 0.0, 0.0, nil, getLoudnormStats(), fmt.Errorf("encoding failed: %w", err) } @@ -601,7 +603,6 @@ func applyLoudnormAndMeasure( framesProcessed++ ffmpeg.AVFrameUnref(filteredFrame) } - ffmpeg.AVFrameFree(&filteredFrame) // Progress update periodically (every N input frames for smooth updates) if progressCallback != nil && framesProcessed%progressUpdateInterval == 0 { @@ -612,7 +613,6 @@ func applyLoudnormAndMeasure( // Flush filter graph if _, err := ffmpeg.AVBuffersrcAddFrameFlags(bufferSrcCtx, nil, 0); err == nil { - filteredFrame := ffmpeg.AVFrameAlloc() for { if _, err := ffmpeg.AVBuffersinkGetFrame(bufferSinkCtx, filteredFrame); err != nil { break @@ -623,14 +623,12 @@ func applyLoudnormAndMeasure( if err := encoder.WriteFrame(filteredFrame); err != nil { ffmpeg.AVFrameUnref(filteredFrame) - ffmpeg.AVFrameFree(&filteredFrame) ffmpeg.AVFilterGraphFree(&filterGraph) return 0.0, 0.0, nil, getLoudnormStats(), fmt.Errorf("encoding failed during flush: %w", err) } ffmpeg.AVFrameUnref(filteredFrame) } - ffmpeg.AVFrameFree(&filteredFrame) } // Flush encoder @@ -660,32 +658,30 @@ func applyLoudnormAndMeasure( // Build complete OutputMeasurements from accumulators finalMeasurements := finalizeOutputMeasurements(&acc) - // Measure silence region in final output (same region as Pass 1 noise profile) + // Measure silence and speech regions in final output (same regions as Pass 1 profiles) // NOTE: inputPath now contains the 
normalised output after os.Rename above - if inputMeasurements != nil && inputMeasurements.NoiseProfile != nil { - silenceRegion := SilenceRegion{ - Start: inputMeasurements.NoiseProfile.Start, - End: inputMeasurements.NoiseProfile.Start + inputMeasurements.NoiseProfile.Duration, - Duration: inputMeasurements.NoiseProfile.Duration, - } - if silenceSample, err := MeasureOutputSilenceRegion(inputPath, silenceRegion); err == nil { - finalMeasurements.SilenceSample = silenceSample + if inputMeasurements != nil { + var silRegion *SilenceRegion + var spRegion *SpeechRegion + if inputMeasurements.NoiseProfile != nil { + silRegion = &SilenceRegion{ + Start: inputMeasurements.NoiseProfile.Start, + End: inputMeasurements.NoiseProfile.Start + inputMeasurements.NoiseProfile.Duration, + Duration: inputMeasurements.NoiseProfile.Duration, + } } - // Non-fatal if measurement fails - we still have the other output measurements - } - - // Measure speech region in final output (same region as Pass 1 speech profile) - // NOTE: inputPath now contains the normalised output after os.Rename above - if inputMeasurements != nil && inputMeasurements.SpeechProfile != nil { - speechRegion := SpeechRegion{ - Start: inputMeasurements.SpeechProfile.Region.Start, - End: inputMeasurements.SpeechProfile.Region.End, - Duration: inputMeasurements.SpeechProfile.Region.Duration, + if inputMeasurements.SpeechProfile != nil { + spRegion = &SpeechRegion{ + Start: inputMeasurements.SpeechProfile.Region.Start, + End: inputMeasurements.SpeechProfile.Region.End, + Duration: inputMeasurements.SpeechProfile.Region.Duration, + } } - if speechSample, err := MeasureOutputSpeechRegion(inputPath, speechRegion); err == nil { - finalMeasurements.SpeechSample = speechSample + if silRegion != nil || spRegion != nil { + silSample, spSample := MeasureOutputRegions(inputPath, silRegion, spRegion) + finalMeasurements.SilenceSample = silSample + finalMeasurements.SpeechSample = spSample } - // Non-fatal if measurement fails - 
we still have the other output measurements } return acc.ebur128OutputI, acc.ebur128OutputTP, finalMeasurements, stats, nil diff --git a/internal/processor/processor.go b/internal/processor/processor.go index 1aa97a9..265ad39 100644 --- a/internal/processor/processor.go +++ b/internal/processor/processor.go @@ -88,36 +88,29 @@ func ProcessAudio(inputPath string, config *FilterChainConfig, progressCallback progressCallback(2, "Processing", 1.0, 0.0, measurements) } - // Measure silence region in Pass 2 output (before normalisation) for noise comparison - if filteredMeasurements != nil && measurements.NoiseProfile != nil { - silenceRegion := SilenceRegion{ - Start: measurements.NoiseProfile.Start, - End: measurements.NoiseProfile.Start + measurements.NoiseProfile.Duration, - Duration: measurements.NoiseProfile.Duration, - } - if silenceSample, err := MeasureOutputSilenceRegion(outputPath, silenceRegion); err == nil { - filteredMeasurements.SilenceSample = silenceSample - } else { - // Log the error for debugging but don't fail the entire processing - debugLog("Warning: Failed to measure Pass 2 silence region: %v", err) + // Measure silence and speech regions in Pass 2 output (before normalisation) for comparison + if filteredMeasurements != nil { + var silRegion *SilenceRegion + var spRegion *SpeechRegion + if measurements.NoiseProfile != nil { + silRegion = &SilenceRegion{ + Start: measurements.NoiseProfile.Start, + End: measurements.NoiseProfile.Start + measurements.NoiseProfile.Duration, + Duration: measurements.NoiseProfile.Duration, + } } - // Non-fatal if measurement fails - we still have the other output measurements - } - - // Measure speech region in Pass 2 output (before normalisation) for processing comparison - if filteredMeasurements != nil && measurements.SpeechProfile != nil { - speechRegion := SpeechRegion{ - Start: measurements.SpeechProfile.Region.Start, - End: measurements.SpeechProfile.Region.End, - Duration: 
measurements.SpeechProfile.Region.Duration, + if measurements.SpeechProfile != nil { + spRegion = &SpeechRegion{ + Start: measurements.SpeechProfile.Region.Start, + End: measurements.SpeechProfile.Region.End, + Duration: measurements.SpeechProfile.Region.Duration, + } } - if speechSample, err := MeasureOutputSpeechRegion(outputPath, speechRegion); err == nil { - filteredMeasurements.SpeechSample = speechSample - } else { - // Log the error for debugging but don't fail the entire processing - debugLog("Warning: Failed to measure Pass 2 speech region: %v", err) + if silRegion != nil || spRegion != nil { + silSample, spSample := MeasureOutputRegions(outputPath, silRegion, spRegion) + filteredMeasurements.SilenceSample = silSample + filteredMeasurements.SpeechSample = spSample } - // Non-fatal if measurement fails - we still have the other output measurements } // Pass 3/4: Normalisation (measurement + loudnorm application)