From c76cd48f77ed720d00690dcb1f199a9591e6459e Mon Sep 17 00:00:00 2001 From: Martin Wimpress Date: Fri, 6 Feb 2026 17:23:47 +0000 Subject: [PATCH 1/6] refactor: remove legacy helpers and unify numeric helpers - Remove legacy/unused helpers and diagnostic placeholders: - delete formatNormalisationResult, writeDiagnosticAdaptive, joinWithComma - remove calculateLUFSGap, lerp, clampFloat, and SilenceAnalysis type - drop legacy Entropy fields used only for backwards compatibility - Standardise numeric helper usage in analyzer: - replace ad-hoc 20*log10(...) conversions with linearRatioToDB - replace clampFloat calls with shared clamp(...) helper - Trim obsolete diagnostics from internal/logging/report.go and remove placeholder adaptive diagnostics in adaptive.go Signed-off-by: Martin Wimpress --- internal/logging/report.go | 119 --------------------------------- internal/processor/adaptive.go | 15 ----- internal/processor/analyzer.go | 92 +++++-------------------- 3 files changed, 17 insertions(+), 209 deletions(-) diff --git a/internal/logging/report.go b/internal/logging/report.go index 7f898b4..132b714 100644 --- a/internal/logging/report.go +++ b/internal/logging/report.go @@ -356,88 +356,6 @@ func GenerateReport(data ReportData) error { return nil } -// formatNormalisationResult outputs the loudnorm normalisation pass details -func formatNormalisationResult(f *os.File, result *processor.NormalisationResult, config *processor.FilterChainConfig) { - writeSection(f, "Pass 3: Loudnorm Measurement") - - if result == nil || !config.LoudnormEnabled { - fmt.Fprintln(f, "Status: DISABLED") - return - } - - if result.Skipped { - fmt.Fprintln(f, "Status: SKIPPED") - return - } - - fmt.Fprintln(f, "Status: APPLIED") - fmt.Fprintln(f, "") - fmt.Fprintln(f, "Pre-normalisation (Pass 2 output):") - fmt.Fprintf(f, " Integrated loudness: %.1f LUFS\n", result.InputLUFS) - fmt.Fprintf(f, " True peak: %.1f dBTP\n", result.InputTP) - fmt.Fprintln(f, "") - - writeSection(f, "Pass 4: Loudnorm Normalisation") - fmt.Fprintln(f, "") - fmt.Fprintln(f, "Loudnorm configuration:") - if result.LinearModeForced { - fmt.Fprintf(f, " Target I: %.1f LUFS (adjusted from %.1f to preserve linear mode)\n", - result.EffectiveTargetI, result.RequestedTargetI) - } else { - fmt.Fprintf(f, " Target I: %.1f LUFS\n", config.LoudnormTargetI) - } - fmt.Fprintf(f, " Target TP: %.1f dBTP\n", config.LoudnormTargetTP) - fmt.Fprintf(f, " Target LRA: %.1f LU\n", config.LoudnormTargetLRA) - fmt.Fprintf(f, " Mode: %s\n", loudnormModeString(config.LoudnormLinear)) - fmt.Fprintf(f, " Dual mono: %v\n", config.LoudnormDualMono) - fmt.Fprintf(f, " Offset: %+.2f dB\n", result.GainApplied) - - // Display loudnorm measurement (from Pass 3, used for Pass 4 parameters) - fmt.Fprintln(f, "") - fmt.Fprintln(f, "Loudnorm measurement (from Pass 3):") - fmt.Fprintf(f, " Input I: %.2f LUFS\n", result.InputLUFS) - fmt.Fprintf(f, " Input TP: %.2f dBTP\n", result.InputTP) - fmt.Fprintf(f, " Target Offset: %.2f dB (from loudnorm, used in Pass 4)\n", result.GainApplied) - - // Display loudnorm filter's second pass stats (parsed from JSON output) - if result.LoudnormStats != nil { - stats := result.LoudnormStats - fmt.Fprintln(f, "") - fmt.Fprintln(f, "Loudnorm second pass diagnostics:") - fmt.Fprintf(f, " Input I: %s LUFS\n", stats.InputI) - fmt.Fprintf(f, " Input TP: %s dBTP\n", stats.InputTP) - fmt.Fprintf(f, " Input LRA: %s LU\n", stats.InputLRA) - fmt.Fprintf(f, " Input Thresh: %s LUFS\n", stats.InputThresh) - fmt.Fprintf(f, " Output I: %s LUFS\n", 
stats.OutputI) - fmt.Fprintf(f, " Output TP: %s dBTP\n", stats.OutputTP) - fmt.Fprintf(f, " Output LRA: %s LU\n", stats.OutputLRA) - fmt.Fprintf(f, " Output Thresh: %s LUFS\n", stats.OutputThresh) - fmt.Fprintf(f, " Norm Type: %s\n", stats.NormalizationType) - fmt.Fprintf(f, " Target Offset: %s dB\n", stats.TargetOffset) - } - - fmt.Fprintln(f, "") - fmt.Fprintln(f, "Post-normalisation:") - fmt.Fprintf(f, " Integrated loudness: %.1f LUFS\n", result.OutputLUFS) - fmt.Fprintf(f, " True peak: %.1f dBTP\n", result.OutputTP) - - fmt.Fprintln(f, "") - // Calculate deviation from effective target (what loudnorm was actually targeting) - effectiveDeviation := math.Abs(result.OutputLUFS - result.EffectiveTargetI) - if result.WithinTarget { - if result.LinearModeForced { - // Target was adjusted to preserve linear mode - requestedDeviation := math.Abs(result.OutputLUFS - result.RequestedTargetI) - fmt.Fprintf(f, "Result: ✓ Linear mode preserved (%.2f LU from effective target, %.2f LU from requested)\n", - effectiveDeviation, requestedDeviation) - } else { - fmt.Fprintf(f, "Result: ✓ Within target (deviation: %.2f LU)\n", effectiveDeviation) - } - } else { - fmt.Fprintf(f, "Result: ⚠ Outside tolerance (deviation: %.2f LU)\n", effectiveDeviation) - } -} - // loudnormModeString converts linear bool to readable mode string func loudnormModeString(linear bool) string { if linear { @@ -682,18 +600,6 @@ func formatNoiseRemoveFilter(f *os.File, cfg *processor.FilterChainConfig, m *pr cfg.NoiseRemoveCompandKnee) } -// joinWithComma joins string slice with comma separator -func joinWithComma(items []string) string { - result := "" - for i, item := range items { - if i > 0 { - result += ", " - } - result += item - } - return result -} - // formatDS201GateFilter outputs DS201-inspired gate filter details func formatDS201GateFilter(f *os.File, cfg *processor.FilterChainConfig, m *processor.AudioMeasurements, prefix string) { if !cfg.DS201GateEnabled { @@ -2443,15 +2349,6 @@ func writeDiagnosticLoudnorm(f *os.File, result *processor.NormalisationResult, fmt.Fprintln(f, "") } -// writeDiagnosticAdaptive outputs detailed adaptive parameter diagnostics. -// This section is filled by the existing formatFilterChain function. -// For now, we just write a header - the actual content comes from writeFilterChainApplied. -func writeDiagnosticAdaptive(f *os.File, config *processor.FilterChainConfig, measurements *processor.AudioMeasurements) { - // The filter chain section already contains adaptive rationale for each filter. - // This function is a placeholder for additional adaptive debugging if needed. - // Currently, all adaptive info is in writeFilterChainApplied. -} - // getFinalMeasurements safely extracts final measurements from the result. func getFinalMeasurements(result *processor.ProcessingResult) *processor.OutputMeasurements { if result == nil || result.NormResult == nil { @@ -2459,19 +2356,3 @@ func getFinalMeasurements(result *processor.ProcessingResult) *processor.OutputM } return result.NormResult.FinalMeasurements } - -// getFilteredNoise safely extracts filtered noise profile from the result. -func getFilteredNoise(result *processor.ProcessingResult) *processor.SilenceCandidateMetrics { - if result == nil || result.FilteredMeasurements == nil { - return nil - } - return result.FilteredMeasurements.SilenceSample -} - -// getFinalNoise safely extracts final noise profile from the result. 
-func getFinalNoise(result *processor.ProcessingResult) *processor.SilenceCandidateMetrics { - if result == nil || result.NormResult == nil || result.NormResult.FinalMeasurements == nil { - return nil - } - return result.NormResult.FinalMeasurements.SilenceSample -} diff --git a/internal/processor/adaptive.go b/internal/processor/adaptive.go index ecb5da3..248fb10 100644 --- a/internal/processor/adaptive.go +++ b/internal/processor/adaptive.go @@ -408,15 +408,6 @@ func AdaptConfig(config *FilterChainConfig, measurements *AudioMeasurements) { sanitizeConfig(config) } -// calculateLUFSGap returns the dB difference between target and input LUFS. -// Returns 0.0 if input is not measured. -func calculateLUFSGap(targetI, inputI float64) float64 { - if inputI != 0.0 { - return targetI - inputI - } - return 0.0 -} - // tuneDS201HighPass adapts DS201-inspired highpass composite filter based on: // - Spectral centroid (voice brightness/warmth) // - Spectral decrease (LF voice content - protects warm voices) @@ -1551,9 +1542,3 @@ func clamp(val, min, max float64) float64 { } return val } - -// lerp performs linear interpolation between a and b based on t (0-1). -// When t=0, returns a. When t=1, returns b. -func lerp(a, b, t float64) float64 { - return a + (b-a)*t -} diff --git a/internal/processor/analyzer.go b/internal/processor/analyzer.go index fa8c36b..14acd19 100644 --- a/internal/processor/analyzer.go +++ b/internal/processor/analyzer.go @@ -95,9 +95,6 @@ type SilenceCandidateMetrics struct { TruePeak float64 // dBTP, max true peak across region SamplePeak float64 // dBFS, max sample peak across region - // Legacy field for compatibility (same as SpectralEntropy) - Entropy float64 // Deprecated: use SpectralEntropy instead - // Warning flags (populated during scoring) TransientWarning string `json:"transient_warning,omitempty"` // Warning if danger zone signature detected @@ -691,7 +688,7 @@ func calculateFluxScore(flux float64) float64 { // Density at or above voicingDensityThreshold (60%) scores 1.0. // Lower densities score proportionally less. 
func calculateVoicingScore(voicingDensity float64) float64 { - return clampFloat(voicingDensity/voicingDensityThreshold, 0.0, 1.0) + return clamp(voicingDensity/voicingDensityThreshold, 0.0, 1.0) } const ( @@ -883,8 +880,6 @@ func measureSilenceCandidateFromIntervals(region SilenceRegion, intervals []Inte TruePeak: truePeakMax, SamplePeak: samplePeakMax, - Entropy: avgEntropy, // Legacy field for compatibility - StabilityScore: calculateStabilityScore(regionIntervals), } } @@ -1032,11 +1027,11 @@ func scoreSpeechIntervalWindow(intervals []IntervalSample) float64 { // Kurtosis score: higher kurtosis = clearer harmonics // Typical speech kurtosis ranges 5-10; score peaks around 7.5 (mid-point) // Reference: Gaussian kurtosis=3; speech harmonic structure produces 5-10 - kurtosisScore := clampFloat(avgKurtosis/7.5, 0.0, 1.0) + kurtosisScore := clamp(avgKurtosis/7.5, 0.0, 1.0) // Flatness score: lower flatness = more tonal = better speech // Flatness 0 = pure tone, 1 = white noise; speech typically 0.1-0.4 - flatnessScore := clampFloat(1.0-avgFlatness, 0.0, 1.0) + flatnessScore := clamp(1.0-avgFlatness, 0.0, 1.0) // Centroid score: peak at voice centre, decay toward edges // Voice range: speechCentroidMin (200 Hz) to speechCentroidMax (4500 Hz) @@ -1052,13 +1047,13 @@ func scoreSpeechIntervalWindow(intervals []IntervalSample) float64 { // Consistency score: low kurtosis variance = stable voicing // Variance > 100 is very inconsistent; clamp score at that point - consistencyScore := clampFloat(1.0-(kurtosisVariance/100.0), 0.0, 1.0) + consistencyScore := clamp(1.0-(kurtosisVariance/100.0), 0.0, 1.0) // RMS score: louder = more active speech // Range: -30 dBFS (worst) to -12 dBFS (best) rmsScore := 0.0 if avgRMS > -30.0 { - rmsScore = clampFloat((avgRMS-(-30.0))/18.0, 0.0, 1.0) + rmsScore = clamp((avgRMS-(-30.0))/18.0, 0.0, 1.0) } // Rolloff score: prefer regions with rolloff in typical voiced speech range. @@ -1829,22 +1824,11 @@ func extractFrameMetadata(metadata *ffmpeg.AVDictionary, acc *metadataAccumulato } if value, ok := getFloatMetadata(metadata, metaKeyEbur128TruePeak); ok { - // ebur128 reports true_peak as linear ratio, convert to dBTP - // dBTP = 20 * log10(linear) - if value > 0 { - acc.ebur128InputTP = 20 * math.Log10(value) - } else { - acc.ebur128InputTP = -120.0 // Floor for zero/negative values - } + acc.ebur128InputTP = linearRatioToDB(value) } - // Sample peak (linear ratio, convert to dB) if value, ok := getFloatMetadata(metadata, metaKeyEbur128SamplePeak); ok { - if value > 0 { - acc.ebur128InputSP = 20 * math.Log10(value) - } else { - acc.ebur128InputSP = -120.0 - } + acc.ebur128InputSP = linearRatioToDB(value) } if value, ok := getFloatMetadata(metadata, metaKeyEbur128LRA); ok { @@ -1997,17 +1981,6 @@ type AudioMeasurements struct { NoiseReductionHeadroom float64 `json:"noise_reduction_headroom"` // dB gap between noise and quiet speech } -// SilenceAnalysis contains measurements from a silence region. -// Used for comparing noise characteristics between input and output. 
-type SilenceAnalysis struct { - Start time.Duration `json:"start"` // Start time of silence region - Duration time.Duration `json:"duration"` // Duration of silence region - NoiseFloor float64 `json:"noise_floor"` // dBFS, RMS level of silence (average noise) - PeakLevel float64 `json:"peak_level"` // dBFS, peak level in silence - CrestFactor float64 `json:"crest_factor"` // Peak - RMS in dB - Entropy float64 `json:"entropy"` // Signal randomness (1.0 = white noise, lower = tonal) -} - // OutputMeasurements contains the measurements from Pass 2 output analysis. // Uses BaseMeasurements for comparison with AudioMeasurements. // Does not include silence detection or noise profile fields (those are input-only). @@ -2165,20 +2138,10 @@ func extractOutputFrameMetadata(metadata *ffmpeg.AVDictionary, acc *outputMetada acc.ebur128OutputS = value } if value, ok := getFloatMetadata(metadata, metaKeyEbur128TruePeak); ok { - // ebur128 reports true_peak as linear ratio, convert to dBTP - // dBTP = 20 * log10(linear) - if value > 0 { - acc.ebur128OutputTP = 20 * math.Log10(value) - } else { - acc.ebur128OutputTP = -120.0 // Floor for zero/negative values - } + acc.ebur128OutputTP = linearRatioToDB(value) } if value, ok := getFloatMetadata(metadata, metaKeyEbur128SamplePeak); ok { - if value > 0 { - acc.ebur128OutputSP = 20 * math.Log10(value) - } else { - acc.ebur128OutputSP = -120.0 - } + acc.ebur128OutputSP = linearRatioToDB(value) } if value, ok := getFloatMetadata(metadata, metaKeyEbur128LRA); ok { acc.ebur128OutputLRA = value @@ -3065,8 +3028,8 @@ func calculateStabilityScore(intervals []IntervalSample) float64 { // // RMS variance: 0 dB² (perfect) to 9 dB² (3 dB std dev, poor) // Flux: 0 (perfect) to 0.02 (stability threshold) - rmsStabilityScore := clampFloat(1.0-(rmsVariance/9.0), 0.0, 1.0) - fluxStabilityScore := clampFloat(1.0-(avgFlux/0.02), 0.0, 1.0) + rmsStabilityScore := clamp(1.0-(rmsVariance/9.0), 0.0, 1.0) + fluxStabilityScore := clamp(1.0-(avgFlux/0.02), 0.0, 1.0) // Combine: RMS variance more important (direct amplitude stability) return rmsStabilityScore*0.6 + fluxStabilityScore*0.4 @@ -3151,7 +3114,7 @@ func calculateSpectralScore(centroid, flatness, kurtosis float64) float64 { // Kurtosis score: lower = less peaked = better // Normalise: 0 → 1.0, 20+ → 0.0 - kurtosisScore := 1.0 - clampFloat(kurtosis/20.0, 0.0, 1.0) + kurtosisScore := 1.0 - clamp(kurtosis/20.0, 0.0, 1.0) // Combine with weights from the spec return centroidScore*0.5 + flatnessScore*0.3 + kurtosisScore*0.2 @@ -3204,17 +3167,6 @@ func calculateDurationScore(duration time.Duration) float64 { return math.Exp(-0.5 * (diff / sigmaSecs) * (diff / sigmaSecs)) } -// clampFloat clamps a value to the range [min, max]. -func clampFloat(value, min, max float64) float64 { - if value < min { - return min - } - if value > max { - return max - } - return value -} - // speechScore calculates how speech-like an interval is. // Returns 0.0-1.0 where higher = more likely to be speech. // Inverts silence detection criteria: rewards amplitude, voice-range centroid, low entropy. 
@@ -3230,7 +3182,7 @@ func speechScore(interval IntervalSample, rmsP50, centroidP50 float64) float64 { if interval.RMSLevel >= rmsP50 { // Above median: score increases up to +6dB boost := interval.RMSLevel - rmsP50 - ampScore = clampFloat(boost/6.0, 0.0, 1.0) + ampScore = clamp(boost/6.0, 0.0, 1.0) } // Centroid score: voice range (200-4500 Hz) = good @@ -3510,7 +3462,7 @@ func findBestSpeechRegion(regions []SpeechRegion, intervals []IntervalSample, no // Apply penalty factor rather than rejecting outright // This allows selection if no better candidates exist snrPenalty := snrMargin / minSNRMargin // 0.0 to 1.0 - score *= clampFloat(snrPenalty, 0.1, 1.0) + score *= clamp(snrPenalty, 0.1, 1.0) metrics.Score = score } } else { @@ -3577,7 +3529,7 @@ func scoreSpeechCandidate(m *SpeechCandidateMetrics) float64 { // Amplitude score: louder speech = better sample ampScore := 0.0 if m.RMSLevel > -30.0 { - ampScore = clampFloat((m.RMSLevel-(-30.0))/18.0, 0.0, 1.0) + ampScore = clamp((m.RMSLevel-(-30.0))/18.0, 0.0, 1.0) } // Centroid score: voice range = good @@ -3592,11 +3544,11 @@ func scoreSpeechCandidate(m *SpeechCandidateMetrics) float64 { if m.CrestFactor >= crestFactorMin && m.CrestFactor <= crestFactorMax { distFromIdeal := math.Abs(m.CrestFactor - crestFactorIdeal) maxDist := max(crestFactorIdeal-crestFactorMin, crestFactorMax-crestFactorIdeal) - crestScore = clampFloat(1.0-(distFromIdeal/maxDist), 0.0, 1.0) + crestScore = clamp(1.0-(distFromIdeal/maxDist), 0.0, 1.0) } // Duration score: longer = better (up to 60s, then plateau) - durScore := clampFloat(m.Region.Duration.Seconds()/60.0, 0.0, 1.0) + durScore := clamp(m.Region.Duration.Seconds()/60.0, 0.0, 1.0) // Voicing density score: prefer high voiced content proportion // Uses shared helper function for consistency with scoreSpeechIntervalWindow @@ -3678,7 +3630,6 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen var rmsLevel float64 var peakLevel float64 var crestFactor float64 - var entropy float64 var momentaryLUFS float64 var shortTermLUFS float64 var truePeak float64 @@ -3738,9 +3689,6 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen if value, ok := getFloatMetadata(metadata, metaKeyOverallCrestFactor); ok { crestFactor = value } - if value, ok := getFloatMetadata(metadata, metaKeyOverallEntropy); ok { - entropy = value - } // aspectralstats spectral measurements - accumulate for averaging spectralFound := false @@ -3827,9 +3775,6 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen if value, ok := getFloatMetadata(metadata, metaKeyOverallCrestFactor); ok { crestFactor = value } - if value, ok := getFloatMetadata(metadata, metaKeyOverallEntropy); ok { - entropy = value - } // aspectralstats spectral measurements - accumulate for averaging spectralFound := false @@ -3985,9 +3930,6 @@ func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*Silen ShortTermLUFS: shortTermLUFS, TruePeak: truePeakDB, SamplePeak: samplePeakDB, - - // Legacy entropy field from astats (for compatibility) - Entropy: entropy, } if !rmsLevelFound { From 65a9ded9c25dbd962097fcb4962f3b9ca6076024 Mon Sep 17 00:00:00 2001 From: Martin Wimpress Date: Fri, 6 Feb 2026 17:28:51 +0000 Subject: [PATCH 2/6] refactor(processor): extract spectral accumulation into helper method - Add baseMetadataAccumulators.accumulateSpectral to centralise spectral accumulation logic - Replace duplicated accumulation blocks in extractFrameMetadata and 
extractOutputFrameMetadata with a call to the new helper - Reduce code duplication and improve maintainability; no functional change Signed-off-by: Martin Wimpress --- internal/processor/analyzer.go | 57 ++++++++++++++-------------------- 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/internal/processor/analyzer.go b/internal/processor/analyzer.go index 14acd19..824a178 100644 --- a/internal/processor/analyzer.go +++ b/internal/processor/analyzer.go @@ -1485,6 +1485,27 @@ type baseMetadataAccumulators struct { astatsFound bool } +// accumulateSpectral adds the given spectral measurements to the running sums. +func (b *baseMetadataAccumulators) accumulateSpectral(spectral spectralMetrics) { + if !spectral.Found { + return + } + b.spectralMeanSum += spectral.Mean + b.spectralVarianceSum += spectral.Variance + b.spectralCentroidSum += spectral.Centroid + b.spectralSpreadSum += spectral.Spread + b.spectralSkewnessSum += spectral.Skewness + b.spectralKurtosisSum += spectral.Kurtosis + b.spectralEntropySum += spectral.Entropy + b.spectralFlatnessSum += spectral.Flatness + b.spectralCrestSum += spectral.Crest + b.spectralFluxSum += spectral.Flux + b.spectralSlopeSum += spectral.Slope + b.spectralDecreaseSum += spectral.Decrease + b.spectralRolloffSum += spectral.Rolloff + b.spectralFrameCount++ +} + // metadataAccumulators holds accumulator variables for Pass 1 frame metadata extraction. // Uses baseMetadataAccumulators for spectral and astats fields shared with output analysis. type metadataAccumulators struct { @@ -1679,23 +1700,7 @@ func extractFrameMetadata(metadata *ffmpeg.AVDictionary, acc *metadataAccumulato // Extract all aspectralstats measurements (averaged across frames) // For mono audio, spectral stats are under channel .1 - spectral := extractSpectralMetrics(metadata) - if spectral.Found { - acc.spectralMeanSum += spectral.Mean - acc.spectralVarianceSum += spectral.Variance - acc.spectralCentroidSum += spectral.Centroid - acc.spectralSpreadSum += spectral.Spread - acc.spectralSkewnessSum += spectral.Skewness - acc.spectralKurtosisSum += spectral.Kurtosis - acc.spectralEntropySum += spectral.Entropy - acc.spectralFlatnessSum += spectral.Flatness - acc.spectralCrestSum += spectral.Crest - acc.spectralFluxSum += spectral.Flux - acc.spectralSlopeSum += spectral.Slope - acc.spectralDecreaseSum += spectral.Decrease - acc.spectralRolloffSum += spectral.Rolloff - acc.spectralFrameCount++ - } + acc.accumulateSpectral(extractSpectralMetrics(metadata)) // Extract astats measurements (cumulative, so we keep the latest) // For mono audio, stats are under channel .1 @@ -2040,23 +2045,7 @@ func extractOutputFrameMetadata(metadata *ffmpeg.AVDictionary, acc *outputMetada } // Extract all aspectralstats measurements (averaged across frames) - spectral := extractSpectralMetrics(metadata) - if spectral.Found { - acc.spectralMeanSum += spectral.Mean - acc.spectralVarianceSum += spectral.Variance - acc.spectralCentroidSum += spectral.Centroid - acc.spectralSpreadSum += spectral.Spread - acc.spectralSkewnessSum += spectral.Skewness - acc.spectralKurtosisSum += spectral.Kurtosis - acc.spectralEntropySum += spectral.Entropy - acc.spectralFlatnessSum += spectral.Flatness - acc.spectralCrestSum += spectral.Crest - acc.spectralFluxSum += spectral.Flux - acc.spectralSlopeSum += spectral.Slope - acc.spectralDecreaseSum += spectral.Decrease - acc.spectralRolloffSum += spectral.Rolloff - acc.spectralFrameCount++ - } + acc.accumulateSpectral(extractSpectralMetrics(metadata)) // Extract astats 
measurements (cumulative, so we keep the latest) if value, ok := getFloatMetadata(metadata, metaKeyDynamicRange); ok { From b515d7753a602bb1a9b7aa326d37f9c6ea1cdedb Mon Sep 17 00:00:00 2001 From: Martin Wimpress Date: Fri, 6 Feb 2026 17:33:03 +0000 Subject: [PATCH 3/6] refactor(processor): extract astats metadata into helper - Add baseMetadataAccumulators.extractAstatsMetadata to centralise astats parsing - Move duplicated astats extraction and conversions into the new helper - convert CrestFactor from linear ratio to dB - convert MinLevel/MaxLevel from linear samples to dBFS - Replace inline extraction in extractFrameMetadata and extractOutputFrameMetadata - Reduce duplication and improve maintainability; no behavioural change Signed-off-by: Martin Wimpress --- internal/processor/analyzer.go | 245 ++++++++++----------------------- 1 file changed, 74 insertions(+), 171 deletions(-) diff --git a/internal/processor/analyzer.go b/internal/processor/analyzer.go index 824a178..db7fb4a 100644 --- a/internal/processor/analyzer.go +++ b/internal/processor/analyzer.go @@ -1506,6 +1506,78 @@ func (b *baseMetadataAccumulators) accumulateSpectral(spectral spectralMetrics) b.spectralFrameCount++ } +// extractAstatsMetadata extracts all astats measurements from FFmpeg metadata. +// These are cumulative values, so we keep the latest from each frame. +// Includes conversions: linearRatioToDB for CrestFactor, linearSampleToDBFS for MinLevel/MaxLevel. +func (b *baseMetadataAccumulators) extractAstatsMetadata(metadata *ffmpeg.AVDictionary) { + if value, ok := getFloatMetadata(metadata, metaKeyDynamicRange); ok { + b.astatsDynamicRange = value + b.astatsFound = true + } + if value, ok := getFloatMetadata(metadata, metaKeyRMSLevel); ok { + b.astatsRMSLevel = value + } + if value, ok := getFloatMetadata(metadata, metaKeyPeakLevel); ok { + b.astatsPeakLevel = value + } + if value, ok := getFloatMetadata(metadata, metaKeyRMSTrough); ok { + b.astatsRMSTrough = value + } + if value, ok := getFloatMetadata(metadata, metaKeyRMSPeak); ok { + b.astatsRMSPeak = value + } + if value, ok := getFloatMetadata(metadata, metaKeyDCOffset); ok { + b.astatsDCOffset = value + } + if value, ok := getFloatMetadata(metadata, metaKeyFlatFactor); ok { + b.astatsFlatFactor = value + } + // CrestFactor: FFmpeg reports as linear ratio (peak/RMS), convert to dB + if value, ok := getFloatMetadata(metadata, metaKeyCrestFactor); ok { + b.astatsCrestFactor = linearRatioToDB(value) + } + if value, ok := getFloatMetadata(metadata, metaKeyZeroCrossingsRate); ok { + b.astatsZeroCrossingsRate = value + } + if value, ok := getFloatMetadata(metadata, metaKeyZeroCrossings); ok { + b.astatsZeroCrossings = value + } + if value, ok := getFloatMetadata(metadata, metaKeyMaxDifference); ok { + b.astatsMaxDifference = value + } + if value, ok := getFloatMetadata(metadata, metaKeyMinDifference); ok { + b.astatsMinDifference = value + } + if value, ok := getFloatMetadata(metadata, metaKeyMeanDifference); ok { + b.astatsMeanDifference = value + } + if value, ok := getFloatMetadata(metadata, metaKeyRMSDifference); ok { + b.astatsRMSDifference = value + } + if value, ok := getFloatMetadata(metadata, metaKeyEntropy); ok { + b.astatsEntropy = value + } + // MinLevel/MaxLevel: FFmpeg reports as linear sample values, convert to dBFS + if value, ok := getFloatMetadata(metadata, metaKeyMinLevel); ok { + b.astatsMinLevel = linearSampleToDBFS(value) + } + if value, ok := getFloatMetadata(metadata, metaKeyMaxLevel); ok { + b.astatsMaxLevel = linearSampleToDBFS(value) + } 
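+	// Noise_floor: FFmpeg's own noise floor estimate (dBFS); useful for adaptive gate/noise reduction thresholds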
+ if value, ok := getFloatMetadata(metadata, metaKeyNoiseFloor); ok { + b.astatsNoiseFloor = value + } + if value, ok := getFloatMetadata(metadata, metaKeyNoiseFloorCount); ok { + b.astatsNoiseFloorCount = value + } + if value, ok := getFloatMetadata(metadata, metaKeyBitDepth); ok { + b.astatsBitDepth = value + } + if value, ok := getFloatMetadata(metadata, metaKeyNumberOfSamples); ok { + b.astatsNumberOfSamples = value + } +} + // metadataAccumulators holds accumulator variables for Pass 1 frame metadata extraction. // Uses baseMetadataAccumulators for spectral and astats fields shared with output analysis. type metadataAccumulators struct { @@ -1704,111 +1776,7 @@ func extractFrameMetadata(metadata *ffmpeg.AVDictionary, acc *metadataAccumulato // Extract astats measurements (cumulative, so we keep the latest) // For mono audio, stats are under channel .1 - if value, ok := getFloatMetadata(metadata, metaKeyDynamicRange); ok { - acc.astatsDynamicRange = value - acc.astatsFound = true - } - - if value, ok := getFloatMetadata(metadata, metaKeyRMSLevel); ok { - acc.astatsRMSLevel = value - } - - if value, ok := getFloatMetadata(metadata, metaKeyPeakLevel); ok { - acc.astatsPeakLevel = value - } - - // Extract RMS_trough - RMS level of quietest segments (best noise floor indicator for speech) - // In speech audio, quiet inter-word periods contain primarily ambient/electronic noise - if value, ok := getFloatMetadata(metadata, metaKeyRMSTrough); ok { - acc.astatsRMSTrough = value - } - - // Extract RMS_peak - RMS level of loudest segments - if value, ok := getFloatMetadata(metadata, metaKeyRMSPeak); ok { - acc.astatsRMSPeak = value - } - - // Extract DC_offset - mean amplitude displacement from zero - // High values indicate DC bias that should be removed before processing - if value, ok := getFloatMetadata(metadata, metaKeyDCOffset); ok { - acc.astatsDCOffset = value - } - - // Extract Flat_factor - consecutive samples at peak levels (indicates clipping) - // High values suggest pre-existing limiting or clipping damage - if value, ok := getFloatMetadata(metadata, metaKeyFlatFactor); ok { - acc.astatsFlatFactor = value - } - - // Extract Crest_factor - FFmpeg reports as linear ratio (peak/RMS), convert to dB - // High values indicate impulsive/dynamic content, low values indicate compressed/limited audio - if value, ok := getFloatMetadata(metadata, metaKeyCrestFactor); ok { - acc.astatsCrestFactor = linearRatioToDB(value) - } - - // Extract Zero_crossings_rate - rate of zero crossings per sample - // Low ZCR = bass-heavy/sustained tones, High ZCR = noise/sibilance - if value, ok := getFloatMetadata(metadata, metaKeyZeroCrossingsRate); ok { - acc.astatsZeroCrossingsRate = value - } - - // Extract Zero_crossings - total number of zero crossings - if value, ok := getFloatMetadata(metadata, metaKeyZeroCrossings); ok { - acc.astatsZeroCrossings = value - } - - // Extract Max_difference - largest sample-to-sample change - // High values indicate impulsive sounds (clicks, pops) - if value, ok := getFloatMetadata(metadata, metaKeyMaxDifference); ok { - acc.astatsMaxDifference = value - } - - // Extract Min_difference - smallest sample-to-sample change - if value, ok := getFloatMetadata(metadata, metaKeyMinDifference); ok { - acc.astatsMinDifference = value - } - - // Extract Mean_difference - average sample-to-sample change - if value, ok := getFloatMetadata(metadata, metaKeyMeanDifference); ok { - acc.astatsMeanDifference = value - } - - // Extract RMS_difference - RMS of sample-to-sample changes - if 
value, ok := getFloatMetadata(metadata, metaKeyRMSDifference); ok { - acc.astatsRMSDifference = value - } - - // Extract Entropy - signal randomness (1.0 = white noise, lower = more structured) - if value, ok := getFloatMetadata(metadata, metaKeyEntropy); ok { - acc.astatsEntropy = value - } - - // Extract Min_level and Max_level - FFmpeg reports as linear sample values, convert to dBFS - if value, ok := getFloatMetadata(metadata, metaKeyMinLevel); ok { - acc.astatsMinLevel = linearSampleToDBFS(value) - } - if value, ok := getFloatMetadata(metadata, metaKeyMaxLevel); ok { - acc.astatsMaxLevel = linearSampleToDBFS(value) - } - - // Extract Noise_floor - FFmpeg's own noise floor estimate (dBFS) - // Very useful for adaptive gate/noise reduction thresholds - if value, ok := getFloatMetadata(metadata, metaKeyNoiseFloor); ok { - acc.astatsNoiseFloor = value - } - if value, ok := getFloatMetadata(metadata, metaKeyNoiseFloorCount); ok { - acc.astatsNoiseFloorCount = value - } - - // Extract Bit_depth - effective bit depth of audio - if value, ok := getFloatMetadata(metadata, metaKeyBitDepth); ok { - acc.astatsBitDepth = value - } - - // Extract Number_of_samples - total samples processed - if value, ok := getFloatMetadata(metadata, metaKeyNumberOfSamples); ok { - acc.astatsNumberOfSamples = value - } + acc.extractAstatsMetadata(metadata) // Extract ebur128 measurements (cumulative loudness analysis) // ebur128 provides: M (momentary 400ms), S (short-term 3s), I (integrated), LRA, sample_peak, true_peak @@ -2048,72 +2016,7 @@ func extractOutputFrameMetadata(metadata *ffmpeg.AVDictionary, acc *outputMetada acc.accumulateSpectral(extractSpectralMetrics(metadata)) // Extract astats measurements (cumulative, so we keep the latest) - if value, ok := getFloatMetadata(metadata, metaKeyDynamicRange); ok { - acc.astatsDynamicRange = value - acc.astatsFound = true - } - if value, ok := getFloatMetadata(metadata, metaKeyRMSLevel); ok { - acc.astatsRMSLevel = value - } - if value, ok := getFloatMetadata(metadata, metaKeyPeakLevel); ok { - acc.astatsPeakLevel = value - } - if value, ok := getFloatMetadata(metadata, metaKeyRMSTrough); ok { - acc.astatsRMSTrough = value - } - if value, ok := getFloatMetadata(metadata, metaKeyRMSPeak); ok { - acc.astatsRMSPeak = value - } - if value, ok := getFloatMetadata(metadata, metaKeyDCOffset); ok { - acc.astatsDCOffset = value - } - if value, ok := getFloatMetadata(metadata, metaKeyFlatFactor); ok { - acc.astatsFlatFactor = value - } - // CrestFactor: FFmpeg reports as linear ratio (peak/RMS), convert to dB - if value, ok := getFloatMetadata(metadata, metaKeyCrestFactor); ok { - acc.astatsCrestFactor = linearRatioToDB(value) - } - if value, ok := getFloatMetadata(metadata, metaKeyZeroCrossingsRate); ok { - acc.astatsZeroCrossingsRate = value - } - if value, ok := getFloatMetadata(metadata, metaKeyZeroCrossings); ok { - acc.astatsZeroCrossings = value - } - if value, ok := getFloatMetadata(metadata, metaKeyMaxDifference); ok { - acc.astatsMaxDifference = value - } - if value, ok := getFloatMetadata(metadata, metaKeyMinDifference); ok { - acc.astatsMinDifference = value - } - if value, ok := getFloatMetadata(metadata, metaKeyMeanDifference); ok { - acc.astatsMeanDifference = value - } - if value, ok := getFloatMetadata(metadata, metaKeyRMSDifference); ok { - acc.astatsRMSDifference = value - } - if value, ok := getFloatMetadata(metadata, metaKeyEntropy); ok { - acc.astatsEntropy = value - } - // MinLevel/MaxLevel: FFmpeg reports as linear sample values, convert to dBFS - if 
value, ok := getFloatMetadata(metadata, metaKeyMinLevel); ok { - acc.astatsMinLevel = linearSampleToDBFS(value) - } - if value, ok := getFloatMetadata(metadata, metaKeyMaxLevel); ok { - acc.astatsMaxLevel = linearSampleToDBFS(value) - } - if value, ok := getFloatMetadata(metadata, metaKeyNoiseFloor); ok { - acc.astatsNoiseFloor = value - } - if value, ok := getFloatMetadata(metadata, metaKeyNoiseFloorCount); ok { - acc.astatsNoiseFloorCount = value - } - if value, ok := getFloatMetadata(metadata, metaKeyBitDepth); ok { - acc.astatsBitDepth = value - } - if value, ok := getFloatMetadata(metadata, metaKeyNumberOfSamples); ok { - acc.astatsNumberOfSamples = value - } + acc.extractAstatsMetadata(metadata) // Extract ebur128 measurements if value, ok := getFloatMetadata(metadata, metaKeyEbur128I); ok { From ea1fe65ebee46908e8b7ea0c43579dbd279c866c Mon Sep 17 00:00:00 2001 From: Martin Wimpress Date: Fri, 6 Feb 2026 17:37:24 +0000 Subject: [PATCH 4/6] refactor(processor): extract shared subregion refinement into helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add refineToSubregion(...) — a generalized sliding-window refinement that accepts window sizes, a scoring function and a comparator to pick the best candidate. - Replace duplicate logic in refineToGoldenSubregion and refineToGoldenSpeechSubregion so both now call the new helper. - Preserve behaviour and early-exit semantics: original region is returned when refinement isn't possible (insufficient intervals or already within target). - Simplify interval/window calculations and reduce duplicated code paths, improving maintainability. Signed-off-by: Martin Wimpress --- internal/processor/analyzer.go | 141 +++++++++++++++------------------ 1 file changed, 65 insertions(+), 76 deletions(-) diff --git a/internal/processor/analyzer.go b/internal/processor/analyzer.go index db7fb4a..a0292c0 100644 --- a/internal/processor/analyzer.go +++ b/internal/processor/analyzer.go @@ -699,37 +699,38 @@ const ( goldenSpeechWindowMinimum = 30 * time.Second // Minimum acceptable window ) -// refineToGoldenSubregion finds the cleanest sub-region within a silence candidate. -// Uses existing interval samples to find the window with lowest average RMS. -// Returns the original region if it's already at or below goldenWindowDuration, -// or if refinement fails for any reason (insufficient intervals, etc.). +// refineToSubregion implements the shared sliding-window refinement logic used by both +// silence and speech sub-region selection. It finds the best-scoring contiguous window +// within the given time range, where "best" is determined by the provided scoring function +// and comparison: isBetter(candidate, current) returns true when candidate should replace current. // -// This addresses cases where a 17.2s candidate at 24.0s absorbed -// both pre-intentional (noisier) and intentional (cleaner) silence periods. -// By refining to the cleanest 10s window, we isolate the optimal noise profile. -func refineToGoldenSubregion(candidate *SilenceRegion, intervals []IntervalSample) *SilenceRegion { - if candidate == nil { - return nil - } - +// Returns the refined start, end, and duration. If refinement is not possible (insufficient +// intervals, already within target), returns the original bounds unchanged and ok=false. 
+func refineToSubregion( + start, end, duration time.Duration, + intervals []IntervalSample, + windowDuration, windowMinimum time.Duration, + score func([]IntervalSample) float64, + isBetter func(candidate, current float64) bool, +) (refinedStart, refinedEnd, refinedDuration time.Duration, ok bool) { // No refinement needed if already at or below target duration - if candidate.Duration <= goldenWindowDuration { - return candidate + if duration <= windowDuration { + return start, end, duration, false } // Extract intervals within the candidate's time range - candidateIntervals := getIntervalsInRange(intervals, candidate.Start, candidate.End) + candidateIntervals := getIntervalsInRange(intervals, start, end) if candidateIntervals == nil { - return candidate + return start, end, duration, false } - // Calculate window size in intervals (10s / 250ms = 40 intervals) - windowIntervals := int(goldenWindowDuration / goldenIntervalSize) - minimumIntervals := int(goldenWindowMinimum / goldenIntervalSize) + // Calculate window size in intervals + windowIntervals := int(windowDuration / goldenIntervalSize) + minimumIntervals := int(windowMinimum / goldenIntervalSize) // Need at least minimum window worth of intervals if len(candidateIntervals) < minimumIntervals { - return candidate + return start, end, duration, false } // If we have fewer intervals than target window, use what we have @@ -737,28 +738,51 @@ func refineToGoldenSubregion(candidate *SilenceRegion, intervals []IntervalSampl windowIntervals = len(candidateIntervals) } - // Slide window across intervals, finding position with lowest average RMS + // Slide window across intervals, finding the position with the best score bestStartIdx := 0 - bestRMS := scoreIntervalWindow(candidateIntervals[:windowIntervals]) + bestScore := score(candidateIntervals[:windowIntervals]) for startIdx := 1; startIdx <= len(candidateIntervals)-windowIntervals; startIdx++ { - windowRMS := scoreIntervalWindow(candidateIntervals[startIdx : startIdx+windowIntervals]) - if windowRMS < bestRMS { - bestRMS = windowRMS + windowScore := score(candidateIntervals[startIdx : startIdx+windowIntervals]) + if isBetter(windowScore, bestScore) { + bestScore = windowScore bestStartIdx = startIdx } } // Calculate refined region bounds from the best window position - refinedStart := candidateIntervals[bestStartIdx].Timestamp - refinedDuration := time.Duration(windowIntervals) * goldenIntervalSize - refinedEnd := refinedStart + refinedDuration + refinedStart = candidateIntervals[bestStartIdx].Timestamp + refinedDuration = time.Duration(windowIntervals) * goldenIntervalSize + refinedEnd = refinedStart + refinedDuration - return &SilenceRegion{ - Start: refinedStart, - End: refinedEnd, - Duration: refinedDuration, + return refinedStart, refinedEnd, refinedDuration, true +} + +// refineToGoldenSubregion finds the cleanest sub-region within a silence candidate. +// Uses existing interval samples to find the window with lowest average RMS. +// Returns the original region if it's already at or below goldenWindowDuration, +// or if refinement fails for any reason (insufficient intervals, etc.). +// +// This addresses cases where a 17.2s candidate at 24.0s absorbed +// both pre-intentional (noisier) and intentional (cleaner) silence periods. +// By refining to the cleanest 10s window, we isolate the optimal noise profile. 
+func refineToGoldenSubregion(candidate *SilenceRegion, intervals []IntervalSample) *SilenceRegion { + if candidate == nil { + return nil } + + start, end, dur, ok := refineToSubregion( + candidate.Start, candidate.End, candidate.Duration, + intervals, + goldenWindowDuration, goldenWindowMinimum, + scoreIntervalWindow, + func(candidate, current float64) bool { return candidate < current }, + ) + if !ok { + return candidate + } + + return &SilenceRegion{Start: start, End: end, Duration: dur} } // getIntervalsInRange returns intervals that fall within the given time range. @@ -1101,53 +1125,18 @@ func refineToGoldenSpeechSubregion(candidate *SpeechRegion, intervals []Interval return nil } - // No refinement needed if already at or below target duration - if candidate.Duration <= goldenSpeechWindowDuration { - return candidate - } - - // Extract intervals within the candidate's time range - candidateIntervals := getIntervalsInRange(intervals, candidate.Start, candidate.End) - if candidateIntervals == nil { - return candidate - } - - // Calculate window size in intervals (60s / 250ms = 240 intervals) - windowIntervals := int(goldenSpeechWindowDuration / goldenIntervalSize) - minimumIntervals := int(goldenSpeechWindowMinimum / goldenIntervalSize) - - // Need at least minimum window worth of intervals - if len(candidateIntervals) < minimumIntervals { + start, end, dur, ok := refineToSubregion( + candidate.Start, candidate.End, candidate.Duration, + intervals, + goldenSpeechWindowDuration, goldenSpeechWindowMinimum, + scoreSpeechIntervalWindow, + func(candidate, current float64) bool { return candidate > current }, + ) + if !ok { return candidate } - // If we have fewer intervals than target window, use what we have - if len(candidateIntervals) < windowIntervals { - windowIntervals = len(candidateIntervals) - } - - // Slide window across intervals, finding position with highest speech quality score - bestStartIdx := 0 - bestScore := scoreSpeechIntervalWindow(candidateIntervals[:windowIntervals]) - - for startIdx := 1; startIdx <= len(candidateIntervals)-windowIntervals; startIdx++ { - windowScore := scoreSpeechIntervalWindow(candidateIntervals[startIdx : startIdx+windowIntervals]) - if windowScore > bestScore { - bestScore = windowScore - bestStartIdx = startIdx - } - } - - // Calculate refined region bounds from the best window position - refinedStart := candidateIntervals[bestStartIdx].Timestamp - refinedDuration := time.Duration(windowIntervals) * goldenIntervalSize - refinedEnd := refinedStart + refinedDuration - - return &SpeechRegion{ - Start: refinedStart, - End: refinedEnd, - Duration: refinedDuration, - } + return &SpeechRegion{Start: start, End: end, Duration: dur} } // roomToneScore calculates a 0-1 score indicating how likely an interval is room tone. From a16f9c7b17a9b54cab967f8765a5aaaef391bd56 Mon Sep 17 00:00:00 2001 From: Martin Wimpress Date: Fri, 6 Feb 2026 17:41:51 +0000 Subject: [PATCH 5/6] refactor(processor): consolidate spectral metric accumulation in analyzer - Replace many individual spectral-sum variables with a single spectralMetrics struct - Add IntervalSample.spectralFields(), spectralMetrics.add() and spectralMetrics.average() - Update measureSilenceCandidateFromIntervals and measureSpeechCandidateFromIntervals to accumulate and average spectral metrics via the new helpers - Improve readability, reduce duplication and risk of copy/paste errors No behaviour change; refactor only. 
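For illustration only, the composition of the new helpers is roughly the sketch below. spectralMetrics, IntervalSample, spectralFields, add and average are the names introduced or used in this patch; averageSpectral itself is a hypothetical wrapper shown for clarity, not code added by the patch:

    // averageSpectral sketches how the helpers compose inside the candidate
    // measurement functions (illustration only, assumes a non-empty slice,
    // which the real callers already guard against).
    func averageSpectral(regionIntervals []IntervalSample) spectralMetrics {
            var spectralSum spectralMetrics
            for _, interval := range regionIntervals {
                    // Sum all 13 spectral fields in one call instead of 13 separate += statements
                    spectralSum.add(interval.spectralFields())
            }
            // Divide the accumulated sums by the interval count to give per-region averages
            return spectralSum.average(float64(len(regionIntervals)))
    }
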
Signed-off-by: Martin Wimpress --- internal/processor/analyzer.go | 162 ++++++++++++++++++--------------- 1 file changed, 89 insertions(+), 73 deletions(-) diff --git a/internal/processor/analyzer.go b/internal/processor/analyzer.go index a0292c0..cb1ec57 100644 --- a/internal/processor/analyzer.go +++ b/internal/processor/analyzer.go @@ -833,38 +833,17 @@ func measureSilenceCandidateFromIntervals(region SilenceRegion, intervals []Inte // Accumulate metrics for averaging (sums) and extremes (max) var rmsSum float64 var peakMax, truePeakMax, samplePeakMax float64 = -120.0, -120.0, -120.0 - - // Spectral metrics sums - var meanSum, varianceSum, centroidSum, spreadSum float64 - var skewnessSum, kurtosisSum, entropySum, flatnessSum float64 - var crestSum, fluxSum, slopeSum, decreaseSum, rolloffSum float64 - - // Loudness metrics sums + var spectralSum spectralMetrics var momentarySum, shortTermSum float64 for _, interval := range regionIntervals { - // Amplitude rmsSum += interval.RMSLevel if interval.PeakLevel > peakMax { peakMax = interval.PeakLevel } - // Spectral - meanSum += interval.SpectralMean - varianceSum += interval.SpectralVariance - centroidSum += interval.SpectralCentroid - spreadSum += interval.SpectralSpread - skewnessSum += interval.SpectralSkewness - kurtosisSum += interval.SpectralKurtosis - entropySum += interval.SpectralEntropy - flatnessSum += interval.SpectralFlatness - crestSum += interval.SpectralCrest - fluxSum += interval.SpectralFlux - slopeSum += interval.SpectralSlope - decreaseSum += interval.SpectralDecrease - rolloffSum += interval.SpectralRolloff + spectralSum.add(interval.spectralFields()) - // Loudness (average for momentary/short-term, max for peaks) momentarySum += interval.MomentaryLUFS shortTermSum += interval.ShortTermLUFS if interval.TruePeak > truePeakMax { @@ -877,7 +856,7 @@ func measureSilenceCandidateFromIntervals(region SilenceRegion, intervals []Inte n := float64(len(regionIntervals)) avgRMS := rmsSum / n - avgEntropy := entropySum / n + avgSpectral := spectralSum.average(n) return &SilenceCandidateMetrics{ Region: region, @@ -885,19 +864,19 @@ func measureSilenceCandidateFromIntervals(region SilenceRegion, intervals []Inte PeakLevel: peakMax, CrestFactor: peakMax - avgRMS, - SpectralMean: meanSum / n, - SpectralVariance: varianceSum / n, - SpectralCentroid: centroidSum / n, - SpectralSpread: spreadSum / n, - SpectralSkewness: skewnessSum / n, - SpectralKurtosis: kurtosisSum / n, - SpectralEntropy: avgEntropy, - SpectralFlatness: flatnessSum / n, - SpectralCrest: crestSum / n, - SpectralFlux: fluxSum / n, - SpectralSlope: slopeSum / n, - SpectralDecrease: decreaseSum / n, - SpectralRolloff: rolloffSum / n, + SpectralMean: avgSpectral.Mean, + SpectralVariance: avgSpectral.Variance, + SpectralCentroid: avgSpectral.Centroid, + SpectralSpread: avgSpectral.Spread, + SpectralSkewness: avgSpectral.Skewness, + SpectralKurtosis: avgSpectral.Kurtosis, + SpectralEntropy: avgSpectral.Entropy, + SpectralFlatness: avgSpectral.Flatness, + SpectralCrest: avgSpectral.Crest, + SpectralFlux: avgSpectral.Flux, + SpectralSlope: avgSpectral.Slope, + SpectralDecrease: avgSpectral.Decrease, + SpectralRolloff: avgSpectral.Rolloff, MomentaryLUFS: momentarySum / n, ShortTermLUFS: shortTermSum / n, @@ -1650,6 +1629,63 @@ type spectralMetrics struct { Found bool // True if any spectral metric was extracted } +// spectralFields returns the 13 spectral measurements from this interval as a spectralMetrics value. 
+// This enables struct-level accumulation instead of 13 individual variables. +func (s *IntervalSample) spectralFields() spectralMetrics { + return spectralMetrics{ + Mean: s.SpectralMean, + Variance: s.SpectralVariance, + Centroid: s.SpectralCentroid, + Spread: s.SpectralSpread, + Skewness: s.SpectralSkewness, + Kurtosis: s.SpectralKurtosis, + Entropy: s.SpectralEntropy, + Flatness: s.SpectralFlatness, + Crest: s.SpectralCrest, + Flux: s.SpectralFlux, + Slope: s.SpectralSlope, + Decrease: s.SpectralDecrease, + Rolloff: s.SpectralRolloff, + Found: true, + } +} + +// add accumulates another spectralMetrics into this one (element-wise sum). +func (m *spectralMetrics) add(other spectralMetrics) { + m.Mean += other.Mean + m.Variance += other.Variance + m.Centroid += other.Centroid + m.Spread += other.Spread + m.Skewness += other.Skewness + m.Kurtosis += other.Kurtosis + m.Entropy += other.Entropy + m.Flatness += other.Flatness + m.Crest += other.Crest + m.Flux += other.Flux + m.Slope += other.Slope + m.Decrease += other.Decrease + m.Rolloff += other.Rolloff +} + +// average returns a new spectralMetrics with all fields divided by n. +func (m spectralMetrics) average(n float64) spectralMetrics { + return spectralMetrics{ + Mean: m.Mean / n, + Variance: m.Variance / n, + Centroid: m.Centroid / n, + Spread: m.Spread / n, + Skewness: m.Skewness / n, + Kurtosis: m.Kurtosis / n, + Entropy: m.Entropy / n, + Flatness: m.Flatness / n, + Crest: m.Crest / n, + Flux: m.Flux / n, + Slope: m.Slope / n, + Decrease: m.Decrease / n, + Rolloff: m.Rolloff / n, + } +} + // extractSpectralMetrics extracts all 13 aspectralstats measurements from FFmpeg metadata. // Returns a spectralMetrics struct with Found=true if at least one metric was extracted. func extractSpectralMetrics(metadata *ffmpeg.AVDictionary) spectralMetrics { @@ -3211,38 +3247,17 @@ func measureSpeechCandidateFromIntervals(region SpeechRegion, intervals []Interv // Accumulate metrics for averaging (sums) and extremes (max) var rmsSum float64 var peakMax, truePeakMax, samplePeakMax float64 = -120.0, -120.0, -120.0 - - // Spectral metrics sums - var meanSum, varianceSum, centroidSum, spreadSum float64 - var skewnessSum, kurtosisSum, entropySum, flatnessSum float64 - var crestSum, fluxSum, slopeSum, decreaseSum, rolloffSum float64 - - // Loudness metrics sums + var spectralSum spectralMetrics var momentarySum, shortTermSum float64 for _, interval := range regionIntervals { - // Amplitude rmsSum += interval.RMSLevel if interval.PeakLevel > peakMax { peakMax = interval.PeakLevel } - // Spectral - meanSum += interval.SpectralMean - varianceSum += interval.SpectralVariance - centroidSum += interval.SpectralCentroid - spreadSum += interval.SpectralSpread - skewnessSum += interval.SpectralSkewness - kurtosisSum += interval.SpectralKurtosis - entropySum += interval.SpectralEntropy - flatnessSum += interval.SpectralFlatness - crestSum += interval.SpectralCrest - fluxSum += interval.SpectralFlux - slopeSum += interval.SpectralSlope - decreaseSum += interval.SpectralDecrease - rolloffSum += interval.SpectralRolloff + spectralSum.add(interval.spectralFields()) - // Loudness (average for momentary/short-term, max for peaks) momentarySum += interval.MomentaryLUFS shortTermSum += interval.ShortTermLUFS if interval.TruePeak > truePeakMax { @@ -3255,6 +3270,7 @@ func measureSpeechCandidateFromIntervals(region SpeechRegion, intervals []Interv n := float64(len(regionIntervals)) avgRMS := rmsSum / n + avgSpectral := spectralSum.average(n) // Calculate voicing density 
for stability assessment voicedCount := 0 @@ -3271,19 +3287,19 @@ func measureSpeechCandidateFromIntervals(region SpeechRegion, intervals []Interv PeakLevel: peakMax, CrestFactor: peakMax - avgRMS, - SpectralMean: meanSum / n, - SpectralVariance: varianceSum / n, - SpectralCentroid: centroidSum / n, - SpectralSpread: spreadSum / n, - SpectralSkewness: skewnessSum / n, - SpectralKurtosis: kurtosisSum / n, - SpectralEntropy: entropySum / n, - SpectralFlatness: flatnessSum / n, - SpectralCrest: crestSum / n, - SpectralFlux: fluxSum / n, - SpectralSlope: slopeSum / n, - SpectralDecrease: decreaseSum / n, - SpectralRolloff: rolloffSum / n, + SpectralMean: avgSpectral.Mean, + SpectralVariance: avgSpectral.Variance, + SpectralCentroid: avgSpectral.Centroid, + SpectralSpread: avgSpectral.Spread, + SpectralSkewness: avgSpectral.Skewness, + SpectralKurtosis: avgSpectral.Kurtosis, + SpectralEntropy: avgSpectral.Entropy, + SpectralFlatness: avgSpectral.Flatness, + SpectralCrest: avgSpectral.Crest, + SpectralFlux: avgSpectral.Flux, + SpectralSlope: avgSpectral.Slope, + SpectralDecrease: avgSpectral.Decrease, + SpectralRolloff: avgSpectral.Rolloff, MomentaryLUFS: momentarySum / n, ShortTermLUFS: shortTermSum / n, From 1e92d67824ff65eceb79b24a12b5070265b34cd1 Mon Sep 17 00:00:00 2001 From: Martin Wimpress Date: Fri, 6 Feb 2026 17:53:56 +0000 Subject: [PATCH 6/6] refactor(logging): extract three-column metric table helpers - Add threeColMetricSpec, noiseFloorFormatter, and valOr helpers to reduce repetition when rendering Input/Filtered/Final metric columns. - Implement addNoiseFloorMetricRows and addSpeechMetricRows and use concise value selector helpers (v / sv) to build metric slices. - Replace large, repetitive blocks in writeNoiseFloorTable and writeSpeechRegionTable with the new helpers. - Preserve existing behaviour and formatting (no functional change); improves readability and maintainability. Signed-off-by: Martin Wimpress --- internal/logging/report.go | 828 ++++++++----------------------------- 1 file changed, 180 insertions(+), 648 deletions(-) diff --git a/internal/logging/report.go b/internal/logging/report.go index 132b714..1842d77 100644 --- a/internal/logging/report.go +++ b/internal/logging/report.go @@ -250,6 +250,114 @@ func interpretSlope(slope float64) string { } } +// ============================================================================= +// Three-Column Metric Table Helpers +// ============================================================================= +// These helpers eliminate repetition in writeNoiseFloorTable and +// writeSpeechRegionTable, which both display Input/Filtered/Final columns +// for the same set of spectral and loudness metrics. + +// threeColMetricSpec describes a single metric row to be rendered into a +// three-column comparison table. The caller pre-extracts the three float64 +// values from whatever source types are in use. +type threeColMetricSpec struct { + label string // display label († suffix added automatically when gain-normalised) + vals [3]float64 // input, filtered, final + decimals int // formatting precision + unit string // unit suffix (e.g. "Hz", "LUFS") + gainScaling int // 0=none, 1=linear, 2=squared (for normaliseForGain) + interpret func(float64) string // optional interpretation of final value; nil = no interpretation +} + +// noiseFloorFormatter identifies which formatter to use for each value column +// in the noise-floor table. 
Spectral metrics use formatMetricSpectral +// (showing "n/a" for digital silence); loudness metrics use specialised +// formatters (formatMetricLUFS or formatMetricDB). +type noiseFloorFormatter int + +const ( + nfFmtSpectral noiseFloorFormatter = iota // formatMetric for input, formatMetricSpectral for filtered/final + nfFmtLUFS // formatMetricLUFS for all three columns + nfFmtDB // formatMetricDB for all three columns +) + +// addNoiseFloorMetricRows appends metric rows to a noise-floor table. +// For spectral metrics (nfFmtSpectral), input uses formatMetric and +// filtered/final use formatMetricSpectral with digital silence handling. +// For loudness metrics, the appropriate specialised formatter is used. +func addNoiseFloorMetricRows(table *MetricTable, specs []threeColMetricSpec, fmtMode noiseFloorFormatter, gainNormalise bool, effectiveGainDB float64, filteredIsDigitalSilence, finalIsDigitalSilence bool) { + for _, s := range specs { + input, filtered, final := s.vals[0], s.vals[1], s.vals[2] + + // Apply gain normalisation to final value + if s.gainScaling > 0 && gainNormalise && !finalIsDigitalSilence { + final = normaliseForGain(final, effectiveGainDB, s.gainScaling) + } + + // Add † suffix for gain-normalised metrics + label := s.label + if s.gainScaling > 0 && gainNormalise { + label = s.label + " †" + } + + // Format values according to the formatter mode + var fmtInput, fmtFiltered, fmtFinal string + switch fmtMode { + case nfFmtSpectral: + fmtInput = formatMetric(input, s.decimals) + fmtFiltered = formatMetricSpectral(filtered, s.decimals, filteredIsDigitalSilence) + fmtFinal = formatMetricSpectral(final, s.decimals, finalIsDigitalSilence) + case nfFmtLUFS: + fmtInput = formatMetricLUFS(input, s.decimals) + fmtFiltered = formatMetricLUFS(filtered, s.decimals) + fmtFinal = formatMetricLUFS(final, s.decimals) + case nfFmtDB: + fmtInput = formatMetricDB(input, s.decimals) + fmtFiltered = formatMetricDB(filtered, s.decimals) + fmtFinal = formatMetricDB(final, s.decimals) + } + + table.AddRow(label, []string{fmtInput, fmtFiltered, fmtFinal}, s.unit, "") + } +} + +// addSpeechMetricRows appends metric rows to a speech-region table. +// All values use AddMetricRow (formatMetric internally) with optional +// interpretation of the final value. +func addSpeechMetricRows(table *MetricTable, specs []threeColMetricSpec, gainNormalise bool, effectiveGainDB float64) { + for _, s := range specs { + input, filtered, final := s.vals[0], s.vals[1], s.vals[2] + + // Apply gain normalisation to final value + if s.gainScaling > 0 && gainNormalise { + final = normaliseForGain(final, effectiveGainDB, s.gainScaling) + } + + // Add † suffix for gain-normalised metrics + label := s.label + if s.gainScaling > 0 && gainNormalise { + label = s.label + " †" + } + + // Compute interpretation from the (possibly gain-normalised) final value + var interp string + if s.interpret != nil { + interp = s.interpret(final) + } + + table.AddMetricRow(label, input, filtered, final, s.decimals, s.unit, interp) + } +} + +// valOr returns the field value from a source, or math.NaN() if the source is nil. +// This is a convenience for building threeColMetricSpec slices concisely. 
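+// For example: valOr(inputNoise, func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralEntropy })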
+func valOr[T any](src *T, field func(*T) float64) float64 { + if src == nil { + return math.NaN() + } + return field(src) +} + // ============================================================================= // Report Section Formatting Helpers // ============================================================================= @@ -1204,381 +1312,49 @@ func writeNoiseFloorTable(f *os.File, inputMeasurements *processor.AudioMeasurem // For digital silence, spectral metrics are undefined (no signal to analyse). // Show "n/a" instead of misleading zeros or arbitrary values. - // Spectral Mean - inputMean := math.NaN() - filteredMean := math.NaN() - finalMean := math.NaN() - if inputNoise != nil { - inputMean = inputNoise.SpectralMean - } - if filteredNoise != nil { - filteredMean = filteredNoise.SpectralMean - } - if finalNoise != nil { - finalMean = finalNoise.SpectralMean - } - if gainNormalise && !finalIsDigitalSilence { - finalMean = normaliseForGain(finalMean, effectiveGainDB, 1) - } - meanLabel := "Spectral Mean" - if gainNormalise { - meanLabel = "Spectral Mean †" - } - table.AddRow(meanLabel, - []string{ - formatMetric(inputMean, 6), - formatMetricSpectral(filteredMean, 6, filteredIsDigitalSilence), - formatMetricSpectral(finalMean, 6, finalIsDigitalSilence), - }, "", "") - - // Spectral Variance - inputVar := math.NaN() - filteredVar := math.NaN() - finalVar := math.NaN() - if inputNoise != nil { - inputVar = inputNoise.SpectralVariance - } - if filteredNoise != nil { - filteredVar = filteredNoise.SpectralVariance - } - if finalNoise != nil { - finalVar = finalNoise.SpectralVariance - } - if gainNormalise && !finalIsDigitalSilence { - finalVar = normaliseForGain(finalVar, effectiveGainDB, 2) - } - varLabel := "Spectral Variance" - if gainNormalise { - varLabel = "Spectral Variance †" - } - table.AddRow(varLabel, - []string{ - formatMetric(inputVar, 6), - formatMetricSpectral(filteredVar, 6, filteredIsDigitalSilence), - formatMetricSpectral(finalVar, 6, finalIsDigitalSilence), - }, "", "") - - // Spectral Centroid - inputCentroid := math.NaN() - filteredCentroid := math.NaN() - finalCentroid := math.NaN() - if inputNoise != nil { - inputCentroid = inputNoise.SpectralCentroid - } - if filteredNoise != nil { - filteredCentroid = filteredNoise.SpectralCentroid - } - if finalNoise != nil { - finalCentroid = finalNoise.SpectralCentroid - } - table.AddRow("Spectral Centroid", - []string{ - formatMetric(inputCentroid, 0), - formatMetricSpectral(filteredCentroid, 0, filteredIsDigitalSilence), - formatMetricSpectral(finalCentroid, 0, finalIsDigitalSilence), - }, "Hz", "") - - // Spectral Spread - inputSpread := math.NaN() - filteredSpread := math.NaN() - finalSpread := math.NaN() - if inputNoise != nil { - inputSpread = inputNoise.SpectralSpread - } - if filteredNoise != nil { - filteredSpread = filteredNoise.SpectralSpread - } - if finalNoise != nil { - finalSpread = finalNoise.SpectralSpread - } - table.AddRow("Spectral Spread", - []string{ - formatMetric(inputSpread, 0), - formatMetricSpectral(filteredSpread, 0, filteredIsDigitalSilence), - formatMetricSpectral(finalSpread, 0, finalIsDigitalSilence), - }, "Hz", "") - - // Spectral Skewness - inputSkew := math.NaN() - filteredSkew := math.NaN() - finalSkew := math.NaN() - if inputNoise != nil { - inputSkew = inputNoise.SpectralSkewness - } - if filteredNoise != nil { - filteredSkew = filteredNoise.SpectralSkewness - } - if finalNoise != nil { - finalSkew = finalNoise.SpectralSkewness - } - table.AddRow("Spectral Skewness", - []string{ - 
formatMetric(inputSkew, 3), - formatMetricSpectral(filteredSkew, 3, filteredIsDigitalSilence), - formatMetricSpectral(finalSkew, 3, finalIsDigitalSilence), - }, "", "") - - // Spectral Kurtosis - inputKurt := math.NaN() - filteredKurt := math.NaN() - finalKurt := math.NaN() - if inputNoise != nil { - inputKurt = inputNoise.SpectralKurtosis - } - if filteredNoise != nil { - filteredKurt = filteredNoise.SpectralKurtosis - } - if finalNoise != nil { - finalKurt = finalNoise.SpectralKurtosis - } - table.AddRow("Spectral Kurtosis", - []string{ - formatMetric(inputKurt, 3), - formatMetricSpectral(filteredKurt, 3, filteredIsDigitalSilence), - formatMetricSpectral(finalKurt, 3, finalIsDigitalSilence), - }, "", "") - - // Spectral Entropy - inputEntropy := math.NaN() - filteredEntropy := math.NaN() - finalEntropy := math.NaN() - if inputNoise != nil { - inputEntropy = inputNoise.SpectralEntropy - } else { - inputEntropy = noiseProfile.Entropy // Fall back to NoiseProfile - } - if filteredNoise != nil { - filteredEntropy = filteredNoise.SpectralEntropy - } - if finalNoise != nil { - finalEntropy = finalNoise.SpectralEntropy - } - table.AddRow("Spectral Entropy", - []string{ - formatMetric(inputEntropy, 6), - formatMetricSpectral(filteredEntropy, 6, filteredIsDigitalSilence), - formatMetricSpectral(finalEntropy, 6, finalIsDigitalSilence), - }, "", "") - - // Spectral Flatness - inputFlat := math.NaN() - filteredFlat := math.NaN() - finalFlat := math.NaN() - if inputNoise != nil { - inputFlat = inputNoise.SpectralFlatness - } - if filteredNoise != nil { - filteredFlat = filteredNoise.SpectralFlatness - } - if finalNoise != nil { - finalFlat = finalNoise.SpectralFlatness - } - table.AddRow("Spectral Flatness", - []string{ - formatMetric(inputFlat, 6), - formatMetricSpectral(filteredFlat, 6, filteredIsDigitalSilence), - formatMetricSpectral(finalFlat, 6, finalIsDigitalSilence), - }, "", "") - - // Spectral Crest - inputSpectralCrest := math.NaN() - filteredSpectralCrest := math.NaN() - finalSpectralCrest := math.NaN() - if inputNoise != nil { - inputSpectralCrest = inputNoise.SpectralCrest - } - if filteredNoise != nil { - filteredSpectralCrest = filteredNoise.SpectralCrest - } - if finalNoise != nil { - finalSpectralCrest = finalNoise.SpectralCrest - } - table.AddRow("Spectral Crest", - []string{ - formatMetric(inputSpectralCrest, 3), - formatMetricSpectral(filteredSpectralCrest, 3, filteredIsDigitalSilence), - formatMetricSpectral(finalSpectralCrest, 3, finalIsDigitalSilence), - }, "", "") - - // Spectral Flux - inputFlux := math.NaN() - filteredFlux := math.NaN() - finalFlux := math.NaN() - if inputNoise != nil { - inputFlux = inputNoise.SpectralFlux - } - if filteredNoise != nil { - filteredFlux = filteredNoise.SpectralFlux - } - if finalNoise != nil { - finalFlux = finalNoise.SpectralFlux - } - if gainNormalise && !finalIsDigitalSilence { - finalFlux = normaliseForGain(finalFlux, effectiveGainDB, 2) - } - fluxLabel := "Spectral Flux" - if gainNormalise { - fluxLabel = "Spectral Flux †" - } - table.AddRow(fluxLabel, - []string{ - formatMetric(inputFlux, 6), - formatMetricSpectral(filteredFlux, 6, filteredIsDigitalSilence), - formatMetricSpectral(finalFlux, 6, finalIsDigitalSilence), - }, "", "") - - // Spectral Slope - inputSlope := math.NaN() - filteredSlope := math.NaN() - finalSlope := math.NaN() - if inputNoise != nil { - inputSlope = inputNoise.SpectralSlope - } - if filteredNoise != nil { - filteredSlope = filteredNoise.SpectralSlope - } - if finalNoise != nil { - finalSlope = 
finalNoise.SpectralSlope - } - if gainNormalise && !finalIsDigitalSilence { - finalSlope = normaliseForGain(finalSlope, effectiveGainDB, 1) - } - slopeLabel := "Spectral Slope" - if gainNormalise { - slopeLabel = "Spectral Slope †" - } - table.AddRow(slopeLabel, - []string{ - formatMetric(inputSlope, 9), - formatMetricSpectral(filteredSlope, 9, filteredIsDigitalSilence), - formatMetricSpectral(finalSlope, 9, finalIsDigitalSilence), - }, "", "") - - // Spectral Decrease - inputDecrease := math.NaN() - filteredDecrease := math.NaN() - finalDecrease := math.NaN() - if inputNoise != nil { - inputDecrease = inputNoise.SpectralDecrease - } - if filteredNoise != nil { - filteredDecrease = filteredNoise.SpectralDecrease - } - if finalNoise != nil { - finalDecrease = finalNoise.SpectralDecrease - } - table.AddRow("Spectral Decrease", - []string{ - formatMetric(inputDecrease, 6), - formatMetricSpectral(filteredDecrease, 6, filteredIsDigitalSilence), - formatMetricSpectral(finalDecrease, 6, finalIsDigitalSilence), - }, "", "") - - // Spectral Rolloff - inputRolloff := math.NaN() - filteredRolloff := math.NaN() - finalRolloff := math.NaN() - if inputNoise != nil { - inputRolloff = inputNoise.SpectralRolloff - } - if filteredNoise != nil { - filteredRolloff = filteredNoise.SpectralRolloff - } - if finalNoise != nil { - finalRolloff = finalNoise.SpectralRolloff + // Entropy input has a special fallback to NoiseProfile when candidate not found + inputEntropy := valOr(inputNoise, func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralEntropy }) + if inputNoise == nil { + inputEntropy = noiseProfile.Entropy } - table.AddRow("Spectral Rolloff", - []string{ - formatMetric(inputRolloff, 0), - formatMetricSpectral(filteredRolloff, 0, filteredIsDigitalSilence), - formatMetricSpectral(finalRolloff, 0, finalIsDigitalSilence), - }, "Hz", "") - - // ========== LOUDNESS METRICS ========== + filteredEntropy := valOr(filteredNoise, func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralEntropy }) + finalEntropy := valOr(finalNoise, func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralEntropy }) - // Momentary LUFS - use special formatting for values below measurement floor - inputMomentary := math.NaN() - filteredMomentary := math.NaN() - finalMomentary := math.NaN() - if inputNoise != nil { - inputMomentary = inputNoise.MomentaryLUFS - } - if filteredNoise != nil { - filteredMomentary = filteredNoise.MomentaryLUFS - } - if finalNoise != nil { - finalMomentary = finalNoise.MomentaryLUFS + v := func(field func(*processor.SilenceCandidateMetrics) float64) [3]float64 { + return [3]float64{ + valOr(inputNoise, field), + valOr(filteredNoise, field), + valOr(finalNoise, field), + } } - table.AddRow("Momentary LUFS", - []string{ - formatMetricLUFS(inputMomentary, 1), - formatMetricLUFS(filteredMomentary, 1), - formatMetricLUFS(finalMomentary, 1), - }, - "LUFS", "") - // Short-term LUFS - inputShortTerm := math.NaN() - filteredShortTerm := math.NaN() - finalShortTerm := math.NaN() - if inputNoise != nil { - inputShortTerm = inputNoise.ShortTermLUFS - } - if filteredNoise != nil { - filteredShortTerm = filteredNoise.ShortTermLUFS - } - if finalNoise != nil { - finalShortTerm = finalNoise.ShortTermLUFS - } - table.AddRow("Short-term LUFS", - []string{ - formatMetricLUFS(inputShortTerm, 1), - formatMetricLUFS(filteredShortTerm, 1), - formatMetricLUFS(finalShortTerm, 1), - }, - "LUFS", "") + addNoiseFloorMetricRows(table, []threeColMetricSpec{ + {"Spectral Mean", v(func(m 
*processor.SilenceCandidateMetrics) float64 { return m.SpectralMean }), 6, "", 1, nil}, + {"Spectral Variance", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralVariance }), 6, "", 2, nil}, + {"Spectral Centroid", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralCentroid }), 0, "Hz", 0, nil}, + {"Spectral Spread", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralSpread }), 0, "Hz", 0, nil}, + {"Spectral Skewness", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralSkewness }), 3, "", 0, nil}, + {"Spectral Kurtosis", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralKurtosis }), 3, "", 0, nil}, + {"Spectral Entropy", [3]float64{inputEntropy, filteredEntropy, finalEntropy}, 6, "", 0, nil}, + {"Spectral Flatness", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralFlatness }), 6, "", 0, nil}, + {"Spectral Crest", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralCrest }), 3, "", 0, nil}, + {"Spectral Flux", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralFlux }), 6, "", 2, nil}, + {"Spectral Slope", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralSlope }), 9, "", 1, nil}, + {"Spectral Decrease", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralDecrease }), 6, "", 0, nil}, + {"Spectral Rolloff", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SpectralRolloff }), 0, "Hz", 0, nil}, + }, nfFmtSpectral, gainNormalise, effectiveGainDB, filteredIsDigitalSilence, finalIsDigitalSilence) - // True Peak - values are now stored in dB (converted during measurement) - inputTP := math.NaN() - filteredTP := math.NaN() - finalTP := math.NaN() - if inputNoise != nil { - inputTP = inputNoise.TruePeak - } - if filteredNoise != nil { - filteredTP = filteredNoise.TruePeak - } - if finalNoise != nil { - finalTP = finalNoise.TruePeak - } - table.AddRow("True Peak", - []string{ - formatMetricDB(inputTP, 1), - formatMetricDB(filteredTP, 1), - formatMetricDB(finalTP, 1), - }, - "dBTP", "") + // ========== LOUDNESS METRICS ========== - // Sample Peak - values are now stored in dB (converted during measurement) - inputSP := math.NaN() - filteredSP := math.NaN() - finalSP := math.NaN() - if inputNoise != nil { - inputSP = inputNoise.SamplePeak - } - if filteredNoise != nil { - filteredSP = filteredNoise.SamplePeak - } - if finalNoise != nil { - finalSP = finalNoise.SamplePeak - } - table.AddRow("Sample Peak", - []string{ - formatMetricDB(inputSP, 1), - formatMetricDB(filteredSP, 1), - formatMetricDB(finalSP, 1), - }, - "dBFS", "") + addNoiseFloorMetricRows(table, []threeColMetricSpec{ + {"Momentary LUFS", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.MomentaryLUFS }), 1, "LUFS", 0, nil}, + {"Short-term LUFS", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.ShortTermLUFS }), 1, "LUFS", 0, nil}, + }, nfFmtLUFS, gainNormalise, effectiveGainDB, filteredIsDigitalSilence, finalIsDigitalSilence) + + addNoiseFloorMetricRows(table, []threeColMetricSpec{ + {"True Peak", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.TruePeak }), 1, "dBTP", 0, nil}, + {"Sample Peak", v(func(m *processor.SilenceCandidateMetrics) float64 { return m.SamplePeak }), 1, "dBFS", 0, nil}, + }, nfFmtDB, gainNormalise, effectiveGainDB, filteredIsDigitalSilence, finalIsDigitalSilence) // Character (interpretation row) - based on entropy // For digital silence, show 
"silent" instead of attempting to characterise non-existent noise @@ -1693,290 +1469,46 @@ func writeSpeechRegionTable(f *os.File, inputMeasurements *processor.AudioMeasur // ========== SPECTRAL METRICS ========== - // Spectral Mean - inputMean := math.NaN() - filteredMean := math.NaN() - finalMean := math.NaN() - if inputSpeech != nil { - inputMean = inputSpeech.SpectralMean - } - if filteredSpeech != nil { - filteredMean = filteredSpeech.SpectralMean - } - if finalSpeech != nil { - finalMean = finalSpeech.SpectralMean - } - if gainNormalise { - finalMean = normaliseForGain(finalMean, effectiveGainDB, 1) - } - meanLabel := "Spectral Mean" - if gainNormalise { - meanLabel = "Spectral Mean †" - } - table.AddMetricRow(meanLabel, inputMean, filteredMean, finalMean, 6, "", "") - - // Spectral Variance - inputVar := math.NaN() - filteredVar := math.NaN() - finalVar := math.NaN() - if inputSpeech != nil { - inputVar = inputSpeech.SpectralVariance - } - if filteredSpeech != nil { - filteredVar = filteredSpeech.SpectralVariance - } - if finalSpeech != nil { - finalVar = finalSpeech.SpectralVariance - } - if gainNormalise { - finalVar = normaliseForGain(finalVar, effectiveGainDB, 2) - } - varLabel := "Spectral Variance" - if gainNormalise { - varLabel = "Spectral Variance †" - } - table.AddMetricRow(varLabel, inputVar, filteredVar, finalVar, 6, "", "") - - // Spectral Centroid - inputCentroid := math.NaN() - filteredCentroid := math.NaN() - finalCentroid := math.NaN() - if inputSpeech != nil { - inputCentroid = inputSpeech.SpectralCentroid - } - if filteredSpeech != nil { - filteredCentroid = filteredSpeech.SpectralCentroid - } - if finalSpeech != nil { - finalCentroid = finalSpeech.SpectralCentroid - } - table.AddMetricRow("Spectral Centroid", inputCentroid, filteredCentroid, finalCentroid, 0, "Hz", interpretCentroid(finalCentroid)) - - // Spectral Spread - inputSpread := math.NaN() - filteredSpread := math.NaN() - finalSpread := math.NaN() - if inputSpeech != nil { - inputSpread = inputSpeech.SpectralSpread - } - if filteredSpeech != nil { - filteredSpread = filteredSpeech.SpectralSpread - } - if finalSpeech != nil { - finalSpread = finalSpeech.SpectralSpread - } - table.AddMetricRow("Spectral Spread", inputSpread, filteredSpread, finalSpread, 0, "Hz", interpretSpread(finalSpread)) - - // Spectral Skewness - inputSkew := math.NaN() - filteredSkew := math.NaN() - finalSkew := math.NaN() - if inputSpeech != nil { - inputSkew = inputSpeech.SpectralSkewness - } - if filteredSpeech != nil { - filteredSkew = filteredSpeech.SpectralSkewness - } - if finalSpeech != nil { - finalSkew = finalSpeech.SpectralSkewness - } - table.AddMetricRow("Spectral Skewness", inputSkew, filteredSkew, finalSkew, 3, "", interpretSkewness(finalSkew)) - - // Spectral Kurtosis - inputKurt := math.NaN() - filteredKurt := math.NaN() - finalKurt := math.NaN() - if inputSpeech != nil { - inputKurt = inputSpeech.SpectralKurtosis - } - if filteredSpeech != nil { - filteredKurt = filteredSpeech.SpectralKurtosis - } - if finalSpeech != nil { - finalKurt = finalSpeech.SpectralKurtosis - } - table.AddMetricRow("Spectral Kurtosis", inputKurt, filteredKurt, finalKurt, 3, "", interpretKurtosis(finalKurt)) - - // Spectral Entropy - inputEntropy := math.NaN() - filteredEntropy := math.NaN() - finalEntropy := math.NaN() - if inputSpeech != nil { - inputEntropy = inputSpeech.SpectralEntropy - } - if filteredSpeech != nil { - filteredEntropy = filteredSpeech.SpectralEntropy - } - if finalSpeech != nil { - finalEntropy = 
finalSpeech.SpectralEntropy - } - table.AddMetricRow("Spectral Entropy", inputEntropy, filteredEntropy, finalEntropy, 6, "", interpretEntropy(finalEntropy)) - - // Spectral Flatness - inputFlat := math.NaN() - filteredFlat := math.NaN() - finalFlat := math.NaN() - if inputSpeech != nil { - inputFlat = inputSpeech.SpectralFlatness - } - if filteredSpeech != nil { - filteredFlat = filteredSpeech.SpectralFlatness - } - if finalSpeech != nil { - finalFlat = finalSpeech.SpectralFlatness - } - table.AddMetricRow("Spectral Flatness", inputFlat, filteredFlat, finalFlat, 6, "", interpretFlatness(finalFlat)) - - // Spectral Crest - inputSpectralCrest := math.NaN() - filteredSpectralCrest := math.NaN() - finalSpectralCrest := math.NaN() - if inputSpeech != nil { - inputSpectralCrest = inputSpeech.SpectralCrest - } - if filteredSpeech != nil { - filteredSpectralCrest = filteredSpeech.SpectralCrest - } - if finalSpeech != nil { - finalSpectralCrest = finalSpeech.SpectralCrest - } - table.AddMetricRow("Spectral Crest", inputSpectralCrest, filteredSpectralCrest, finalSpectralCrest, 3, "", interpretCrest(finalSpectralCrest)) - - // Spectral Flux - inputFlux := math.NaN() - filteredFlux := math.NaN() - finalFlux := math.NaN() - if inputSpeech != nil { - inputFlux = inputSpeech.SpectralFlux - } - if filteredSpeech != nil { - filteredFlux = filteredSpeech.SpectralFlux - } - if finalSpeech != nil { - finalFlux = finalSpeech.SpectralFlux - } - if gainNormalise { - finalFlux = normaliseForGain(finalFlux, effectiveGainDB, 2) - } - fluxLabel := "Spectral Flux" - if gainNormalise { - fluxLabel = "Spectral Flux †" - } - table.AddMetricRow(fluxLabel, inputFlux, filteredFlux, finalFlux, 6, "", interpretFlux(finalFlux)) - - // Spectral Slope - inputSlope := math.NaN() - filteredSlope := math.NaN() - finalSlope := math.NaN() - if inputSpeech != nil { - inputSlope = inputSpeech.SpectralSlope - } - if filteredSpeech != nil { - filteredSlope = filteredSpeech.SpectralSlope - } - if finalSpeech != nil { - finalSlope = finalSpeech.SpectralSlope - } - if gainNormalise { - finalSlope = normaliseForGain(finalSlope, effectiveGainDB, 1) - } - slopeLabel := "Spectral Slope" - if gainNormalise { - slopeLabel = "Spectral Slope †" + // Extract centroid and entropy values needed by the Character row below + inputCentroid := valOr(inputSpeech, func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralCentroid }) + filteredCentroid := valOr(filteredSpeech, func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralCentroid }) + finalCentroid := valOr(finalSpeech, func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralCentroid }) + inputEntropy := valOr(inputSpeech, func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralEntropy }) + filteredEntropy := valOr(filteredSpeech, func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralEntropy }) + finalEntropy := valOr(finalSpeech, func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralEntropy }) + + sv := func(field func(*processor.SpeechCandidateMetrics) float64) [3]float64 { + return [3]float64{ + valOr(inputSpeech, field), + valOr(filteredSpeech, field), + valOr(finalSpeech, field), + } } - table.AddMetricRow(slopeLabel, inputSlope, filteredSlope, finalSlope, 9, "", interpretSlope(finalSlope)) - // Spectral Decrease - inputDecrease := math.NaN() - filteredDecrease := math.NaN() - finalDecrease := math.NaN() - if inputSpeech != nil { - inputDecrease = inputSpeech.SpectralDecrease - } - if filteredSpeech != nil { - 
filteredDecrease = filteredSpeech.SpectralDecrease - } - if finalSpeech != nil { - finalDecrease = finalSpeech.SpectralDecrease - } - table.AddMetricRow("Spectral Decrease", inputDecrease, filteredDecrease, finalDecrease, 6, "", interpretDecrease(finalDecrease)) - - // Spectral Rolloff - inputRolloff := math.NaN() - filteredRolloff := math.NaN() - finalRolloff := math.NaN() - if inputSpeech != nil { - inputRolloff = inputSpeech.SpectralRolloff - } - if filteredSpeech != nil { - filteredRolloff = filteredSpeech.SpectralRolloff - } - if finalSpeech != nil { - finalRolloff = finalSpeech.SpectralRolloff - } - table.AddMetricRow("Spectral Rolloff", inputRolloff, filteredRolloff, finalRolloff, 0, "Hz", interpretRolloff(finalRolloff)) + addSpeechMetricRows(table, []threeColMetricSpec{ + {"Spectral Mean", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralMean }), 6, "", 1, nil}, + {"Spectral Variance", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralVariance }), 6, "", 2, nil}, + {"Spectral Centroid", [3]float64{inputCentroid, filteredCentroid, finalCentroid}, 0, "Hz", 0, interpretCentroid}, + {"Spectral Spread", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralSpread }), 0, "Hz", 0, interpretSpread}, + {"Spectral Skewness", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralSkewness }), 3, "", 0, interpretSkewness}, + {"Spectral Kurtosis", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralKurtosis }), 3, "", 0, interpretKurtosis}, + {"Spectral Entropy", [3]float64{inputEntropy, filteredEntropy, finalEntropy}, 6, "", 0, interpretEntropy}, + {"Spectral Flatness", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralFlatness }), 6, "", 0, interpretFlatness}, + {"Spectral Crest", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralCrest }), 3, "", 0, interpretCrest}, + {"Spectral Flux", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralFlux }), 6, "", 2, interpretFlux}, + {"Spectral Slope", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralSlope }), 9, "", 1, interpretSlope}, + {"Spectral Decrease", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralDecrease }), 6, "", 0, interpretDecrease}, + {"Spectral Rolloff", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SpectralRolloff }), 0, "Hz", 0, interpretRolloff}, + }, gainNormalise, effectiveGainDB) // ========== LOUDNESS METRICS ========== - // Momentary LUFS - inputMomentary := math.NaN() - filteredMomentary := math.NaN() - finalMomentary := math.NaN() - if inputSpeech != nil { - inputMomentary = inputSpeech.MomentaryLUFS - } - if filteredSpeech != nil { - filteredMomentary = filteredSpeech.MomentaryLUFS - } - if finalSpeech != nil { - finalMomentary = finalSpeech.MomentaryLUFS - } - table.AddMetricRow("Momentary LUFS", inputMomentary, filteredMomentary, finalMomentary, 1, "LUFS", "") - - // Short-term LUFS - inputShortTerm := math.NaN() - filteredShortTerm := math.NaN() - finalShortTerm := math.NaN() - if inputSpeech != nil { - inputShortTerm = inputSpeech.ShortTermLUFS - } - if filteredSpeech != nil { - filteredShortTerm = filteredSpeech.ShortTermLUFS - } - if finalSpeech != nil { - finalShortTerm = finalSpeech.ShortTermLUFS - } - table.AddMetricRow("Short-term LUFS", inputShortTerm, filteredShortTerm, finalShortTerm, 1, "LUFS", "") - - // True Peak - inputTP := math.NaN() - filteredTP := math.NaN() - finalTP := 
math.NaN() - if inputSpeech != nil { - inputTP = inputSpeech.TruePeak - } - if filteredSpeech != nil { - filteredTP = filteredSpeech.TruePeak - } - if finalSpeech != nil { - finalTP = finalSpeech.TruePeak - } - table.AddMetricRow("True Peak", inputTP, filteredTP, finalTP, 1, "dBTP", "") - - // Sample Peak - inputSP := math.NaN() - filteredSP := math.NaN() - finalSP := math.NaN() - if inputSpeech != nil { - inputSP = inputSpeech.SamplePeak - } - if filteredSpeech != nil { - filteredSP = filteredSpeech.SamplePeak - } - if finalSpeech != nil { - finalSP = finalSpeech.SamplePeak - } - table.AddMetricRow("Sample Peak", inputSP, filteredSP, finalSP, 1, "dBFS", "") + addSpeechMetricRows(table, []threeColMetricSpec{ + {"Momentary LUFS", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.MomentaryLUFS }), 1, "LUFS", 0, nil}, + {"Short-term LUFS", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.ShortTermLUFS }), 1, "LUFS", 0, nil}, + {"True Peak", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.TruePeak }), 1, "dBTP", 0, nil}, + {"Sample Peak", sv(func(m *processor.SpeechCandidateMetrics) float64 { return m.SamplePeak }), 1, "dBFS", 0, nil}, + }, gainNormalise, effectiveGainDB) // Character (interpretation row) - based on spectral centroid and entropy // Speech character describes voice quality: warm, balanced, bright, etc.
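
Note for reviewers: the data-driven pattern this hunk introduces (a slice of specs driven through one loop, with valOr collapsing the nil-checks) can be summarised with a small standalone sketch. This is illustrative only. threeColMetricSpec is not defined in this hunk, so the field order below (label, vals, decimals, unit, gainScaling, interpret) is inferred from the composite literals, and gainScaling is assumed to be a float64; the metrics struct and the inline row rendering are stand-ins for processor.SilenceCandidateMetrics and MetricTable, not the project's real API.

// Sketch only: shows the spec-slice pattern from the patch with stubbed types.
package main

import (
	"fmt"
	"math"
)

// Assumed shape of the spec type consumed by addNoiseFloorMetricRows and
// addSpeechMetricRows; field order inferred from the literals in the diff.
type threeColMetricSpec struct {
	label       string
	vals        [3]float64 // input, filtered, final
	decimals    int
	unit        string
	gainScaling float64              // 0 = metric is not gain-normalised
	interpret   func(float64) string // optional interpretation of the final value
}

// valOr mirrors the generic helper added by the patch: it returns a field
// from src, or NaN when src is nil, so callers never nil-check each source.
func valOr[T any](src *T, field func(*T) float64) float64 {
	if src == nil {
		return math.NaN()
	}
	return field(src)
}

// Stand-in for the candidate-metrics structs used by the real code.
type metrics struct{ SpectralCentroid, SpectralEntropy float64 }

func main() {
	input := &metrics{SpectralCentroid: 1450, SpectralEntropy: 0.81}
	var filtered *metrics // nil source -> NaN -> rendered as "n/a"
	final := &metrics{SpectralCentroid: 1400, SpectralEntropy: 0.78}

	// v builds the three-column value tuple for one field, as in the diff.
	v := func(field func(*metrics) float64) [3]float64 {
		return [3]float64{valOr(input, field), valOr(filtered, field), valOr(final, field)}
	}

	specs := []threeColMetricSpec{
		{"Spectral Centroid", v(func(m *metrics) float64 { return m.SpectralCentroid }), 0, "Hz", 0, nil},
		{"Spectral Entropy", v(func(m *metrics) float64 { return m.SpectralEntropy }), 6, "", 0, nil},
	}

	// One loop replaces the per-metric copy/paste blocks removed by the patch.
	for _, s := range specs {
		row := make([]string, 3)
		for i, val := range s.vals {
			if math.IsNaN(val) {
				row[i] = "n/a"
			} else {
				row[i] = fmt.Sprintf("%.*f", s.decimals, val)
			}
		}
		fmt.Printf("%-18s %10s %10s %10s %s\n", s.label, row[0], row[1], row[2], s.unit)
	}
}

Running the sketch prints one row per spec, with the nil filtered source shown as n/a, which is the behaviour valOr is meant to guarantee without repeating the nil-checks at every call site.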