Skip to content

Commit 8b80fbb

Browse files
Artemarius and claude committed
v0.3.1: NR mode selection (stationary/adaptive/auto) + advanced parameters
- Add stationary/adaptive/auto mode to noise reduction with auto-mode logic: stationary when stem-guided profile available, adaptive otherwise
- Add advanced NR dialog (threshold, freq/time smoothing, GPU toggle)
- Add torch/CUDA auto-detection for GPU-accelerated noise reduction
- Update defaults: HPF 100 Hz, EQ bright preset, limiter -0.7 dB
- 10 new tests (7 noise_reduction + 3 effects)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e92a672 commit 8b80fbb

6 files changed

Lines changed: 333 additions & 19 deletions

File tree

tests/test_effects.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,3 +478,36 @@ def test_eq_compressor_limiter_integration(self):
478478
assert result.shape == data.shape
479479
assert result.dtype == np.float32
480480
assert np.all(np.isfinite(result))
481+
482+
483+
# --- Spectral Noise Reduction wrapper tests ---
484+
485+
486+
class TestSpectralNoiseReduction:
    """Smoke tests for the ``spectral_noise_reduction`` wrapper and its config defaults."""

    def test_mode_param_accepted(self):
        """Passing ``mode`` must not raise; output keeps the input shape and is float32."""
        tone = _make_tone(duration_s=2.0)
        processed = spectral_noise_reduction(tone, SR, strength=0.5, mode="adaptive")
        assert processed.shape == tone.shape
        assert processed.dtype == np.float32

    def test_advanced_params_accepted(self):
        """Calling with every advanced kwarg yields finite output of unchanged shape."""
        tone = _make_tone(duration_s=2.0)
        advanced_kwargs = {
            "mode": "stationary",
            "n_std_thresh": 2.0,
            "use_torch": False,
            "freq_smooth_hz": 300,
            "time_smooth_ms": 80,
        }
        processed = spectral_noise_reduction(tone, SR, strength=0.5, **advanced_kwargs)
        assert processed.shape == tone.shape
        assert np.all(np.isfinite(processed))

    def test_default_config_includes_mode(self):
        """DEFAULT_CONFIG's NR section defaults to mode='auto' and lists the advanced keys."""
        snr_cfg = DEFAULT_CONFIG["spectral_noise_reduction"]
        assert snr_cfg["mode"] == "auto"
        # Only presence is checked for the advanced keys, not their values.
        for key in ("n_std_thresh", "use_torch", "freq_smooth_hz", "time_smooth_ms"):
            assert key in snr_cfg

tests/test_noise_reduction.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,3 +255,93 @@ def test_reduce_noise_with_hpf():
255255
# The 440 Hz tone should be preserved, the 40 Hz rumble attenuated
256256
corr_tone = np.corrcoef(tone, result)[0, 1]
257257
assert corr_tone > 0.95
258+
259+
260+
# --- Mode selection tests ---
261+
262+
263+
def test_stationary_mode_explicit():
    """Stationary mode with a caller-supplied noise clip lowers RMS in the noise-only lead-in."""
    lead_in = SR // 2  # number of pure-noise samples placed ahead of the noisy tone

    rng = np.random.default_rng(42)
    noise_only = (0.1 * rng.standard_normal(lead_in)).astype(np.float32)
    tone_noisy = _make_noisy_tone(duration_s=1.5, noise_level=0.1)
    signal = np.concatenate([noise_only, tone_noisy])

    reduced = reduce_noise(
        signal,
        SR,
        noise_clip=signal[:lead_in],
        strength=1.0,
        mode="stationary",
    )

    def _rms(samples):
        # Accumulate in float64 so the squared sum does not lose precision.
        return np.sqrt(np.mean(samples.astype(np.float64) ** 2))

    assert _rms(reduced[:lead_in]) < _rms(signal[:lead_in])
276+
277+
278+
def test_adaptive_mode_explicit():
    """Adaptive mode runs without a noise clip and returns float32 audio of the same shape."""
    noisy = _make_noisy_tone(duration_s=2.0, noise_level=0.15)
    out = reduce_noise(noisy, SR, strength=0.75, mode="adaptive")
    assert out.shape == noisy.shape
    assert out.dtype == np.float32
284+
285+
286+
def test_auto_mode_with_guide_stem_uses_stationary():
    """Auto mode with a guide stem lowers RMS in the region where the stem is silent.

    The assertion only checks the RMS drop over the first second; it does not
    directly observe which algorithm (stationary vs. adaptive) was selected.
    """
    rng = np.random.default_rng(42)

    # Guide stem: one second of digital silence followed by a 440 Hz tone.
    tone = _make_tone(duration_s=1.0, freq=440)
    vocal_sep = np.concatenate([np.zeros(SR, dtype=np.float32), tone])

    # Recording: noise throughout, with the same tone added from sample SR onward.
    vocal_rec = (0.1 * rng.standard_normal(SR * 2)).astype(np.float32).copy()
    vocal_rec[SR:] += tone

    reduced = reduce_noise(
        vocal_rec,
        SR,
        strength=1.0,
        guide_stem=vocal_sep,
        mode="auto",
    )

    def _rms(samples):
        return np.sqrt(np.mean(samples.astype(np.float64) ** 2))

    # The first second holds noise only, so any reduction shows up there.
    assert _rms(reduced[:SR]) < _rms(vocal_rec[:SR])
303+
304+
305+
def test_auto_mode_without_guide_uses_adaptive():
    """Auto mode with neither stem nor clip still returns float32 audio of the same shape."""
    noisy = _make_noisy_tone(duration_s=2.0, noise_level=0.1)
    out = reduce_noise(noisy, SR, strength=0.75, mode="auto")
    assert out.shape == noisy.shape
    assert out.dtype == np.float32
311+
312+
313+
def test_smoothing_parameters_accepted():
    """Non-default freq/time smoothing values run cleanly and give finite output."""
    noisy = _make_noisy_tone(duration_s=1.0, noise_level=0.1)
    smoothing = {"freq_smooth_hz": 200, "time_smooth_ms": 100}
    out = reduce_noise(noisy, SR, strength=0.5, **smoothing)
    assert out.shape == noisy.shape
    assert np.all(np.isfinite(out))
320+
321+
322+
def test_n_std_thresh_parameter():
    """A lower ``n_std_thresh`` leaves no more residual noise than a higher one."""
    lead_in = SR // 2  # number of pure-noise samples placed ahead of the noisy tone

    rng = np.random.default_rng(42)
    noise_only = (0.1 * rng.standard_normal(lead_in)).astype(np.float32)
    tone_noisy = _make_noisy_tone(duration_s=1.5, noise_level=0.1)
    signal = np.concatenate([noise_only, tone_noisy])
    noise_clip = signal[:lead_in]

    def _lead_in_rms(thresh):
        # Run stationary NR at the given threshold and measure what remains
        # in the noise-only lead-in.
        out = reduce_noise(
            signal,
            SR,
            noise_clip=noise_clip,
            strength=1.0,
            mode="stationary",
            n_std_thresh=thresh,
        )
        return np.sqrt(np.mean(out[:lead_in].astype(np.float64) ** 2))

    rms_mild = _lead_in_rms(3.0)
    rms_aggressive = _lead_in_rms(0.5)
    assert rms_aggressive <= rms_mild
340+
341+
342+
def test_use_torch_false_explicit():
    """With ``use_torch=False`` the call returns float32 audio of the same shape."""
    noisy = _make_noisy_tone(duration_s=1.0, noise_level=0.1)
    out = reduce_noise(noisy, SR, strength=0.5, use_torch=False)
    assert out.shape == noisy.shape
    assert out.dtype == np.float32

vocalforge/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.3.0"
1+
__version__ = "0.3.1"

vocalforge/audio/effects.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@
4646
"stub": False,
4747
"strength": 0.75,
4848
"guide_stem": None,
49+
"mode": "auto",
50+
"n_std_thresh": 1.5,
51+
"use_torch": None,
52+
"freq_smooth_hz": 500,
53+
"time_smooth_ms": 50,
4954
},
5055
"dereverb": {
5156
"enabled": False,
@@ -56,12 +61,12 @@
5661
"highpass_filter": {
5762
"enabled": True,
5863
"stub": False,
59-
"cutoff_hz": 80.0,
64+
"cutoff_hz": 100.0,
6065
},
6166
"parametric_eq": {
6267
"enabled": False,
6368
"stub": False,
64-
"preset": "clean_up",
69+
"preset": "bright",
6570
"bands": None, # filled from EQ_PRESETS at import time (see below)
6671
},
6772
"compressor": {
@@ -86,7 +91,7 @@
8691
"limiter": {
8792
"enabled": True,
8893
"stub": False,
89-
"ceiling_db": -1.0,
94+
"ceiling_db": -0.7,
9095
"release_ms": 50.0,
9196
},
9297
}
@@ -97,26 +102,26 @@
97102
"Clean": {
98103
"noise_gate": {"enabled": True, "threshold_db": -35.0, "attack_ms": 2.0,
99104
"release_ms": 100.0, "hold_ms": 50.0, "reduction_db": -40.0},
100-
"spectral_noise_reduction": {"enabled": True, "strength": 0.75},
105+
"spectral_noise_reduction": {"enabled": True, "strength": 0.75, "mode": "auto"},
101106
"dereverb": {"enabled": False},
102-
"highpass_filter": {"enabled": True, "cutoff_hz": 80.0},
107+
"highpass_filter": {"enabled": True, "cutoff_hz": 100.0},
103108
"parametric_eq": {"enabled": False},
104109
"compressor": {"enabled": False},
105110
"de_esser": {"enabled": False},
106111
"reverb": {"enabled": False},
107-
"limiter": {"enabled": True, "ceiling_db": -1.0},
112+
"limiter": {"enabled": True, "ceiling_db": -0.7},
108113
},
109114
"Enhanced": {
110115
"noise_gate": {"enabled": True, "threshold_db": -35.0, "attack_ms": 2.0,
111116
"release_ms": 100.0, "hold_ms": 50.0, "reduction_db": -40.0},
112-
"spectral_noise_reduction": {"enabled": True, "strength": 0.75},
117+
"spectral_noise_reduction": {"enabled": True, "strength": 0.75, "mode": "auto"},
113118
"dereverb": {"enabled": True, "strength": 0.5},
114-
"highpass_filter": {"enabled": True, "cutoff_hz": 80.0},
115-
"parametric_eq": {"enabled": True, "preset": "clean_up"},
119+
"highpass_filter": {"enabled": True, "cutoff_hz": 100.0},
120+
"parametric_eq": {"enabled": True, "preset": "bright"},
116121
"compressor": {"enabled": True, "threshold_db": -18.0, "ratio": 3.0},
117122
"de_esser": {"enabled": False},
118123
"reverb": {"enabled": False},
119-
"limiter": {"enabled": True, "ceiling_db": -1.0},
124+
"limiter": {"enabled": True, "ceiling_db": -0.7},
120125
},
121126
}
122127

@@ -142,7 +147,7 @@
142147
}
143148

144149
# Patch DEFAULT_CONFIG with actual EQ bands now that EQ_PRESETS is defined
145-
DEFAULT_CONFIG["parametric_eq"]["bands"] = EQ_PRESETS["clean_up"]
150+
DEFAULT_CONFIG["parametric_eq"]["bands"] = EQ_PRESETS["bright"]
146151

147152

148153
def _merge_config(defaults: dict, overrides: dict | None) -> dict:
@@ -271,6 +276,11 @@ def spectral_noise_reduction(data: np.ndarray, sr: int, **params) -> np.ndarray:
271276
"""
272277
strength = params.get("strength", 0.75)
273278
guide_stem = params.get("guide_stem", None)
279+
mode = params.get("mode", "auto")
280+
n_std_thresh = params.get("n_std_thresh", 1.5)
281+
use_torch = params.get("use_torch", None)
282+
freq_smooth_hz = params.get("freq_smooth_hz", 500)
283+
time_smooth_ms = params.get("time_smooth_ms", 50)
274284

275285
if strength == 0.0:
276286
return data
@@ -282,6 +292,11 @@ def spectral_noise_reduction(data: np.ndarray, sr: int, **params) -> np.ndarray:
282292
strength=strength,
283293
guide_stem=guide_stem,
284294
hpf_cutoff_hz=0.0,
295+
mode=mode,
296+
n_std_thresh=n_std_thresh,
297+
use_torch=use_torch,
298+
freq_smooth_hz=freq_smooth_hz,
299+
time_smooth_ms=time_smooth_ms,
285300
)
286301

287302

vocalforge/audio/noise_reduction.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,11 @@ def reduce_noise(
157157
strength: float = 1.0,
158158
guide_stem: np.ndarray | None = None,
159159
hpf_cutoff_hz: float = 0.0,
160+
mode: str = "auto",
161+
n_std_thresh: float = 1.5,
162+
use_torch: bool | None = None,
163+
freq_smooth_hz: float = 500,
164+
time_smooth_ms: float = 50,
160165
) -> np.ndarray:
161166
"""Apply spectral-gating noise reduction to audio.
162167
@@ -170,6 +175,14 @@ def reduce_noise(
170175
guide_stem: Optional separated vocal stem used to find silent regions
171176
for noise profiling. Only used when noise_clip is None.
172177
hpf_cutoff_hz: High-pass filter cutoff in Hz. 0 = disabled.
178+
mode: NR algorithm mode — "auto", "stationary", or "adaptive".
179+
"auto" uses stationary when a reliable noise profile is available
180+
(explicit clip or stem-guided), adaptive otherwise.
181+
n_std_thresh: Stationary mode threshold sensitivity (0.5–3.0).
182+
use_torch: Whether to use torch/CUDA acceleration.
183+
None = auto-detect CUDA availability.
184+
freq_smooth_hz: Frequency mask smoothing width in Hz.
185+
time_smooth_ms: Temporal mask smoothing width in ms.
173186
174187
Returns:
175188
Noise-reduced audio, same shape and dtype (float32) as input.
@@ -190,11 +203,33 @@ def reduce_noise(
190203

191204
import noisereduce as nr
192205

206+
# Track whether we obtained a high-quality noise profile
207+
_had_good_profile = noise_clip is not None # user supplied explicitly
208+
193209
if noise_clip is None and guide_stem is not None:
194210
noise_clip = estimate_noise_from_stem(data, guide_stem, sample_rate)
211+
if noise_clip is not None:
212+
_had_good_profile = True # stem-guided = good quality
195213

196214
if noise_clip is None:
197215
noise_clip = estimate_noise_profile(data, sample_rate)
216+
# _had_good_profile stays False — first-0.5s fallback is weak
217+
218+
# Resolve mode → boolean
219+
if mode == "auto":
220+
stationary = _had_good_profile # stationary when profile is reliable
221+
elif mode == "stationary":
222+
stationary = True
223+
else: # "adaptive"
224+
stationary = False
225+
226+
# Torch / CUDA auto-detection
227+
if use_torch is None:
228+
try:
229+
import torch
230+
use_torch = torch.cuda.is_available()
231+
except ImportError:
232+
use_torch = False
198233

199234
is_mono = data.ndim == 1
200235

@@ -211,6 +246,12 @@ def reduce_noise(
211246
sr=sample_rate,
212247
y_noise=y_noise,
213248
prop_decrease=float(strength),
249+
stationary=stationary,
250+
n_std_thresh_stationary=float(n_std_thresh),
251+
use_torch=use_torch,
252+
device="cuda" if use_torch else "cpu",
253+
freq_mask_smooth_hz=float(freq_smooth_hz),
254+
time_mask_smooth_ms=float(time_smooth_ms),
214255
)
215256

216257
# Transpose back for multichannel

0 commit comments

Comments
 (0)