Skip to content

Commit fd40a37

Browse files
committed
fix: 修复原唱回音导致翻唱卡顿异响
- DeEcho质量好时(sep>30dB,corr>0.9)跳过blend直接使用deecho输出, 避免45%原始回音被混回
- 增强blend系数: global_echo驱动自适应, 重回音时deecho权重可达0.90
- 修复_prepare_vocals_for_vc双重覆盖bug: mono_resolved标志防止 advanced_dereverb结果被行1132覆盖
- uvr_deecho缺模型时回退到advanced_dereverb而非direct
- 源约束排除direct模式, auto模式加入advanced_dereverb
- 默认配置: index_rate 0.70→0.50, protect 0.50→0.33, vc_preprocess_mode→auto, source_constraint_mode→auto
- 同步3个preset和check_deecho_config.py期望值
- UI/i18n文案: "直接进入RVC"→"算法去混响"
1 parent d6e4bf0 commit fd40a37

8 files changed

Lines changed: 95 additions & 67 deletions

File tree

check_deecho_config.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,11 @@ def check_config():
6363

6464
# 检查关键配置
6565
checks = [
66-
("VC 预处理模式", "vc_preprocess_mode", "uvr_deecho", cover_config.get("vc_preprocess_mode")),
67-
("源约束模式", "source_constraint_mode", "on", cover_config.get("source_constraint_mode")),
66+
("VC 预处理模式", "vc_preprocess_mode", "auto", cover_config.get("vc_preprocess_mode")),
67+
("源约束模式", "source_constraint_mode", "auto", cover_config.get("source_constraint_mode")),
6868
("Karaoke 分离", "karaoke_separation", True, cover_config.get("karaoke_separation")),
69-
("索引率", "index_rate", 0.30, cover_config.get("index_rate")),
70-
("保护系数", "protect", 0.30, cover_config.get("protect")),
69+
("索引率", "index_rate", 0.50, cover_config.get("index_rate")),
70+
("保护系数", "protect", 0.33, cover_config.get("protect")),
7171
]
7272

7373
all_correct = True
@@ -93,25 +93,22 @@ def print_recommendations():
9393
print("=" * 60)
9494

9595
print("""
96-
1. 当前配置已启用激进去回声模式:
97-
- 强制使用 UVR DeEcho 模型
98-
- 总是启用源约束后处理
96+
1. 当前配置使用自动模式:
97+
- 优先使用 UVR DeEcho 模型,缺模型时回退到算法去混响
98+
- DeEcho 质量好时跳过 blend 直接使用,避免混回原始回音
99+
- 源约束仅在去过回音的预处理下自动启用
99100
100101
2. 如果回声仍然明显,可以尝试:
101-
- 在 UI 中调整"索引率"(降低到 0.1-0.2
102+
- 在 UI 中调整"索引率"(降低到 0.2-0.3
102103
- 在 UI 中调整"保护系数"(降低到 0.2-0.25)
103104
- 使用更高质量的输入音频
104105
105106
3. 处理流程:
106-
原始音频 → Karaoke 分离 → UVR DeEcho → RVC 转换 → 源约束 → 输出
107+
原始音频 → Karaoke 分离 → UVR DeEcho(或算法去混响) → RVC 转换 → 输出
107108
108-
4. 如果需要更激进的处理,可以修改代码中的参数:
109-
- infer/cover_pipeline.py 第 1391 行:回声衰减系数 0.92 → 0.85
110-
- infer/cover_pipeline.py 第 1402 行:软掩码系数 0.7 → 0.5
111-
112-
5. 测试建议:
109+
4. 测试建议:
113110
- 选择一首有明显回声的歌曲
114-
- 处理后使用 Audacity 查看频谱图
111+
- 查看日志中 DeEcho quality 指标
115112
- 对比处理前后的回声强度
116113
""")
117114

configs/config.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@
4242
"demucs_split": true,
4343
"f0_method": "hybrid",
4444
"disable_chunking": false,
45-
"index_rate": 0.70,
45+
"index_rate": 0.50,
4646
"filter_radius": 3,
4747
"rms_mix_rate": 0.0,
48-
"protect": 0.50,
48+
"protect": 0.33,
4949
"speaker_id": 0,
5050
"hubert_layer": 12,
5151
"silence_gate": false,
@@ -70,8 +70,8 @@
7070
"f0_stabilize_octave": true,
7171
"f0_rate_limit": false,
7272
"f0_rate_limit_semitones": 12.0,
73-
"vc_preprocess_mode": "uvr_deecho",
74-
"source_constraint_mode": "on",
73+
"vc_preprocess_mode": "auto",
74+
"source_constraint_mode": "auto",
7575
"vc_pipeline_mode": "current",
7676
"singing_repair": false,
7777
"reverb_reapply": true,

configs/presets/balanced.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
"f0_stabilize": true,
1414
"f0_stabilize_window": 3,
1515
"f0_stabilize_max_semitones": 3.0,
16-
"vc_preprocess_mode": "uvr_deecho",
17-
"source_constraint_mode": "on",
16+
"vc_preprocess_mode": "auto",
17+
"source_constraint_mode": "auto",
1818
"uvr5_agg": 10
1919
}
2020
}

configs/presets/clarity_priority.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
"f0_stabilize": false,
1414
"f0_stabilize_window": 2,
1515
"f0_stabilize_max_semitones": 2.0,
16-
"vc_preprocess_mode": "uvr_deecho",
17-
"source_constraint_mode": "on",
16+
"vc_preprocess_mode": "auto",
17+
"source_constraint_mode": "auto",
1818
"uvr5_agg": 8
1919
}
2020
}

configs/presets/timbre_priority.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
"f0_stabilize": true,
1414
"f0_stabilize_window": 5,
1515
"f0_stabilize_max_semitones": 4.0,
16-
"vc_preprocess_mode": "uvr_deecho",
17-
"source_constraint_mode": "on",
16+
"vc_preprocess_mode": "auto",
17+
"source_constraint_mode": "auto",
1818
"uvr5_agg": 12
1919
}
2020
}

i18n/zh_CN.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,13 @@
7575
"select_to_download": "选择要下载的角色",
7676
"download": "下载",
7777
"vc_preprocess_mode": "VC预处理策略",
78-
"vc_preprocess_mode_info": "参考成熟项目:优先学习型 DeEcho/DeReverb,否则主唱直通 RVC",
78+
"vc_preprocess_mode_info": "优先学习型 DeEcho/DeReverb,缺模型时回退到算法去混响",
7979
"vc_preprocess_auto": "自动(推荐)",
8080
"vc_preprocess_direct": "主唱直通",
8181
"vc_preprocess_uvr_deecho": "官方DeEcho优先",
8282
"vc_preprocess_legacy": "旧版手工链",
8383
"source_constraint_mode": "源约束策略",
84-
"source_constraint_mode_info": "自动(推荐)下仅旧版手工链启用;学习型 DeEcho 或主唱直通默认不再追加自定义源约束",
84+
"source_constraint_mode_info": "自动(推荐)下仅去过回音的预处理启用;主唱直通模式不追加源约束以避免放大回音",
8585
"source_constraint_auto": "自动(推荐)",
8686
"source_constraint_off": "关闭",
8787
"source_constraint_on": "始终开启",
@@ -103,7 +103,7 @@
103103
"voice_models": "语音模型",
104104
"voice_models_desc": "将 .pth 模型文件放入 assets/weights/ 目录,然后刷新模型列表。",
105105
"mature_deecho_models": "成熟 DeEcho 模型",
106-
"mature_deecho_models_desc": "用于成熟项目常见的学习型去回声/去混响流程。未下载时,翻唱页的自动模式会回退为主唱直通 RVC",
106+
"mature_deecho_models_desc": "用于成熟项目常见的学习型去回声/去混响流程。未下载时,翻唱页会回退到算法去混响(效果略弱于学习型模型)",
107107
"download_mature_deecho": "下载成熟 DeEcho 模型",
108108
"mature_deecho_status": "成熟 DeEcho 状态",
109109
"mature_deecho_check": "检查成熟 DeEcho 状态"

infer/cover_pipeline.py

Lines changed: 65 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ def _select_best_uvr_deecho_output(self, reference_path: str, candidate_files: L
225225
"""Pick the UVR DeEcho branch best suited for VC input."""
226226
best_path = None
227227
best_score = None
228+
best_metrics = None
228229

229230
for candidate_path in candidate_files:
230231
scored = self._score_uvr_deecho_candidate(reference_path, candidate_path)
@@ -241,7 +242,10 @@ def _select_best_uvr_deecho_output(self, reference_path: str, candidate_files: L
241242
if best_score is None or score > best_score:
242243
best_score = score
243244
best_path = candidate_path
245+
best_metrics = metrics
244246

247+
# 保存最佳候选的质量指标,供 blend 决策使用
248+
self._uvr_deecho_metrics = best_metrics
245249
return best_path
246250

247251
def _init_separator(
@@ -902,9 +906,12 @@ def _should_apply_source_constraint(
902906
"""Decide whether to run source-guided post constraint."""
903907
normalized_mode = str(source_constraint_mode or "auto").strip().lower()
904908
if normalized_mode == "on":
909+
if self._last_vc_preprocess_mode == "direct":
910+
log.detail("源约束跳过: direct 模式下源未去回音,强制约束会放大回音伪影")
911+
return False
905912
return vc_preprocessed
906913
if normalized_mode == "auto":
907-
return vc_preprocessed and self._last_vc_preprocess_mode in {"uvr_deecho", "legacy"}
914+
return vc_preprocessed and self._last_vc_preprocess_mode in {"uvr_deecho", "legacy", "advanced_dereverb"}
908915
return False
909916

910917
def _refine_source_constrained_output(
@@ -1022,14 +1029,17 @@ def _blend_direct_with_deecho(
10221029
echo_ratio = echo_ratio[:n_blend]
10231030

10241031
# --- Blending weight ---
1025-
# Base: original low-activity weight (for silent gaps)
1026-
base_weight = 0.65 * np.square(1.0 - activity[:n_blend])
1027-
# Echo boost: even during active singing, apply DeEcho proportional
1028-
# to detected echo. Max additional contribution capped at 0.55.
1029-
echo_boost = 0.55 * echo_ratio * activity[:n_blend]
1032+
# 全局回音水平驱动系数自适应
1033+
global_echo = float(np.mean(echo_ratio))
1034+
# 沉默段基权: 轻回音0.65, 重回音0.85
1035+
base_coef = 0.65 + 0.20 * global_echo
1036+
base_weight = base_coef * np.square(1.0 - activity[:n_blend])
1037+
# 活跃唱段 echo_boost: 轻回音0.55, 重回音0.90
1038+
echo_boost_coef = 0.55 + 0.35 * global_echo
1039+
echo_boost = echo_boost_coef * echo_ratio * activity[:n_blend]
10301040
deecho_weight = base_weight + echo_boost
10311041
deecho_weight = np.convolve(deecho_weight, smooth_kernel, mode="same")
1032-
deecho_weight = np.clip(deecho_weight, 0.0, 0.80)
1042+
deecho_weight = np.clip(deecho_weight, 0.0, 0.95)
10331043
deecho_weight = CoverPipeline._frame_curve_to_sample_gain(
10341044
deecho_weight,
10351045
aligned_len,
@@ -1066,6 +1076,7 @@ def _prepare_vocals_for_vc(
10661076

10671077
# 保存原始混响用于后处理
10681078
self._original_reverb_path = None
1079+
self._uvr_deecho_metrics = None
10691080

10701081
if preprocess_mode == "advanced_dereverb":
10711082
# 使用高级去混响:分离干声和混响
@@ -1096,17 +1107,20 @@ def _prepare_vocals_for_vc(
10961107
log.detail("VC preprocess: legacy dereverb chain -> mono select")
10971108
else:
10981109
preprocess_input = vocals_path
1110+
mono_resolved = False
1111+
10991112
if preprocess_mode in {"auto", "uvr_deecho"}:
11001113
preprocess_input = self._apply_uvr_deecho_for_vc(vocals_path, session_dir) or vocals_path
11011114

11021115
if preprocess_input == vocals_path:
1103-
# 如果UVR DeEcho不可用,在auto模式下使用advanced dereverb
1104-
if preprocess_mode == "auto":
1116+
if preprocess_mode in {"auto", "uvr_deecho"}:
1117+
# auto / uvr_deecho 模式在 UVR 模型缺失时都回退到 advanced_dereverb
11051118
audio, sr = librosa.load(vocals_path, sr=None, mono=False)
11061119
audio = self._ensure_2d(audio).astype(np.float32)
11071120
mono = self._select_mono_for_vc(audio, sr)
11081121

1109-
log.detail("VC preprocess: UVR DeEcho not available, using advanced dereverb")
1122+
fallback_name = "auto" if preprocess_mode == "auto" else "uvr_deecho"
1123+
log.detail(f"VC preprocess ({fallback_name}): UVR DeEcho not available, using advanced dereverb")
11101124
dry_signal, reverb_tail = advanced_dereverb(mono, sr)
11111125

11121126
# 保存混响用于后处理
@@ -1116,38 +1130,55 @@ def _prepare_vocals_for_vc(
11161130

11171131
mono = dry_signal
11181132
self._last_vc_preprocess_mode = "advanced_dereverb"
1133+
mono_resolved = True
11191134
log.detail(f"Dry/Wet separation: dry RMS={np.sqrt(np.mean(dry_signal**2)):.4f}, reverb RMS={np.sqrt(np.mean(reverb_tail**2)):.4f}")
11201135
else:
1136+
# direct 模式
11211137
self._last_vc_preprocess_mode = "direct"
1122-
if preprocess_mode == "uvr_deecho":
1123-
log.warning("Official DeEcho model not found, falling back to direct lead input")
11241138
log.detail("VC preprocess: direct lead -> mono select")
1125-
audio, sr = librosa.load(preprocess_input, sr=None, mono=False)
1126-
audio = self._ensure_2d(audio).astype(np.float32)
1127-
mono = self._select_mono_for_vc(audio, sr)
11281139
else:
11291140
self._last_vc_preprocess_mode = "uvr_deecho"
11301141
log.detail("VC preprocess: UVR learned DeEcho/DeReverb -> mono select")
11311142

1132-
if preprocess_input == vocals_path:
1133-
audio, sr = librosa.load(preprocess_input, sr=None, mono=False)
1134-
audio = self._ensure_2d(audio).astype(np.float32)
1135-
mono = self._select_mono_for_vc(audio, sr)
1136-
else:
1137-
direct_audio, sr = librosa.load(vocals_path, sr=None, mono=False)
1138-
deecho_audio, deecho_sr = librosa.load(preprocess_input, sr=None, mono=False)
1139-
direct_audio = self._ensure_2d(direct_audio).astype(np.float32)
1140-
deecho_audio = self._ensure_2d(deecho_audio).astype(np.float32)
1141-
direct_mono = self._select_mono_for_vc(direct_audio, sr)
1142-
deecho_mono = self._select_mono_for_vc(deecho_audio, deecho_sr)
1143-
if deecho_sr != sr:
1144-
deecho_mono = librosa.resample(
1145-
deecho_mono,
1146-
orig_sr=deecho_sr,
1147-
target_sr=sr,
1148-
).astype(np.float32)
1149-
mono = self._blend_direct_with_deecho(direct_mono, deecho_mono, sr)
1150-
log.detail("VC preprocess: blended direct lead with UVR DeEcho")
1143+
# 最终 mono 确定(仅在 mono 未被上面解决时执行)
1144+
if not mono_resolved:
1145+
if preprocess_input == vocals_path:
1146+
audio, sr = librosa.load(preprocess_input, sr=None, mono=False)
1147+
audio = self._ensure_2d(audio).astype(np.float32)
1148+
mono = self._select_mono_for_vc(audio, sr)
1149+
else:
1150+
direct_audio, sr = librosa.load(vocals_path, sr=None, mono=False)
1151+
deecho_audio, deecho_sr = librosa.load(preprocess_input, sr=None, mono=False)
1152+
direct_audio = self._ensure_2d(direct_audio).astype(np.float32)
1153+
deecho_audio = self._ensure_2d(deecho_audio).astype(np.float32)
1154+
direct_mono = self._select_mono_for_vc(direct_audio, sr)
1155+
deecho_mono = self._select_mono_for_vc(deecho_audio, deecho_sr)
1156+
if deecho_sr != sr:
1157+
deecho_mono = librosa.resample(
1158+
deecho_mono,
1159+
orig_sr=deecho_sr,
1160+
target_sr=sr,
1161+
).astype(np.float32)
1162+
1163+
# DeEcho 质量检测:用 UVR 候选打分指标判断是否跳过 blend
1164+
uvr_metrics = getattr(self, '_uvr_deecho_metrics', None)
1165+
skip_blend = False
1166+
if uvr_metrics:
1167+
sep_db = uvr_metrics.get('separation_db', 0.0)
1168+
corr = uvr_metrics.get('corr', 0.0)
1169+
log.detail(
1170+
f"DeEcho quality: sep={sep_db:.2f}dB, corr={corr:.3f}"
1171+
)
1172+
# sep > 30dB 且 corr > 0.9 说明 DeEcho 质量好
1173+
if sep_db > 30.0 and corr > 0.9:
1174+
skip_blend = True
1175+
1176+
if skip_blend:
1177+
mono = deecho_mono
1178+
log.detail("VC preprocess: UVR DeEcho quality sufficient, using deecho directly (skip blend)")
1179+
else:
1180+
mono = CoverPipeline._blend_direct_with_deecho(direct_mono, deecho_mono, sr)
1181+
log.detail("VC preprocess: blended direct lead with UVR DeEcho (enhanced)")
11511182

11521183
mono = soft_clip(mono, threshold=0.9, ceiling=0.99)
11531184

ui/app.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -646,8 +646,8 @@ def get_cover_vc_route_status(
646646
])
647647
return newline.join([
648648
"⚠️ 当前设为官方 DeEcho 优先,但本地缺少模型",
649-
"当前将回退流程: 主唱分离 → 直接进入 RVC → 混音",
650-
"建议: 先在模型管理页下载成熟 DeEcho 模型",
649+
"当前将回退流程: 主唱分离 → 算法去混响 → RVC → 混音",
650+
"建议: 下载成熟 DeEcho 模型可获得更好效果",
651651
])
652652

653653
if preferred:
@@ -657,9 +657,9 @@ def get_cover_vc_route_status(
657657
"流程: 主唱分离 → UVR DeEcho/DeReverb → RVC → 混音",
658658
])
659659
return newline.join([
660-
"ℹ️ 自动模式当前会回退为主唱直通 RVC",
661-
"原因: 本地未检测到成熟 DeEcho / DeReverb 模型",
662-
"流程: 主唱分离 → 直接进入 RVC → 混音",
660+
"ℹ️ 自动模式当前使用算法去混响",
661+
"原因: 本地未检测到成熟 DeEcho / DeReverb 模型,已回退到 advanced dereverb",
662+
"流程: 主唱分离 → 算法去混响 → RVC → 混音",
663663
])
664664

665665

0 commit comments

Comments
 (0)