Skip to content

Commit 49d2c43

Browse files
committed
Merge branch 'fix/vram-cleanup' into fix/backend-non-root
2 parents b0ff17e + 14556c3 commit 49d2c43

3 files changed

Lines changed: 42 additions & 0 deletions

File tree

backend/app/services/speaker_embedding_service.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,3 +218,25 @@ def extract_reference_embedding(self, audio_paths: list[str]) -> Optional[np.nda
218218
def get_embedding_dimension(self) -> int:
219219
"""Get the dimension of the embeddings produced by the model."""
220220
return 512 # Pyannote embedding dimension (updated for newer models)
221+
222+
def cleanup(self):
    """Release the PyAnnote embedding model and reclaim GPU memory.

    Call this once the service is no longer needed so VRAM is returned
    promptly — important when several models run back-to-back during
    transcription processing. Safe to call more than once: the model is
    only deleted if it is still attached to this instance.
    """
    import gc  # imported locally: only needed for this teardown path

    if hasattr(self, "inference"):
        logger.info("Cleaning up PyAnnote embedding model")
        del self.inference

    # Drop lingering Python references before asking CUDA for its cache back.
    gc.collect()

    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    logger.info("GPU memory cleaned up after embedding service")

backend/app/tasks/transcription/core.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,10 +321,24 @@ def whisperx_progress_callback(progress, message):
321321
f"Speaker identification completed: {len(speaker_results)} speakers processed"
322322
)
323323

324+
# CRITICAL: Clean up embedding service and matching service to free VRAM
325+
# PyAnnote embedding model uses ~500 MB and must be deleted before OpenSearch indexing
326+
embedding_service.cleanup()
327+
del embedding_service
328+
del matching_service
329+
324330
except Exception as e:
325331
logger.warning(f"Error in speaker identification: {e}")
326332
# Continue with transcription even if speaker matching fails
327333

334+
# Force GPU memory cleanup checkpoint before OpenSearch indexing
335+
# This ensures all models are fully cleared from VRAM
336+
from app.utils.hardware_detection import detect_hardware
337+
338+
hardware_config = detect_hardware()
339+
hardware_config.optimize_memory_usage()
340+
logger.info("GPU memory cleanup checkpoint completed")
341+
328342
with session_scope() as db:
329343
update_task_status(db, task_id, "in_progress", progress=0.85)
330344

backend/app/tasks/transcription/whisperx_service.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,12 @@ def perform_speaker_diarization(
245245
)
246246

247247
diarize_segments = diarize_model(audio, **diarize_params)
248+
249+
# CRITICAL: Clean up diarization model immediately to free VRAM
250+
# This model uses ~2-3 GB and must be deleted before speaker embedding extraction
251+
del diarize_model
252+
self.hardware_config.optimize_memory_usage()
253+
248254
return diarize_segments
249255

250256
except Exception as e:

0 commit comments

Comments
 (0)