def delete_all_user_private_cloud_sync_data(uid: str) -> None:
    """Delete all private cloud sync data (chunks and merged audio) for a user.

    Removes every object stored under ``chunks/{uid}/`` and ``audio/{uid}/``
    in the private cloud sync bucket.

    Args:
        uid: ID of the user whose objects are removed. A falsy value (empty
            string or ``None``) makes the call a no-op.

    Raises:
        Nothing is caught here: any error raised by the storage client while
        listing or deleting propagates to the caller.
    """
    if not uid:
        return

    # Local import keeps this block self-contained.
    from itertools import islice

    bucket = storage_client.bucket(private_cloud_sync_bucket)

    # The Cloud Storage batch endpoint is documented as accepting at most
    # 100 calls per batch request, so deletes are sent in groups of that size.
    max_batch_size = 100

    for prefix in (f'chunks/{uid}/', f'audio/{uid}/'):
        # Listing must happen OUTSIDE the batch context: while a batch is
        # active the client defers every request until the context exits, so
        # a list call issued inside it would not return real results.
        blob_iter = iter(bucket.list_blobs(prefix=prefix))
        while True:
            # Pull the next group of blobs (this may fetch a listing page)
            # before opening the batch.
            pending = list(islice(blob_iter, max_batch_size))
            if not pending:
                break
            # Deletes queued here are sent as one HTTP request on exit.
            with storage_client.batch():
                for blob in pending:
                    blob.delete()