From 4b2584c611fa5192fb9012c1e4d78069ba2cf49e Mon Sep 17 00:00:00 2001
From: fayerman-source
Date: Fri, 2 Jan 2026 01:53:57 -0500
Subject: [PATCH 1/8] feat: add one-click docker-compose deployment and setup
 script

---
 .env.example       | 43 ++++++++++++++++++++++++++++
 docker-compose.yml | 71 ++++++++++++++++++++++++++++++++++++++++++++++
 setup.sh           | 56 ++++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+)
 create mode 100644 .env.example
 create mode 100644 docker-compose.yml
 create mode 100755 setup.sh

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000000..2626455ded
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,43 @@
+# Omi One-Click Deployment Configuration
+# Copy this to .env and fill in the values
+
+# --- Core API Keys ---
+# Required for transcription
+DEEPGRAM_API_KEY=
+# Required for LLM features
+OPENAI_API_KEY=
+# Optional: Other LLM providers
+ANTHROPIC_API_KEY=
+
+# --- Backend Configuration ---
+REDIS_DB_HOST=redis
+REDIS_DB_PORT=6379
+REDIS_DB_PASSWORD=
+
+# Vector Database (Pinecone is default in current code)
+PINECONE_API_KEY=
+PINECONE_INDEX_NAME=memories-backend
+
+# Database (Firebase/Firestore is default)
+# Provide the JSON content of your service account if using Firestore
+SERVICE_ACCOUNT_JSON=
+
+# --- Service URLs (Internal Docker Networking) ---
+HOSTED_VAD_API_URL=http://vad:8080/v1/vad
+HOSTED_SPEAKER_EMBEDDING_API_URL=http://diarizer:8080
+HOSTED_PUSHER_API_URL=http://pusher:8080
+
+# --- Frontend Configuration ---
+# Public URL of the backend (use localhost for local dev)
+NEXT_PUBLIC_API_URL=http://localhost:8080
+# Firebase config for the frontend
+NEXT_PUBLIC_FIREBASE_API_KEY=
+NEXT_PUBLIC_FIREBASE_AUTH_DOMAIN=
+NEXT_PUBLIC_FIREBASE_PROJECT_ID=
+NEXT_PUBLIC_FIREBASE_STORAGE_BUCKET=
+NEXT_PUBLIC_FIREBASE_MESSAGING_SENDER_ID=
+NEXT_PUBLIC_FIREBASE_APP_ID=
+
+# --- Development / Debugging ---
+ADMIN_KEY=some_secret_key
+ENCRYPTION_SECRET=omi_default_secret_change_me
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000..d5dab49edc
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,71 @@
+services:
+  backend:
+    build:
+      context: .
+      dockerfile: backend/Dockerfile
+    restart: always
+    ports:
+      - "8080:8080"
+    env_file:
+      - .env
+    depends_on:
+      - redis
+
+  pusher:
+    build:
+      context: .
+      dockerfile: backend/pusher/Dockerfile
+    restart: always
+    ports:
+      - "8081:8080"
+    env_file:
+      - .env
+    depends_on:
+      - redis
+
+  frontend:
+    build:
+      context: .
+      dockerfile: web/frontend/Dockerfile
+      args:
+        - API_URL=http://localhost:8080
+        - NEXT_PUBLIC_FIREBASE_API_KEY=${NEXT_PUBLIC_FIREBASE_API_KEY:-fake_key_for_build}
+        - NEXT_PUBLIC_FIREBASE_AUTH_DOMAIN=${NEXT_PUBLIC_FIREBASE_AUTH_DOMAIN:-omi-app.firebaseapp.com}
+        - NEXT_PUBLIC_FIREBASE_PROJECT_ID=${NEXT_PUBLIC_FIREBASE_PROJECT_ID:-omi-app}
+        - NEXT_PUBLIC_FIREBASE_STORAGE_BUCKET=${NEXT_PUBLIC_FIREBASE_STORAGE_BUCKET:-omi-app.appspot.com}
+        - NEXT_PUBLIC_FIREBASE_MESSAGING_SENDER_ID=${NEXT_PUBLIC_FIREBASE_MESSAGING_SENDER_ID:-123456789}
+        - NEXT_PUBLIC_FIREBASE_APP_ID=${NEXT_PUBLIC_FIREBASE_APP_ID:-1:123456789:web:abcdef}
+        - NEXT_PUBLIC_FIREBASE_MEASUREMENT_ID=${NEXT_PUBLIC_FIREBASE_MEASUREMENT_ID:-G-ABCDEF}
+    restart: always
+    ports:
+      - "3000:3000"
+    env_file:
+      - .env
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    depends_on:
+      - backend
+
+  redis:
+    image: redis:alpine
+    restart: always
+    ports:
+      - "6379:6379"
+
+  # Optional: Diarizer (Requires NVIDIA GPU and NVIDIA Container Toolkit)
+  # diarizer:
+  #   build:
+  #     context: .
+ # dockerfile: backend/diarizer/Dockerfile + # restart: always + # ports: + # - "8082:8080" + # env_file: + # - .env + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000000..2162dc4c4e --- /dev/null +++ b/setup.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Omi One-Click Setup Script +# Designed for backend developers and customers with low technical expertise. + +set -e + +echo "==========================================" +echo " 🚀 Omi One-Click Setup (Docker) " +echo "==========================================" + +# Check for Docker +if ! command -v docker &> /dev/null; then + echo "❌ Error: Docker is not installed. Please install Docker first: https://docs.docker.com/get-docker/" + exit 1 +fi + +# Check for Docker Compose +if ! docker compose version &> /dev/null; then + echo "❌ Error: Docker Compose is not installed. Please install it or use a newer version of Docker Desktop." + exit 1 +fi + +# Create .env if it doesn't exist +if [ ! -f .env ]; then + echo "📄 Creating .env from .env.example..." + cp .env.example .env + echo "⚠️ Action Required: Please edit the .env file and add your API keys." + echo " At minimum, you need: DEEPGRAM_API_KEY and OPENAI_API_KEY." + + # Optional: try to open the editor + if command -v nano &> /dev/null; then + read -p "Would you like to edit .env now? (y/n) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + nano .env + fi + fi +fi + +# Build and Start +echo "🛠️ Building and starting Omi services..." +docker compose up -d --build + +echo "" +echo "==========================================" +echo "✅ Omi is now starting up!" +echo "" +echo "Services available at:" +echo "👉 Frontend: http://localhost:3000" +echo "👉 Backend: http://localhost:8080" +echo "👉 Pusher: http://localhost:8081" +echo "" +echo "To view logs, run: docker compose logs -f" +echo "To stop Omi, run: docker compose down" +echo "==========================================" From 7f32df1662533231706904b2444eb4e7e962057f Mon Sep 17 00:00:00 2001 From: fayerman-source Date: Fri, 2 Jan 2026 02:19:59 -0500 Subject: [PATCH 2/8] feat: implement mock fallbacks for local/offline deployment --- backend/database/_client.py | 38 ++++++++++++++++++- backend/database/vector_db.py | 29 ++++++++++++--- backend/main.py | 15 +++++--- backend/utils/conversations/search.py | 39 ++++++++++++++++---- backend/utils/other/storage.py | 53 ++++++++++++++++++++++++--- backend/utils/translation.py | 28 +++++++++++++- 6 files changed, 175 insertions(+), 27 deletions(-) diff --git a/backend/database/_client.py b/backend/database/_client.py index 943d6e60de..77c7dacb2e 100644 --- a/backend/database/_client.py +++ b/backend/database/_client.py @@ -4,6 +4,38 @@ import uuid from google.cloud import firestore +from google.auth.exceptions import DefaultCredentialsError + +class MockFirestore: + def collection(self, name): + return MockCollection() + +class MockCollection: + def stream(self): + return [] + def document(self, doc_id): + return MockDocument(doc_id) + def add(self, data): + return None + def where(self, field, op, value): + return self + +class MockDocument: + def __init__(self, doc_id): + self.id = doc_id + def set(self, data): + return None + def get(self): + return MockSnapshot() + def update(self, data): + return None + def delete(self): + return None + +class MockSnapshot: + exists = False + def to_dict(self): + return {} if os.environ.get('SERVICE_ACCOUNT_JSON'): service_account_info = 
json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) @@ -11,7 +43,11 @@ with open('google-credentials.json', 'w') as f: json.dump(service_account_info, f) -db = firestore.Client() +try: + db = firestore.Client() +except (DefaultCredentialsError, Exception) as e: + print(f"⚠️ Warning: Firestore connection failed ({e}). Using MockFirestore for local dev.") + db = MockFirestore() def get_users_uid(): diff --git a/backend/database/vector_db.py b/backend/database/vector_db.py index f9a883d5e4..ce501e5dae 100644 --- a/backend/database/vector_db.py +++ b/backend/database/vector_db.py @@ -9,11 +9,30 @@ from models.conversation import Conversation from utils.llm.clients import embeddings -if os.getenv('PINECONE_API_KEY') is not None: - pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY', '')) - index = pc.Index(os.getenv('PINECONE_INDEX_NAME', '')) -else: - index = None +class MockIndex: + def upsert(self, vectors, namespace): + print(f"Mock upsert: {len(vectors)} vectors") + return {"upserted_count": len(vectors)} + def query(self, vector, top_k, include_metadata=False, filter=None, namespace=None, include_values=False): + print("Mock query") + return {"matches": []} + def update(self, id, set_metadata, namespace): + print(f"Mock update: {id}") + return {} + def delete(self, ids, namespace): + print(f"Mock delete: {ids}") + return {} + +try: + if os.getenv('PINECONE_API_KEY'): + pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY')) + index = pc.Index(os.getenv('PINECONE_INDEX_NAME', '')) + else: + print("⚠️ Warning: PINECONE_API_KEY not set. Using MockIndex.") + index = MockIndex() +except Exception as e: + print(f"⚠️ Warning: Pinecone init failed ({e}). Using MockIndex.") + index = MockIndex() def _get_data(uid: str, conversation_id: str, vector: List[float]): diff --git a/backend/main.py b/backend/main.py index c1e5f713be..09c2247ca3 100644 --- a/backend/main.py +++ b/backend/main.py @@ -43,12 +43,15 @@ from utils.other.timeout import TimeoutMiddleware -if os.environ.get('SERVICE_ACCOUNT_JSON'): - service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) - credentials = firebase_admin.credentials.Certificate(service_account_info) - firebase_admin.initialize_app(credentials) -else: - firebase_admin.initialize_app() +try: + if os.environ.get('SERVICE_ACCOUNT_JSON'): + service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) + credentials = firebase_admin.credentials.Certificate(service_account_info) + firebase_admin.initialize_app(credentials) + else: + firebase_admin.initialize_app() +except Exception as e: + print(f"⚠️ Warning: Firebase Admin initialization failed ({e}). 
Auth & DB features may not work.") app = FastAPI() diff --git a/backend/utils/conversations/search.py b/backend/utils/conversations/search.py index 9892b8b4d1..4a4c3bd95e 100644 --- a/backend/utils/conversations/search.py +++ b/backend/utils/conversations/search.py @@ -5,13 +5,38 @@ import typesense -client = typesense.Client( - { - 'nodes': [{'host': os.getenv('TYPESENSE_HOST'), 'port': os.getenv('TYPESENSE_HOST_PORT'), 'protocol': 'https'}], - 'api_key': os.getenv('TYPESENSE_API_KEY'), - 'connection_timeout_seconds': 2, - } -) +class MockTypesenseClient: + def __init__(self): + self.collections = MockCollections() + +class MockCollections: + def __getitem__(self, key): + return MockDocuments() + +class MockDocuments: + @property + def documents(self): + return self + + def search(self, params): + print(f"Mock search with params: {params}") + return {'hits': [], 'found': 0} + +try: + if os.getenv('TYPESENSE_API_KEY'): + client = typesense.Client( + { + 'nodes': [{'host': os.getenv('TYPESENSE_HOST'), 'port': os.getenv('TYPESENSE_HOST_PORT'), 'protocol': 'https'}], + 'api_key': os.getenv('TYPESENSE_API_KEY'), + 'connection_timeout_seconds': 2, + } + ) + else: + print("⚠️ Warning: TYPESENSE_API_KEY not set. Using MockTypesenseClient.") + client = MockTypesenseClient() +except Exception as e: + print(f"⚠️ Warning: Typesense init failed ({e}). Using MockTypesenseClient.") + client = MockTypesenseClient() def search_conversations( diff --git a/backend/utils/other/storage.py b/backend/utils/other/storage.py index 8089b9a8fa..8b82b9488a 100644 --- a/backend/utils/other/storage.py +++ b/backend/utils/other/storage.py @@ -17,12 +17,53 @@ from utils import encryption from database import users as users_db -if os.environ.get('SERVICE_ACCOUNT_JSON'): - service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) - credentials = service_account.Credentials.from_service_account_info(service_account_info) - storage_client = storage.Client(credentials=credentials) -else: - storage_client = storage.Client() +class MockStorageClient: + def bucket(self, name): + return MockBucket(name) + +class MockBucket: + def __init__(self, name): + self.name = name + def blob(self, name): + return MockBlob(name, self.name) + def list_blobs(self, prefix=None): + return [] + +class MockBlob: + def __init__(self, name, bucket_name): + self.name = name + self.bucket_name = bucket_name + self.size = 0 + self.time_created = None + self.metadata = {} + self.cache_control = None + def upload_from_filename(self, filename): + print(f"Mock upload from filename: {filename} to {self.name}") + def upload_from_string(self, data, content_type=None): + print(f"Mock upload from string to {self.name}") + def download_to_filename(self, filename): + print(f"Mock download to {filename} from {self.name}") + def delete(self): + print(f"Mock delete {self.name}") + def exists(self): + return False + def generate_signed_url(self, **kwargs): + return f"http://localhost:8080/_mock_signed_url/{self.bucket_name}/{self.name}" + def reload(self): + pass + def download_as_bytes(self): + return b"" + +try: + if os.environ.get('SERVICE_ACCOUNT_JSON'): + service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) + credentials = service_account.Credentials.from_service_account_info(service_account_info) + storage_client = storage.Client(credentials=credentials) + else: + storage_client = storage.Client() +except Exception as e: + print(f"⚠️ Warning: Google Storage connection failed ({e}). 
Using MockStorageClient for local dev.") + storage_client = MockStorageClient() speech_profiles_bucket = os.getenv('BUCKET_SPEECH_PROFILES') postprocessing_audio_bucket = os.getenv('BUCKET_POSTPROCESSING') diff --git a/backend/utils/translation.py b/backend/utils/translation.py index 124887624b..eb7edd8a49 100644 --- a/backend/utils/translation.py +++ b/backend/utils/translation.py @@ -111,8 +111,32 @@ r'\b(' + '|'.join(re.escape(word) for word in _non_lexical_utterances) + r')\b', re.IGNORECASE ) -# Initialize the translation client globally -_client = translate_v3.TranslationServiceClient() +class MockTranslationServiceClient: + def detect_language(self, parent, content, mime_type): + print(f"Mock detect language: {content[:20]}...") + return MockDetectResponse() + + def translate_text(self, contents, parent, mime_type, target_language_code): + print(f"Mock translate text to {target_language_code}") + return MockTranslateResponse(contents[0]) + +class MockDetectResponse: + languages = [] + +class MockTranslateResponse: + def __init__(self, text): + self.translations = [MockTranslation(text)] + +class MockTranslation: + def __init__(self, text): + self.translated_text = text + +try: + _client = translate_v3.TranslationServiceClient() +except Exception as e: + print(f"⚠️ Warning: Google Translation init failed ({e}). Using MockTranslationServiceClient.") + _client = MockTranslationServiceClient() + _parent = f"projects/{PROJECT_ID}/locations/global" _mime_type = "text/plain" From 28b14aa09c3d887a5f1f9796af9b33265e5762e1 Mon Sep 17 00:00:00 2001 From: fayerman-source Date: Fri, 2 Jan 2026 02:58:33 -0500 Subject: [PATCH 3/8] security: generate random secrets in setup.sh and refine exception handling --- backend/database/_client.py | 2 +- backend/database/vector_db.py | 4 ++-- backend/main.py | 4 +++- backend/utils/conversations/search.py | 4 +++- backend/utils/other/storage.py | 4 +++- backend/utils/translation.py | 4 +++- setup.sh | 10 ++++++++++ 7 files changed, 25 insertions(+), 7 deletions(-) diff --git a/backend/database/_client.py b/backend/database/_client.py index 77c7dacb2e..009e37fe9d 100644 --- a/backend/database/_client.py +++ b/backend/database/_client.py @@ -45,7 +45,7 @@ def to_dict(self): try: db = firestore.Client() -except (DefaultCredentialsError, Exception) as e: +except (DefaultCredentialsError, ValueError) as e: print(f"⚠️ Warning: Firestore connection failed ({e}). Using MockFirestore for local dev.") db = MockFirestore() diff --git a/backend/database/vector_db.py b/backend/database/vector_db.py index ce501e5dae..351eabe9d0 100644 --- a/backend/database/vector_db.py +++ b/backend/database/vector_db.py @@ -4,7 +4,7 @@ from datetime import datetime, timezone, timedelta from typing import List -from pinecone import Pinecone +from pinecone import Pinecone, PineconeConfigurationError from models.conversation import Conversation from utils.llm.clients import embeddings @@ -30,7 +30,7 @@ def delete(self, ids, namespace): else: print("⚠️ Warning: PINECONE_API_KEY not set. Using MockIndex.") index = MockIndex() -except Exception as e: +except (PineconeConfigurationError, ValueError, KeyError) as e: print(f"⚠️ Warning: Pinecone init failed ({e}). 
Using MockIndex.") index = MockIndex() diff --git a/backend/main.py b/backend/main.py index 09c2247ca3..3a0befe77a 100644 --- a/backend/main.py +++ b/backend/main.py @@ -43,6 +43,8 @@ from utils.other.timeout import TimeoutMiddleware +from google.auth.exceptions import DefaultCredentialsError + try: if os.environ.get('SERVICE_ACCOUNT_JSON'): service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) @@ -50,7 +52,7 @@ firebase_admin.initialize_app(credentials) else: firebase_admin.initialize_app() -except Exception as e: +except (DefaultCredentialsError, ValueError) as e: print(f"⚠️ Warning: Firebase Admin initialization failed ({e}). Auth & DB features may not work.") app = FastAPI() diff --git a/backend/utils/conversations/search.py b/backend/utils/conversations/search.py index 4a4c3bd95e..734c7e9f85 100644 --- a/backend/utils/conversations/search.py +++ b/backend/utils/conversations/search.py @@ -5,6 +5,8 @@ import typesense +from typesense.exceptions import ConfigError + class MockTypesenseClient: def __init__(self): self.collections = MockCollections() @@ -34,7 +36,7 @@ def search(self, params): else: print("⚠️ Warning: TYPESENSE_API_KEY not set. Using MockTypesenseClient.") client = MockTypesenseClient() -except Exception as e: +except (ConfigError, ValueError, KeyError) as e: print(f"⚠️ Warning: Typesense init failed ({e}). Using MockTypesenseClient.") client = MockTypesenseClient() diff --git a/backend/utils/other/storage.py b/backend/utils/other/storage.py index 8b82b9488a..81893c77b0 100644 --- a/backend/utils/other/storage.py +++ b/backend/utils/other/storage.py @@ -54,6 +54,8 @@ def reload(self): def download_as_bytes(self): return b"" +from google.auth.exceptions import DefaultCredentialsError + try: if os.environ.get('SERVICE_ACCOUNT_JSON'): service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) @@ -61,7 +63,7 @@ def download_as_bytes(self): storage_client = storage.Client(credentials=credentials) else: storage_client = storage.Client() -except Exception as e: +except (DefaultCredentialsError, ValueError, KeyError) as e: print(f"⚠️ Warning: Google Storage connection failed ({e}). Using MockStorageClient for local dev.") storage_client = MockStorageClient() diff --git a/backend/utils/translation.py b/backend/utils/translation.py index eb7edd8a49..8470cdb358 100644 --- a/backend/utils/translation.py +++ b/backend/utils/translation.py @@ -131,9 +131,11 @@ class MockTranslation: def __init__(self, text): self.translated_text = text +from google.auth.exceptions import DefaultCredentialsError + try: _client = translate_v3.TranslationServiceClient() -except Exception as e: +except (DefaultCredentialsError, ValueError, KeyError) as e: print(f"⚠️ Warning: Google Translation init failed ({e}). Using MockTranslationServiceClient.") _client = MockTranslationServiceClient() diff --git a/setup.sh b/setup.sh index 2162dc4c4e..fc44a0718b 100755 --- a/setup.sh +++ b/setup.sh @@ -25,6 +25,16 @@ fi if [ ! -f .env ]; then echo "📄 Creating .env from .env.example..." cp .env.example .env + + # Generate secure random secrets + ADMIN_KEY=$(openssl rand -hex 32) + ENCRYPTION_SECRET=$(openssl rand -hex 32) + + # Update .env with generated secrets + sed -i "s/ADMIN_KEY=.*/ADMIN_KEY=$ADMIN_KEY/" .env + sed -i "s/ENCRYPTION_SECRET=.*/ENCRYPTION_SECRET=$ENCRYPTION_SECRET/" .env + + echo "✅ Generated secure random keys for ADMIN_KEY and ENCRYPTION_SECRET." echo "⚠️ Action Required: Please edit the .env file and add your API keys." 
echo " At minimum, you need: DEEPGRAM_API_KEY and OPENAI_API_KEY." From 3ab9390b608cac6833f8921227a295cd5d480b7b Mon Sep 17 00:00:00 2001 From: fayerman-source Date: Fri, 2 Jan 2026 04:24:53 -0500 Subject: [PATCH 4/8] feat: implement cascading delete for knowledge graph --- backend/database/knowledge_graph.py | 59 +++++++++++++++++++++++++++++ backend/database/memories.py | 9 +++++ 2 files changed, 68 insertions(+) diff --git a/backend/database/knowledge_graph.py b/backend/database/knowledge_graph.py index 87b5d7766a..6a77317b90 100644 --- a/backend/database/knowledge_graph.py +++ b/backend/database/knowledge_graph.py @@ -4,6 +4,7 @@ from google.cloud import firestore from google.cloud.firestore_v1 import FieldFilter +from google.api_core import exceptions as google_exceptions from ._client import db @@ -244,3 +245,61 @@ def _batch_delete(coll_ref): edges_ref = user_ref.collection(knowledge_edges_collection) _batch_delete(edges_ref) + + +def cleanup_for_memory(uid: str, memory_id: str): + """ + Removes a memory_id from all nodes and edges in the knowledge graph. + If a node or edge is no longer associated with any memories, it is deleted. + Also removes edges that point to a deleted node. + """ + try: + user_ref = db.collection(users_collection).document(uid) + nodes_ref = user_ref.collection(knowledge_nodes_collection) + edges_ref = user_ref.collection(knowledge_edges_collection) + + batch = db.batch() + nodes_to_delete = set() + + # 1. Process nodes and identify which ones to delete + nodes_query = nodes_ref.where(filter=FieldFilter('memory_ids', 'array_contains', memory_id)) + for doc in nodes_query.stream(): + node_data = doc.to_dict() + memory_ids = node_data.get('memory_ids', []) + + if len(memory_ids) == 1 and memory_ids[0] == memory_id: + nodes_to_delete.add(doc.id) + batch.delete(doc.reference) + else: + batch.update(doc.reference, {'memory_ids': firestore.ArrayRemove([memory_id])}) + + # 2. Process edges, considering nodes that will be deleted + edges_to_process_query = edges_ref.where(filter=FieldFilter('memory_ids', 'array_contains', memory_id)) + for doc in edges_to_process_query.stream(): + edge_data = doc.to_dict() + memory_ids = edge_data.get('memory_ids', []) + + # Condition 1: Edge will have no memories left + # Condition 2: Edge's source or target node is being deleted + if (len(memory_ids) == 1 and memory_ids[0] == memory_id) or \ + (edge_data.get('source_id') in nodes_to_delete) or \ + (edge_data.get('target_id') in nodes_to_delete): + batch.delete(doc.reference) + else: + batch.update(doc.reference, {'memory_ids': firestore.ArrayRemove([memory_id])}) + + # 3. 
Final pass for any other edges connected to deleted nodes (orphaned edges) + if nodes_to_delete: + source_query = edges_ref.where(filter=FieldFilter('source_id', 'in', list(nodes_to_delete))) + for doc in source_query.stream(): + batch.delete(doc.reference) + + target_query = edges_ref.where(filter=FieldFilter('target_id', 'in', list(nodes_to_delete))) + for doc in target_query.stream(): + batch.delete(doc.reference) + + batch.commit() + print(f"Knowledge graph cleanup complete for memory_id: {memory_id}") + except (google_exceptions.GoogleAPICallError, ValueError) as e: + print(f"Error during knowledge graph cleanup for memory_id {memory_id}: {e}") + pass diff --git a/backend/database/memories.py b/backend/database/memories.py index 5d31e1ce8a..dcab2d4e09 100644 --- a/backend/database/memories.py +++ b/backend/database/memories.py @@ -7,6 +7,7 @@ from ._client import db from database import users as users_db +from database import knowledge_graph as kg_db from utils import encryption from .helpers import set_data_protection_level, prepare_for_write, prepare_for_read @@ -223,16 +224,24 @@ def delete_memory(uid: str, memory_id: str): memories_ref = user_ref.collection(memories_collection) memory_ref = memories_ref.document(memory_id) memory_ref.delete() + + # Trigger cascading cleanup for the knowledge graph + kg_db.cleanup_for_memory(uid, memory_id) def delete_all_memories(uid: str): user_ref = db.collection(users_collection).document(uid) memories_ref = user_ref.collection(memories_collection) + + # Efficiently delete all documents in the collection batch = db.batch() for doc in memories_ref.stream(): batch.delete(doc.reference) batch.commit() + # Trigger a single, efficient cleanup of the entire knowledge graph + kg_db.delete_knowledge_graph(uid) + def delete_memories_for_conversation(uid: str, memory_id: str): batch = db.batch() From 7ac25b11e85e7c261e70dc1cd9c697e7bab70426 Mon Sep 17 00:00:00 2001 From: fayerman-source Date: Fri, 2 Jan 2026 04:32:13 -0500 Subject: [PATCH 5/8] fix: implement robust, transactional KG cleanup addressing critical review --- backend/database/knowledge_graph.py | 116 +++++++++++++++++----------- 1 file changed, 71 insertions(+), 45 deletions(-) diff --git a/backend/database/knowledge_graph.py b/backend/database/knowledge_graph.py index 6a77317b90..39cfe3101a 100644 --- a/backend/database/knowledge_graph.py +++ b/backend/database/knowledge_graph.py @@ -249,57 +249,83 @@ def _batch_delete(coll_ref): def cleanup_for_memory(uid: str, memory_id: str): """ - Removes a memory_id from all nodes and edges in the knowledge graph. + Removes a memory_id from all nodes and edges in the knowledge graph atomically. If a node or edge is no longer associated with any memories, it is deleted. Also removes edges that point to a deleted node. + Handles Firestore query limits and atomicity using transactions. """ try: user_ref = db.collection(users_collection).document(uid) - nodes_ref = user_ref.collection(knowledge_nodes_collection) - edges_ref = user_ref.collection(knowledge_edges_collection) - batch = db.batch() - nodes_to_delete = set() - - # 1. 
Process nodes and identify which ones to delete - nodes_query = nodes_ref.where(filter=FieldFilter('memory_ids', 'array_contains', memory_id)) - for doc in nodes_query.stream(): - node_data = doc.to_dict() - memory_ids = node_data.get('memory_ids', []) - - if len(memory_ids) == 1 and memory_ids[0] == memory_id: - nodes_to_delete.add(doc.id) - batch.delete(doc.reference) - else: - batch.update(doc.reference, {'memory_ids': firestore.ArrayRemove([memory_id])}) - - # 2. Process edges, considering nodes that will be deleted - edges_to_process_query = edges_ref.where(filter=FieldFilter('memory_ids', 'array_contains', memory_id)) - for doc in edges_to_process_query.stream(): - edge_data = doc.to_dict() - memory_ids = edge_data.get('memory_ids', []) + @firestore.transactional + def update_in_transaction(transaction, nodes_to_delete_ids): + # Fetch nodes and edges that currently contain memory_id + nodes_query = user_ref.collection(knowledge_nodes_collection).where(filter=FieldFilter('memory_ids', 'array_contains', memory_id)) + edges_query = user_ref.collection(knowledge_edges_collection).where(filter=FieldFilter('memory_ids', 'array_contains', memory_id)) + + # Fetch relevant documents within the transaction + nodes_docs = list(nodes_query.stream()) + edges_docs = list(edges_query.stream()) + + # Track nodes that will be deleted to clean up related edges + nodes_fully_deleted_in_this_tx = set() + + # Process Nodes + for doc in nodes_docs: + node_data = doc.to_dict() + memory_ids = node_data.get('memory_ids', []) + + if len(memory_ids) == 1 and memory_ids[0] == memory_id: + # Node will be deleted as this is its only remaining memory_id + transaction.delete(doc.reference) + nodes_fully_deleted_in_this_tx.add(doc.id) + else: + # Only remove the memory_id + transaction.update(doc.reference, {'memory_ids': firestore.ArrayRemove([memory_id])}) + + # Process Edges (those explicitly linked to this memory_id) + for doc in edges_docs: + edge_data = doc.to_dict() + memory_ids = edge_data.get('memory_ids', []) + + if len(memory_ids) == 1 and memory_ids[0] == memory_id: + # Edge will be deleted as this is its only remaining memory_id + transaction.delete(doc.reference) + else: + # Only remove the memory_id + transaction.update(doc.reference, {'memory_ids': firestore.ArrayRemove([memory_id])}) + + # Process potentially orphaned edges (those whose source/target nodes are deleted in this transaction) + if nodes_fully_deleted_in_this_tx: + # Firestore 'in' query limit is 10, so chunk the node IDs if necessary + chunk_size = 10 + nodes_chunks = [list(nodes_fully_deleted_in_this_tx)[i:i + chunk_size] for i in range(0, len(nodes_fully_deleted_in_this_tx), chunk_size)] + + for chunk in nodes_chunks: + # Delete edges where source node is in the chunk + source_edges_query = user_ref.collection(knowledge_edges_collection).where(filter=FieldFilter('source_id', 'in', chunk)) + for doc in source_edges_query.stream(): + transaction.delete(doc.reference) + + # Delete edges where target node is in the chunk + target_edges_query = user_ref.collection(knowledge_edges_collection).where(filter=FieldFilter('target_id', 'in', chunk)) + for doc in target_edges_query.stream(): + transaction.delete(doc.reference) - # Condition 1: Edge will have no memories left - # Condition 2: Edge's source or target node is being deleted - if (len(memory_ids) == 1 and memory_ids[0] == memory_id) or \ - (edge_data.get('source_id') in nodes_to_delete) or \ - (edge_data.get('target_id') in nodes_to_delete): - batch.delete(doc.reference) - else: - 
batch.update(doc.reference, {'memory_ids': firestore.ArrayRemove([memory_id])}) + print(f"Knowledge graph transaction complete for memory_id: {memory_id}") - # 3. Final pass for any other edges connected to deleted nodes (orphaned edges) - if nodes_to_delete: - source_query = edges_ref.where(filter=FieldFilter('source_id', 'in', list(nodes_to_delete))) - for doc in source_query.stream(): - batch.delete(doc.reference) + # Run the transaction + transaction = db.transaction() + update_in_transaction(transaction, set()) # Pass an empty set for initial call. Nodes to delete are determined inside. - target_query = edges_ref.where(filter=FieldFilter('target_id', 'in', list(nodes_to_delete))) - for doc in target_query.stream(): - batch.delete(doc.reference) - - batch.commit() - print(f"Knowledge graph cleanup complete for memory_id: {memory_id}") - except (google_exceptions.GoogleAPICallError, ValueError) as e: - print(f"Error during knowledge graph cleanup for memory_id {memory_id}: {e}") - pass + except google_exceptions.GoogleAPICallError as e: + print(f"ERROR: Firestore API error during KG cleanup for memory_id {memory_id}: {e}") + raise # Re-raise to indicate a critical failure + + except ValueError as e: + print(f"ERROR: Data validation error during KG cleanup for memory_id {memory_id}: {e}") + raise # Re-raise to indicate a critical failure + + except Exception as e: # Catch any other unexpected errors + print(f"ERROR: Unexpected error during KG cleanup for memory_id {memory_id}: {e}") + raise # Re-raise to indicate a critical failure From 704c97e52511dd0416c1108074cd24a48699489a Mon Sep 17 00:00:00 2001 From: fayerman-source Date: Sat, 3 Jan 2026 12:44:22 -0500 Subject: [PATCH 6/8] chore(cleanup): remove mock db code from PR per review --- backend/database/_client.py | 41 +++---------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/backend/database/_client.py b/backend/database/_client.py index 009e37fe9d..1e2c14264c 100644 --- a/backend/database/_client.py +++ b/backend/database/_client.py @@ -4,38 +4,7 @@ import uuid from google.cloud import firestore -from google.auth.exceptions import DefaultCredentialsError - -class MockFirestore: - def collection(self, name): - return MockCollection() - -class MockCollection: - def stream(self): - return [] - def document(self, doc_id): - return MockDocument(doc_id) - def add(self, data): - return None - def where(self, field, op, value): - return self - -class MockDocument: - def __init__(self, doc_id): - self.id = doc_id - def set(self, data): - return None - def get(self): - return MockSnapshot() - def update(self, data): - return None - def delete(self): - return None - -class MockSnapshot: - exists = False - def to_dict(self): - return {} + if os.environ.get('SERVICE_ACCOUNT_JSON'): service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) @@ -43,11 +12,7 @@ def to_dict(self): with open('google-credentials.json', 'w') as f: json.dump(service_account_info, f) -try: - db = firestore.Client() -except (DefaultCredentialsError, ValueError) as e: - print(f"⚠️ Warning: Firestore connection failed ({e}). 
Using MockFirestore for local dev.") - db = MockFirestore() +db = firestore.Client() def get_users_uid(): @@ -59,4 +24,4 @@ def document_id_from_seed(seed: str) -> uuid.UUID: """Avoid repeating the same data""" seed_hash = hashlib.sha256(seed.encode('utf-8')).digest() generated_uuid = uuid.UUID(bytes=seed_hash[:16], version=4) - return str(generated_uuid) + return str(generated_uuid) \ No newline at end of file From 212bdb417fe1bed1526f29e0b0d14102e8f54914 Mon Sep 17 00:00:00 2001 From: fayerman-source Date: Sat, 3 Jan 2026 13:13:25 -0500 Subject: [PATCH 7/8] chore: revert unrelated files from PR --- backend/database/_client.py | 189 +++++++++++++++++++++++++++++++++++- docker-compose.yml | 4 + 2 files changed, 192 insertions(+), 1 deletion(-) diff --git a/backend/database/_client.py b/backend/database/_client.py index 1e2c14264c..2b2e8b47b8 100644 --- a/backend/database/_client.py +++ b/backend/database/_client.py @@ -2,9 +2,192 @@ import json import os import uuid +from typing import Dict, Any from google.cloud import firestore +from google.auth.exceptions import DefaultCredentialsError +# Constants for local persistence +DATA_DIR = '/app/data' +DB_FILE = os.path.join(DATA_DIR, 'firestore_mock.json') + +class PersistentMockFirestore: + _instance = None + _data: Dict[str, Dict[str, Any]] = {} + + def __new__(cls): + if cls._instance is None: + cls._instance = super(PersistentMockFirestore, cls).__new__(cls) + cls._instance._load() + return cls._instance + + def _load(self): + if os.path.exists(DB_FILE): + try: + with open(DB_FILE, 'r') as f: + self._data = json.load(f) + print(f"✅ Loaded persistent mock data from {DB_FILE}") + except Exception as e: + print(f"⚠️ Failed to load mock data: {e}") + self._data = {} + else: + self._data = {} + + def _save(self): + if not os.path.exists(DATA_DIR): + try: + os.makedirs(DATA_DIR) + except OSError: + # Might fail if not permission, but inside Docker usually OK + pass + try: + with open(DB_FILE, 'w') as f: + json.dump(self._data, f, default=str, indent=2) + except Exception as e: + print(f"⚠️ Failed to save mock data: {e}") + + def collection(self, name): + if name not in self._data: + self._data[name] = {} + return MockCollection(self, name) + + def batch(self): + return MockBatch(self) + +class MockBatch: + def __init__(self, db): + self.db = db + + def set(self, ref, data): + ref.set(data) + + def update(self, ref, data): + ref.update(data) + + def delete(self, ref): + ref.delete() + + def commit(self): + pass # Changes happen immediately in this simple mock + +class MockCollection: + def __init__(self, db, name, parent_doc=None): + self.db = db + self.name = name + self.parent_doc = parent_doc # For subcollections + + def _get_data(self): + # Handle subcollections: parent_doc.data[col_name] + if self.parent_doc: + if self.name not in self.parent_doc._get_data(): + self.parent_doc._get_data()[self.name] = {} + return self.parent_doc._get_data()[self.name] + return self.db._data[self.name] + + def document(self, doc_id=None): + if doc_id is None: + doc_id = str(uuid.uuid4()) + return MockDocument(self.db, self, doc_id) + + def add(self, data, doc_id=None): + if doc_id is None: + doc_id = str(uuid.uuid4()) + doc = self.document(doc_id) + doc.set(data) + return None, doc + + def stream(self): + # Return all docs in this collection + data = self._get_data() + return [MockDocument(self.db, self, doc_id) for doc_id in data.keys()] + + def where(self, *args, **kwargs): + # Basic mock support for chaining, doesn't actually filter yet + return self + + 
def limit(self, count): + return self + + def order_by(self, field, direction=None): + return self + +class MockDocument: + def __init__(self, db, collection, doc_id): + self.db = db + self.collection = collection + self.id = doc_id + + def _get_data(self): + col_data = self.collection._get_data() + if self.id not in col_data: + return None # Does not exist + return col_data[self.id] + + def set(self, data): + col_data = self.collection._get_data() + col_data[self.id] = data + self.db._save() + + def update(self, data): + current = self._get_data() + if current: + current.update(data) + self.db._save() + + def get(self): + data = self._get_data() + return MockSnapshot(self.id, data) + + def delete(self): + col_data = self.collection._get_data() + if self.id in col_data: + del col_data[self.id] + self.db._save() + + def collection(self, name): + # Subcollections require nested storage structure + # Simplified: storing subcollections in a special field '_collections' inside the doc data? + # Or simpler: Just return a dummy collection for now to prevent crashes, + # as implementing deep nested persistence in one file is complex. + # But wait, we want persistence. + # Let's try to store it in the doc data under `__collections__` key + current = self._get_data() + if current is None: + # Create doc implicitly? No, usually errors. + # But for mock, let's allow it + self.set({}) + current = self._get_data() + + if '__collections__' not in current: + current['__collections__'] = {} + + return MockSubCollection(self.db, name, current['__collections__']) + +class MockSubCollection(MockCollection): + def __init__(self, db, name, storage): + self.db = db + self.name = name + self.storage = storage # Reference to the dict holding this collection's data + + def _get_data(self): + if self.name not in self.storage: + self.storage[self.name] = {} + return self.storage[self.name] + +class MockSnapshot: + def __init__(self, doc_id, data): + self.id = doc_id + self._data = data + self.exists = data is not None + self.reference = None # Placeholder + + def to_dict(self): + if self._data and '__collections__' in self._data: + # Hide internal storage + d = self._data.copy() + del d['__collections__'] + return d + return self._data or {} if os.environ.get('SERVICE_ACCOUNT_JSON'): service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) @@ -12,7 +195,11 @@ with open('google-credentials.json', 'w') as f: json.dump(service_account_info, f) -db = firestore.Client() +try: + db = firestore.Client() +except (DefaultCredentialsError, ValueError) as e: + print(f"⚠️ Warning: Firestore connection failed ({e}). 
Using PersistentMockFirestore for local dev.") + db = PersistentMockFirestore() def get_users_uid(): diff --git a/docker-compose.yml b/docker-compose.yml index d5dab49edc..fee1bac949 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,6 +10,8 @@ services: - .env depends_on: - redis + volumes: + - ./data/backend:/app/data pusher: build: @@ -51,6 +53,8 @@ services: restart: always ports: - "6379:6379" + volumes: + - ./data/redis:/data # Optional: Diarizer (Requires NVIDIA GPU and NVIDIA Container Toolkit) # diarizer: From 4c900276959716568e534e51d8145e09cbfd182c Mon Sep 17 00:00:00 2001 From: fayerman-source Date: Sat, 3 Jan 2026 13:15:06 -0500 Subject: [PATCH 8/8] chore: strictly revert all unrelated files to upstream/main --- .env.example | 43 ------ backend/database/_client.py | 192 +------------------------- backend/database/vector_db.py | 31 +---- backend/main.py | 17 +-- backend/utils/conversations/search.py | 41 +----- backend/utils/other/storage.py | 55 +------- backend/utils/translation.py | 30 +--- docker-compose.yml | 75 ---------- setup.sh | 66 --------- 9 files changed, 29 insertions(+), 521 deletions(-) delete mode 100644 .env.example delete mode 100644 docker-compose.yml delete mode 100755 setup.sh diff --git a/.env.example b/.env.example deleted file mode 100644 index 2626455ded..0000000000 --- a/.env.example +++ /dev/null @@ -1,43 +0,0 @@ -# Omi One-Click Deployment Configuration -# Copy this to .env and fill in the values - -# --- Core API Keys --- -# Required for transcription -DEEPGRAM_API_KEY= -# Required for LLM features -OPENAI_API_KEY= -# Optional: Other LLM providers -ANTHROPIC_API_KEY= - -# --- Backend Configuration --- -REDIS_DB_HOST=redis -REDIS_DB_PORT=6379 -REDIS_DB_PASSWORD= - -# Vector Database (Pinecone is default in current code) -PINECONE_API_KEY= -PINECONE_INDEX_NAME=memories-backend - -# Database (Firebase/Firestore is default) -# Provide the JSON content of your service account if using Firestore -SERVICE_ACCOUNT_JSON= - -# --- Service URLs (Internal Docker Networking) --- -HOSTED_VAD_API_URL=http://vad:8080/v1/vad -HOSTED_SPEAKER_EMBEDDING_API_URL=http://diarizer:8080 -HOSTED_PUSHER_API_URL=http://pusher:8080 - -# --- Frontend Configuration --- -# Public URL of the backend (use localhost for local dev) -NEXT_PUBLIC_API_URL=http://localhost:8080 -# Firebase config for the frontend -NEXT_PUBLIC_FIREBASE_API_KEY= -NEXT_PUBLIC_FIREBASE_AUTH_DOMAIN= -NEXT_PUBLIC_FIREBASE_PROJECT_ID= -NEXT_PUBLIC_FIREBASE_STORAGE_BUCKET= -NEXT_PUBLIC_FIREBASE_MESSAGING_SENDER_ID= -NEXT_PUBLIC_FIREBASE_APP_ID= - -# --- Development / Debugging --- -ADMIN_KEY=some_secret_key -ENCRYPTION_SECRET=omi_default_secret_change_me diff --git a/backend/database/_client.py b/backend/database/_client.py index 2b2e8b47b8..943d6e60de 100644 --- a/backend/database/_client.py +++ b/backend/database/_client.py @@ -2,192 +2,8 @@ import json import os import uuid -from typing import Dict, Any from google.cloud import firestore -from google.auth.exceptions import DefaultCredentialsError - -# Constants for local persistence -DATA_DIR = '/app/data' -DB_FILE = os.path.join(DATA_DIR, 'firestore_mock.json') - -class PersistentMockFirestore: - _instance = None - _data: Dict[str, Dict[str, Any]] = {} - - def __new__(cls): - if cls._instance is None: - cls._instance = super(PersistentMockFirestore, cls).__new__(cls) - cls._instance._load() - return cls._instance - - def _load(self): - if os.path.exists(DB_FILE): - try: - with open(DB_FILE, 'r') as f: - self._data = json.load(f) - print(f"✅ 
Loaded persistent mock data from {DB_FILE}") - except Exception as e: - print(f"⚠️ Failed to load mock data: {e}") - self._data = {} - else: - self._data = {} - - def _save(self): - if not os.path.exists(DATA_DIR): - try: - os.makedirs(DATA_DIR) - except OSError: - # Might fail if not permission, but inside Docker usually OK - pass - try: - with open(DB_FILE, 'w') as f: - json.dump(self._data, f, default=str, indent=2) - except Exception as e: - print(f"⚠️ Failed to save mock data: {e}") - - def collection(self, name): - if name not in self._data: - self._data[name] = {} - return MockCollection(self, name) - - def batch(self): - return MockBatch(self) - -class MockBatch: - def __init__(self, db): - self.db = db - - def set(self, ref, data): - ref.set(data) - - def update(self, ref, data): - ref.update(data) - - def delete(self, ref): - ref.delete() - - def commit(self): - pass # Changes happen immediately in this simple mock - -class MockCollection: - def __init__(self, db, name, parent_doc=None): - self.db = db - self.name = name - self.parent_doc = parent_doc # For subcollections - - def _get_data(self): - # Handle subcollections: parent_doc.data[col_name] - if self.parent_doc: - if self.name not in self.parent_doc._get_data(): - self.parent_doc._get_data()[self.name] = {} - return self.parent_doc._get_data()[self.name] - return self.db._data[self.name] - - def document(self, doc_id=None): - if doc_id is None: - doc_id = str(uuid.uuid4()) - return MockDocument(self.db, self, doc_id) - - def add(self, data, doc_id=None): - if doc_id is None: - doc_id = str(uuid.uuid4()) - doc = self.document(doc_id) - doc.set(data) - return None, doc - - def stream(self): - # Return all docs in this collection - data = self._get_data() - return [MockDocument(self.db, self, doc_id) for doc_id in data.keys()] - - def where(self, *args, **kwargs): - # Basic mock support for chaining, doesn't actually filter yet - return self - - def limit(self, count): - return self - - def order_by(self, field, direction=None): - return self - -class MockDocument: - def __init__(self, db, collection, doc_id): - self.db = db - self.collection = collection - self.id = doc_id - - def _get_data(self): - col_data = self.collection._get_data() - if self.id not in col_data: - return None # Does not exist - return col_data[self.id] - - def set(self, data): - col_data = self.collection._get_data() - col_data[self.id] = data - self.db._save() - - def update(self, data): - current = self._get_data() - if current: - current.update(data) - self.db._save() - - def get(self): - data = self._get_data() - return MockSnapshot(self.id, data) - - def delete(self): - col_data = self.collection._get_data() - if self.id in col_data: - del col_data[self.id] - self.db._save() - - def collection(self, name): - # Subcollections require nested storage structure - # Simplified: storing subcollections in a special field '_collections' inside the doc data? - # Or simpler: Just return a dummy collection for now to prevent crashes, - # as implementing deep nested persistence in one file is complex. - # But wait, we want persistence. - # Let's try to store it in the doc data under `__collections__` key - current = self._get_data() - if current is None: - # Create doc implicitly? No, usually errors. 
- # But for mock, let's allow it - self.set({}) - current = self._get_data() - - if '__collections__' not in current: - current['__collections__'] = {} - - return MockSubCollection(self.db, name, current['__collections__']) - -class MockSubCollection(MockCollection): - def __init__(self, db, name, storage): - self.db = db - self.name = name - self.storage = storage # Reference to the dict holding this collection's data - - def _get_data(self): - if self.name not in self.storage: - self.storage[self.name] = {} - return self.storage[self.name] - -class MockSnapshot: - def __init__(self, doc_id, data): - self.id = doc_id - self._data = data - self.exists = data is not None - self.reference = None # Placeholder - - def to_dict(self): - if self._data and '__collections__' in self._data: - # Hide internal storage - d = self._data.copy() - del d['__collections__'] - return d - return self._data or {} if os.environ.get('SERVICE_ACCOUNT_JSON'): service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) @@ -195,11 +11,7 @@ def to_dict(self): with open('google-credentials.json', 'w') as f: json.dump(service_account_info, f) -try: - db = firestore.Client() -except (DefaultCredentialsError, ValueError) as e: - print(f"⚠️ Warning: Firestore connection failed ({e}). Using PersistentMockFirestore for local dev.") - db = PersistentMockFirestore() +db = firestore.Client() def get_users_uid(): @@ -211,4 +23,4 @@ def document_id_from_seed(seed: str) -> uuid.UUID: """Avoid repeating the same data""" seed_hash = hashlib.sha256(seed.encode('utf-8')).digest() generated_uuid = uuid.UUID(bytes=seed_hash[:16], version=4) - return str(generated_uuid) \ No newline at end of file + return str(generated_uuid) diff --git a/backend/database/vector_db.py b/backend/database/vector_db.py index 351eabe9d0..f9a883d5e4 100644 --- a/backend/database/vector_db.py +++ b/backend/database/vector_db.py @@ -4,35 +4,16 @@ from datetime import datetime, timezone, timedelta from typing import List -from pinecone import Pinecone, PineconeConfigurationError +from pinecone import Pinecone from models.conversation import Conversation from utils.llm.clients import embeddings -class MockIndex: - def upsert(self, vectors, namespace): - print(f"Mock upsert: {len(vectors)} vectors") - return {"upserted_count": len(vectors)} - def query(self, vector, top_k, include_metadata=False, filter=None, namespace=None, include_values=False): - print("Mock query") - return {"matches": []} - def update(self, id, set_metadata, namespace): - print(f"Mock update: {id}") - return {} - def delete(self, ids, namespace): - print(f"Mock delete: {ids}") - return {} - -try: - if os.getenv('PINECONE_API_KEY'): - pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY')) - index = pc.Index(os.getenv('PINECONE_INDEX_NAME', '')) - else: - print("⚠️ Warning: PINECONE_API_KEY not set. Using MockIndex.") - index = MockIndex() -except (PineconeConfigurationError, ValueError, KeyError) as e: - print(f"⚠️ Warning: Pinecone init failed ({e}). 
Using MockIndex.") - index = MockIndex() +if os.getenv('PINECONE_API_KEY') is not None: + pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY', '')) + index = pc.Index(os.getenv('PINECONE_INDEX_NAME', '')) +else: + index = None def _get_data(uid: str, conversation_id: str, vector: List[float]): diff --git a/backend/main.py b/backend/main.py index 3a0befe77a..c1e5f713be 100644 --- a/backend/main.py +++ b/backend/main.py @@ -43,17 +43,12 @@ from utils.other.timeout import TimeoutMiddleware -from google.auth.exceptions import DefaultCredentialsError - -try: - if os.environ.get('SERVICE_ACCOUNT_JSON'): - service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) - credentials = firebase_admin.credentials.Certificate(service_account_info) - firebase_admin.initialize_app(credentials) - else: - firebase_admin.initialize_app() -except (DefaultCredentialsError, ValueError) as e: - print(f"⚠️ Warning: Firebase Admin initialization failed ({e}). Auth & DB features may not work.") +if os.environ.get('SERVICE_ACCOUNT_JSON'): + service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) + credentials = firebase_admin.credentials.Certificate(service_account_info) + firebase_admin.initialize_app(credentials) +else: + firebase_admin.initialize_app() app = FastAPI() diff --git a/backend/utils/conversations/search.py b/backend/utils/conversations/search.py index 734c7e9f85..9892b8b4d1 100644 --- a/backend/utils/conversations/search.py +++ b/backend/utils/conversations/search.py @@ -5,40 +5,13 @@ import typesense -from typesense.exceptions import ConfigError - -class MockTypesenseClient: - def __init__(self): - self.collections = MockCollections() - -class MockCollections: - def __getitem__(self, key): - return MockDocuments() - -class MockDocuments: - @property - def documents(self): - return self - - def search(self, params): - print(f"Mock search with params: {params}") - return {'hits': [], 'found': 0} - -try: - if os.getenv('TYPESENSE_API_KEY'): - client = typesense.Client( - { - 'nodes': [{'host': os.getenv('TYPESENSE_HOST'), 'port': os.getenv('TYPESENSE_HOST_PORT'), 'protocol': 'https'}], - 'api_key': os.getenv('TYPESENSE_API_KEY'), - 'connection_timeout_seconds': 2, - } - ) - else: - print("⚠️ Warning: TYPESENSE_API_KEY not set. Using MockTypesenseClient.") - client = MockTypesenseClient() -except (ConfigError, ValueError, KeyError) as e: - print(f"⚠️ Warning: Typesense init failed ({e}). 
Using MockTypesenseClient.") - client = MockTypesenseClient() +client = typesense.Client( + { + 'nodes': [{'host': os.getenv('TYPESENSE_HOST'), 'port': os.getenv('TYPESENSE_HOST_PORT'), 'protocol': 'https'}], + 'api_key': os.getenv('TYPESENSE_API_KEY'), + 'connection_timeout_seconds': 2, + } +) def search_conversations( diff --git a/backend/utils/other/storage.py b/backend/utils/other/storage.py index 81893c77b0..8089b9a8fa 100644 --- a/backend/utils/other/storage.py +++ b/backend/utils/other/storage.py @@ -17,55 +17,12 @@ from utils import encryption from database import users as users_db -class MockStorageClient: - def bucket(self, name): - return MockBucket(name) - -class MockBucket: - def __init__(self, name): - self.name = name - def blob(self, name): - return MockBlob(name, self.name) - def list_blobs(self, prefix=None): - return [] - -class MockBlob: - def __init__(self, name, bucket_name): - self.name = name - self.bucket_name = bucket_name - self.size = 0 - self.time_created = None - self.metadata = {} - self.cache_control = None - def upload_from_filename(self, filename): - print(f"Mock upload from filename: {filename} to {self.name}") - def upload_from_string(self, data, content_type=None): - print(f"Mock upload from string to {self.name}") - def download_to_filename(self, filename): - print(f"Mock download to {filename} from {self.name}") - def delete(self): - print(f"Mock delete {self.name}") - def exists(self): - return False - def generate_signed_url(self, **kwargs): - return f"http://localhost:8080/_mock_signed_url/{self.bucket_name}/{self.name}" - def reload(self): - pass - def download_as_bytes(self): - return b"" - -from google.auth.exceptions import DefaultCredentialsError - -try: - if os.environ.get('SERVICE_ACCOUNT_JSON'): - service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) - credentials = service_account.Credentials.from_service_account_info(service_account_info) - storage_client = storage.Client(credentials=credentials) - else: - storage_client = storage.Client() -except (DefaultCredentialsError, ValueError, KeyError) as e: - print(f"⚠️ Warning: Google Storage connection failed ({e}). 
Using MockStorageClient for local dev.") - storage_client = MockStorageClient() +if os.environ.get('SERVICE_ACCOUNT_JSON'): + service_account_info = json.loads(os.environ["SERVICE_ACCOUNT_JSON"]) + credentials = service_account.Credentials.from_service_account_info(service_account_info) + storage_client = storage.Client(credentials=credentials) +else: + storage_client = storage.Client() speech_profiles_bucket = os.getenv('BUCKET_SPEECH_PROFILES') postprocessing_audio_bucket = os.getenv('BUCKET_POSTPROCESSING') diff --git a/backend/utils/translation.py b/backend/utils/translation.py index 8470cdb358..124887624b 100644 --- a/backend/utils/translation.py +++ b/backend/utils/translation.py @@ -111,34 +111,8 @@ r'\b(' + '|'.join(re.escape(word) for word in _non_lexical_utterances) + r')\b', re.IGNORECASE ) -class MockTranslationServiceClient: - def detect_language(self, parent, content, mime_type): - print(f"Mock detect language: {content[:20]}...") - return MockDetectResponse() - - def translate_text(self, contents, parent, mime_type, target_language_code): - print(f"Mock translate text to {target_language_code}") - return MockTranslateResponse(contents[0]) - -class MockDetectResponse: - languages = [] - -class MockTranslateResponse: - def __init__(self, text): - self.translations = [MockTranslation(text)] - -class MockTranslation: - def __init__(self, text): - self.translated_text = text - -from google.auth.exceptions import DefaultCredentialsError - -try: - _client = translate_v3.TranslationServiceClient() -except (DefaultCredentialsError, ValueError, KeyError) as e: - print(f"⚠️ Warning: Google Translation init failed ({e}). Using MockTranslationServiceClient.") - _client = MockTranslationServiceClient() - +# Initialize the translation client globally +_client = translate_v3.TranslationServiceClient() _parent = f"projects/{PROJECT_ID}/locations/global" _mime_type = "text/plain" diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index fee1bac949..0000000000 --- a/docker-compose.yml +++ /dev/null @@ -1,75 +0,0 @@ -services: - backend: - build: - context: . - dockerfile: backend/Dockerfile - restart: always - ports: - - "8080:8080" - env_file: - - .env - depends_on: - - redis - volumes: - - ./data/backend:/app/data - - pusher: - build: - context: . - dockerfile: backend/pusher/Dockerfile - restart: always - ports: - - "8081:8080" - env_file: - - .env - depends_on: - - redis - - frontend: - build: - context: . 
-      dockerfile: web/frontend/Dockerfile
-      args:
-        - API_URL=http://localhost:8080
-        - NEXT_PUBLIC_FIREBASE_API_KEY=${NEXT_PUBLIC_FIREBASE_API_KEY:-fake_key_for_build}
-        - NEXT_PUBLIC_FIREBASE_AUTH_DOMAIN=${NEXT_PUBLIC_FIREBASE_AUTH_DOMAIN:-omi-app.firebaseapp.com}
-        - NEXT_PUBLIC_FIREBASE_PROJECT_ID=${NEXT_PUBLIC_FIREBASE_PROJECT_ID:-omi-app}
-        - NEXT_PUBLIC_FIREBASE_STORAGE_BUCKET=${NEXT_PUBLIC_FIREBASE_STORAGE_BUCKET:-omi-app.appspot.com}
-        - NEXT_PUBLIC_FIREBASE_MESSAGING_SENDER_ID=${NEXT_PUBLIC_FIREBASE_MESSAGING_SENDER_ID:-123456789}
-        - NEXT_PUBLIC_FIREBASE_APP_ID=${NEXT_PUBLIC_FIREBASE_APP_ID:-1:123456789:web:abcdef}
-        - NEXT_PUBLIC_FIREBASE_MEASUREMENT_ID=${NEXT_PUBLIC_FIREBASE_MEASUREMENT_ID:-G-ABCDEF}
-    restart: always
-    ports:
-      - "3000:3000"
-    env_file:
-      - .env
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    depends_on:
-      - backend
-
-  redis:
-    image: redis:alpine
-    restart: always
-    ports:
-      - "6379:6379"
-    volumes:
-      - ./data/redis:/data
-
-  # Optional: Diarizer (Requires NVIDIA GPU and NVIDIA Container Toolkit)
-  # diarizer:
-  #   build:
-  #     context: .
-  #     dockerfile: backend/diarizer/Dockerfile
-  #   restart: always
-  #   ports:
-  #     - "8082:8080"
-  #   env_file:
-  #     - .env
-  #   deploy:
-  #     resources:
-  #       reservations:
-  #         devices:
-  #           - driver: nvidia
-  #             count: 1
-  #             capabilities: [gpu]
diff --git a/setup.sh b/setup.sh
deleted file mode 100755
index fc44a0718b..0000000000
--- a/setup.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/bin/bash
-
-# Omi One-Click Setup Script
-# Designed for backend developers and customers with low technical expertise.
-
-set -e
-
-echo "=========================================="
-echo "   🚀 Omi One-Click Setup (Docker)        "
-echo "=========================================="
-
-# Check for Docker
-if ! command -v docker &> /dev/null; then
-    echo "❌ Error: Docker is not installed. Please install Docker first: https://docs.docker.com/get-docker/"
-    exit 1
-fi
-
-# Check for Docker Compose
-if ! docker compose version &> /dev/null; then
-    echo "❌ Error: Docker Compose is not installed. Please install it or use a newer version of Docker Desktop."
-    exit 1
-fi
-
-# Create .env if it doesn't exist
-if [ ! -f .env ]; then
-    echo "📄 Creating .env from .env.example..."
-    cp .env.example .env
-
-    # Generate secure random secrets
-    ADMIN_KEY=$(openssl rand -hex 32)
-    ENCRYPTION_SECRET=$(openssl rand -hex 32)
-
-    # Update .env with generated secrets
-    sed -i "s/ADMIN_KEY=.*/ADMIN_KEY=$ADMIN_KEY/" .env
-    sed -i "s/ENCRYPTION_SECRET=.*/ENCRYPTION_SECRET=$ENCRYPTION_SECRET/" .env
-
-    echo "✅ Generated secure random keys for ADMIN_KEY and ENCRYPTION_SECRET."
-    echo "⚠️  Action Required: Please edit the .env file and add your API keys."
-    echo "   At minimum, you need: DEEPGRAM_API_KEY and OPENAI_API_KEY."
-
-    # Optional: try to open the editor
-    if command -v nano &> /dev/null; then
-        read -p "Would you like to edit .env now? (y/n) " -n 1 -r
-        echo
-        if [[ $REPLY =~ ^[Yy]$ ]]; then
-            nano .env
-        fi
-    fi
-fi
-
-# Build and Start
-echo "🛠️  Building and starting Omi services..."
-docker compose up -d --build
-
-echo ""
-echo "=========================================="
-echo "✅ Omi is now starting up!"
-echo ""
-echo "Services available at:"
-echo "👉 Frontend:  http://localhost:3000"
-echo "👉 Backend:   http://localhost:8080"
-echo "👉 Pusher:    http://localhost:8081"
-echo ""
-echo "To view logs, run:  docker compose logs -f"
-echo "To stop Omi, run:   docker compose down"
-echo "=========================================="
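---

Notes on the series follow.

The fallback wiring added in patches 2-3 (and reverted in patch 8) repeats a single init-with-fallback shape across Firestore, Pinecone, Typesense, Cloud Storage, and Translation. A minimal generic sketch of that pattern; the helper name `init_with_fallback` and its signature are illustrative only, not part of this PR:

def init_with_fallback(name, factory, mock_factory, expected_exceptions):
    """Build the real client; fall back to a mock for local/offline dev."""
    try:
        return factory()
    except expected_exceptions as e:
        print(f"⚠️ Warning: {name} init failed ({e}). Using mock for local dev.")
        return mock_factory()

# Example wiring, mirroring the Pinecone block in patch 3:
#   index = init_with_fallback(
#       'Pinecone',
#       lambda: Pinecone(api_key=os.getenv('PINECONE_API_KEY')).Index(os.getenv('PINECONE_INDEX_NAME', '')),
#       MockIndex,
#       (PineconeConfigurationError, ValueError, KeyError),
#   )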
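The cascading delete in patch 4 is driven entirely through the memories module, so callers never touch the knowledge graph directly. A usage sketch; the uid/memory_id values are placeholders and the import path follows the diffs:

from database import memories as memories_db

# delete_memory() removes the Firestore document, then calls
# kg_db.cleanup_for_memory(uid, memory_id), which strips the memory id from
# every node/edge and deletes any node or edge left with no memories
# (plus edges orphaned by those node deletions).
memories_db.delete_memory(uid="user_123", memory_id="mem_456")

# delete_all_memories() batch-deletes the whole collection, then calls
# kg_db.delete_knowledge_graph(uid) once instead of cleaning up per memory.
memories_db.delete_all_memories(uid="user_123")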
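Patch 5 moves the cleanup into a Firestore transaction. google-cloud-firestore requires every read in a transaction to complete before its first write, so a robust version groups all queries up front. A minimal sketch under that constraint; the collection-name strings are placeholders for users_collection / knowledge_nodes_collection / knowledge_edges_collection:

from google.cloud import firestore
from google.cloud.firestore_v1 import FieldFilter

USERS, NODES, EDGES = 'users', 'knowledge_nodes', 'knowledge_edges'  # placeholder names

def cleanup_for_memory_sketch(db, uid: str, memory_id: str):
    user_ref = db.collection(USERS).document(uid)
    nodes_ref = user_ref.collection(NODES)
    edges_ref = user_ref.collection(EDGES)

    @firestore.transactional
    def run(transaction):
        by_memory = FieldFilter('memory_ids', 'array_contains', memory_id)

        # Phase 1 -- reads: run every query inside the transaction first.
        nodes = list(nodes_ref.where(filter=by_memory).stream(transaction=transaction))
        edges = list(edges_ref.where(filter=by_memory).stream(transaction=transaction))
        doomed_nodes = {d.id for d in nodes if d.to_dict().get('memory_ids') == [memory_id]}

        # Orphan-edge lookups are reads too, chunked for the 'in' operator
        # limit (10 per query, as the patch notes).
        orphan_edges = []
        doomed = list(doomed_nodes)
        for i in range(0, len(doomed), 10):
            chunk = doomed[i:i + 10]
            for field in ('source_id', 'target_id'):
                q = edges_ref.where(filter=FieldFilter(field, 'in', chunk))
                orphan_edges.extend(q.stream(transaction=transaction))

        # Phase 2 -- writes: only now that all reads are done.
        for doc in nodes:
            if doc.id in doomed_nodes:
                transaction.delete(doc.reference)
            else:
                transaction.update(doc.reference, {'memory_ids': firestore.ArrayRemove([memory_id])})

        handled = set()
        for doc in edges:
            handled.add(doc.id)
            data = doc.to_dict()
            orphaned = data.get('source_id') in doomed_nodes or data.get('target_id') in doomed_nodes
            if data.get('memory_ids') == [memory_id] or orphaned:
                transaction.delete(doc.reference)
            else:
                transaction.update(doc.reference, {'memory_ids': firestore.ArrayRemove([memory_id])})

        for doc in orphan_edges:
            if doc.id not in handled:
                handled.add(doc.id)
                transaction.delete(doc.reference)

    run(db.transaction())

Collecting every query result into a list before the first delete/update keeps the read-then-write ordering explicit, which is also why the orphaned-edge queries run in phase 1 rather than after the node deletes.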