echo-mind-orchestrator/main.py at main · noctetemp/echo-mind-orchestrator · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
"""
Echo Mind Backend - FastAPI Server with WebSocket Support
Phase 1.1: Backend Architecture Setup

This module provides the main FastAPI server with WebSocket endpoints for real-time audio streaming.
Based on pipecat-realtime-demo but adapted for Echo Mind's specific requirements.
"""

import logging
import os
import asyncio
import json
from typing import Any, Set
from datetime import datetime

import aiohttp
import uvicorn
from fastapi import APIRouter, FastAPI, WebSocket, HTTPException, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from dotenv import load_dotenv

from streaming_pipeline import EchoMindStreamingPipeline as EchoMindAudioPipeline
from heygen_service import EchoMindHeyGenService
from memory_service import MemoryService
from config import Config

# Load environment variables from a .env file. This has to happen before the
# os.getenv lookups further down (validate_environment runs at import time).
load_dotenv()

# Global set of WebSocket connections subscribed to the log stream.
# The /logs-stream endpoint adds and discards entries; broadcast_log also
# removes any socket whose send fails.
log_websockets: Set[WebSocket] = set()

# Custom logging handler to broadcast logs to WebSocket clients
class WebSocketLogHandler(logging.Handler):
    """Logging handler that forwards each record to the log-stream WebSockets.

    Records are only processed while at least one client is registered in
    ``log_websockets``. The send itself happens asynchronously in
    ``broadcast_log``.
    """

    # Strong references to in-flight broadcast tasks. The event loop only keeps
    # weak references to tasks, so a task nobody references may be garbage
    # collected before it finishes.
    _pending_tasks: set = set()

    def emit(self, record):
        """Schedule a broadcast of ``record`` on the running event loop.

        The record is dropped when no client is connected, or when the caller
        has no running event loop (for example a worker thread), because there
        is nowhere to schedule the send. Errors while building the entry are
        routed through ``handleError`` instead of reaching the code that logged.
        """
        if not log_websockets:
            return

        try:
            # Raises RuntimeError outside a running loop. Checking first means
            # the broadcast coroutine is never created and left un-awaited.
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return

        try:
            log_entry = {
                "type": "log",
                "timestamp": datetime.now().isoformat(),
                "level": record.levelname,
                "message": self.format(record),
                "module": record.name
            }

            # Broadcast to all connected WebSocket clients
            task = loop.create_task(broadcast_log(log_entry))
        except Exception:
            self.handleError(record)
            return

        self._pending_tasks.add(task)
        task.add_done_callback(self._pending_tasks.discard)

async def broadcast_log(log_entry: dict):
    """Send ``log_entry`` as JSON text to every registered log WebSocket.

    Any client whose send raises is dropped from ``log_websockets``.
    """
    if not log_websockets:
        return

    unreachable = set()
    # Work on a snapshot so changes to the live set during an awaited send
    # cannot disturb the iteration.
    for client in list(log_websockets):
        try:
            payload = json.dumps(log_entry)
            await client.send_text(payload)
        except Exception:
            unreachable.add(client)

    # Forget every client that could not be reached
    for client in unreachable:
        log_websockets.discard(client)

# Configure logging with WebSocket handler.
# basicConfig requests INFO-level logging on the root logger; `logger` is the
# logger used throughout this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Add WebSocket handler to root logger so that records reaching the root
# logger are also offered to connected log-stream clients. The formatter
# output becomes the "message" field of each broadcast entry.
websocket_handler = WebSocketLogHandler()
websocket_handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(websocket_handler)

# Names of the API keys the backend refuses to start without
REQUIRED_ENV_VARS = [
    "DEEPGRAM_API_KEY",
    "OPENAI_API_KEY",
    "HEYGEN_API_KEY",
    "BRAVE_API_KEY"
]

def validate_environment():
    """Check that every name in REQUIRED_ENV_VARS is set to a non-empty value.

    Raises:
        ValueError: Listing, in declaration order, each variable that is
            unset or empty.
    """
    absent = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
    if not absent:
        return

    raise ValueError(f"Missing required environment variables: {', '.join(absent)}")

# Validate environment on startup.
# This call runs at import time: if a required variable is missing,
# validate_environment raises ValueError and the module fails to import.
validate_environment()

# API Router holding the HTTP and WebSocket routes declared below; it is
# mounted on the app under the /api/v1 prefix.
api_router = APIRouter()

@api_router.get("/health")
async def health_check():
    """Report a fixed status payload naming the service and its version."""
    payload = {
        "status": "healthy",
        "service": "echo-mind-backend",
        "version": "1.0.0",
    }
    return JSONResponse(content=payload)

@api_router.get("/config")
async def get_config():
    """Describe which services are configured and which features are on.

    For each service only a boolean is reported (whether its API key is set to
    a non-empty value); the key itself is never included.
    """
    # Service name -> environment variable holding its API key
    key_names = {
        "deepgram": "DEEPGRAM_API_KEY",
        "openai": "OPENAI_API_KEY",
        "heygen": "HEYGEN_API_KEY",
        "brave": "BRAVE_API_KEY",
    }
    services = {name: bool(os.getenv(env_var)) for name, env_var in key_names.items()}
    features = {
        "vad_enabled": True,
        "function_calling": True,
        "real_time_processing": True,
    }
    return JSONResponse(content={"services": services, "features": features})

# Global dictionary to track audio pipelines and their sessions.
# Keys are id(pipeline); the audio WebSocket endpoints insert an entry while
# their pipeline runs and pop it when the run ends.
active_audio_pipelines = {}

# Global memory service - None until startup_event assigns it.
# The /memory/* endpoints check it before use.
global_memory_service = None

@api_router.websocket("/audio-stream")
async def websocket_audio_endpoint(websocket: WebSocket) -> None:
    """
    Simplified WebSocket endpoint for audio processing
    Processes audio and returns enhanced text for frontend to send to HeyGen

    Accepts the connection, builds a memory service and an audio pipeline for
    it, keeps the pipeline registered in ``active_audio_pipelines`` while it
    runs, and tries to close the socket with code 1011 if anything fails.

    Args:
        websocket: The incoming WebSocket connection.
    """
    logger.info("WebSocket connection established for audio processing")

    try:
        await websocket.accept()

        # Initialize the simplified audio pipeline with meMOS memory service
        # Always create a fresh meMOS service for WebSocket to ensure proper connection
        memory_service = MemoryService(
            use_memos=True,
            memos_url=Config.MEMOS_REST_URL,
            memos_api_key=Config.MEMOS_API_KEY
        )
        logger.info(f"WebSocket using memory service: {memory_service.memory_type}")

        audio_pipeline = EchoMindAudioPipeline(websocket=websocket, memory_service=memory_service)

        # Store pipeline for session tracking
        pipeline_id = id(audio_pipeline)
        active_audio_pipelines[pipeline_id] = audio_pipeline

        try:
            # Run the audio processing pipeline
            await audio_pipeline.run()
        finally:
            # Clean up pipeline tracking
            active_audio_pipelines.pop(pipeline_id, None)

    except Exception as e:
        logger.error(f"WebSocket error: {e}")
        if websocket.client_state != websocket.client_state.DISCONNECTED:
            # A close frame may carry at most 125 bytes: 2 for the status code
            # plus up to 123 for the reason. Trim on the encoded form and drop
            # any multi-byte character cut in half.
            reason = f"Internal server error: {str(e)}".encode("utf-8")[:123].decode("utf-8", errors="ignore")
            try:
                await websocket.close(code=1011, reason=reason)
            except Exception as close_error:
                # Closing is best-effort here; the original failure is already logged.
                logger.warning(f"Failed to close WebSocket after error: {close_error}")

@api_router.websocket("/logs-stream")
async def websocket_logs_endpoint(websocket: WebSocket) -> None:
    """
    Stream backend log entries to a WebSocket client in real time.

    The socket is added to ``log_websockets`` once accepted and is always
    discarded from it again when this handler exits.
    """
    logger.info("WebSocket connection established for logs streaming")

    try:
        await websocket.accept()
        log_websockets.add(websocket)

        # Greet the client with an entry shaped like a regular log message
        greeting = json.dumps({
            "type": "log",
            "timestamp": datetime.now().isoformat(),
            "level": "SUCCESS",
            "message": "🔗 Connected to backend logs stream",
            "module": "logs-stream"
        })
        await websocket.send_text(greeting)

        # Incoming text is read and thrown away; the loop exists only to
        # notice when the client goes away.
        try:
            while True:
                await websocket.receive_text()
        except WebSocketDisconnect:
            logger.info("Logs WebSocket client disconnected")

    except Exception as exc:
        logger.error(f"Logs WebSocket error: {exc}")
    finally:
        # Unregister no matter how the handler ended
        log_websockets.discard(websocket)
        logger.info("Logs WebSocket connection closed")

# HeyGen HTTP API Endpoints

@api_router.post("/heygen/session/new")
async def create_heygen_session():
    """
    Create a new HeyGen streaming session using /v1/streaming.new

    When the service response carries a ``session_id``, that id is registered
    with every active audio pipeline. Any failure is reported as HTTP 500.
    """
    try:
        session_data = await EchoMindHeyGenService().create_session()

        # Let every running pipeline know about the new session
        new_session_id = session_data.get("session_id")
        if new_session_id:
            for audio_pipeline in active_audio_pipelines.values():
                audio_pipeline.add_session(new_session_id)

        # Full response is logged to help with debugging
        logger.info(f"HeyGen session response: {session_data}")

        return JSONResponse(content=session_data)
    except Exception as exc:
        logger.error(f"Failed to create HeyGen session: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to create HeyGen session: {str(exc)}")

@api_router.post("/heygen/session/{session_id}/start")
async def start_heygen_session(session_id: str):
    """
    Start a HeyGen streaming session using /v1/streaming.start

    The service result is returned as JSON; any failure becomes HTTP 500.

    Args:
        session_id: The session ID to start
    """
    try:
        start_result = await EchoMindHeyGenService().start_session(session_id)
        return JSONResponse(content=start_result)
    except Exception as exc:
        logger.error(f"Failed to start HeyGen session {session_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to start HeyGen session: {str(exc)}")

@api_router.post("/heygen/session/{session_id}/task")
async def send_heygen_task(session_id: str, request: dict):
    """
    Send a text task to HeyGen using /v1/streaming.task

    Responds with HTTP 400 when 'text' is missing or empty and with HTTP 500
    when the service call fails.

    Args:
        session_id: The session ID
        request: JSON body containing 'text' field
    """
    try:
        message_text = request.get("text")
        if not message_text:
            raise HTTPException(status_code=400, detail="Text field is required")

        task_result = await EchoMindHeyGenService().send_task(session_id, message_text)
        return JSONResponse(content=task_result)
    except HTTPException:
        # The 400 above must reach the client unchanged
        raise
    except Exception as exc:
        logger.error(f"Failed to send HeyGen task to session {session_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to send HeyGen task: {str(exc)}")

@api_router.post("/heygen/session/{session_id}/close")
async def close_heygen_session(session_id: str):
    """
    Close a HeyGen streaming session using /v1/streaming.close

    After the service call succeeds, the id is discarded from the
    ``active_sessions`` of every active audio pipeline. Any failure becomes
    HTTP 500.

    Args:
        session_id: The session ID to close
    """
    try:
        await EchoMindHeyGenService().close_session(session_id)

        # Stop tracking the session everywhere it may have been registered
        for audio_pipeline in active_audio_pipelines.values():
            audio_pipeline.active_sessions.discard(session_id)

        closed_payload = {"status": "closed", "session_id": session_id}
        return JSONResponse(content=closed_payload)
    except Exception as exc:
        logger.error(f"Failed to close HeyGen session {session_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to close HeyGen session: {str(exc)}")

@api_router.post("/heygen/sessions/cleanup")
async def cleanup_all_sessions():
    """
    Clean up all active HeyGen sessions across all audio pipelines

    Returns:
        JSON with ``total_sessions`` (sessions tracked before cleanup),
        ``cleaned_sessions`` (sessions no longer tracked afterwards) and
        ``remaining_sessions`` (the difference between the two).

    Raises:
        HTTPException: 500 when cleanup of any pipeline raises.
    """
    try:
        total_sessions = 0
        cleaned_sessions = 0

        # Iterate over a snapshot: the WebSocket handlers pop their pipeline
        # from the registry when they finish, which can happen while a cleanup
        # is awaited and would break iteration over the live dict.
        for pipeline in list(active_audio_pipelines.values()):
            sessions_before = len(pipeline.active_sessions)
            await pipeline.cleanup_sessions()

            total_sessions += sessions_before
            # Compare against this pipeline's own starting count; using the
            # running total would count earlier pipelines' sessions again.
            cleaned_sessions += sessions_before - len(pipeline.active_sessions)

        logger.info(f"Session cleanup completed: {cleaned_sessions}/{total_sessions} sessions closed")

        return JSONResponse(content={
            "status": "completed",
            "total_sessions": total_sessions,
            "cleaned_sessions": cleaned_sessions,
            "remaining_sessions": total_sessions - cleaned_sessions
        })
    except Exception as e:
        logger.error(f"Failed to cleanup sessions: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to cleanup sessions: {str(e)}")

# Memory Testing HTTP API Endpoints

@api_router.get("/memory/status")
async def get_memory_status():
    """
    Get memory service status and statistics

    Returns:
        JSON wrapping whatever the memory service's ``get_memory_stats()``
        returns.

    Raises:
        HTTPException: 503 when the memory service has not been initialized,
            500 when fetching the statistics fails.
    """
    try:
        if not global_memory_service:
            raise HTTPException(status_code=503, detail="Memory service not initialized")

        stats = await global_memory_service.get_memory_stats()
        return JSONResponse(content=stats)
    except HTTPException:
        # Pass the 503 through unchanged; without this clause the generic
        # handler below would rewrap it as a 500.
        raise
    except Exception as e:
        logger.error(f"Failed to get memory status: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get memory status: {str(e)}")

@api_router.post("/memory/store")
async def store_conversation(request: dict):
    """
    Store a conversation turn in memory

    Args:
        request: JSON body containing 'user_input' and 'assistant_response'
            fields, plus an optional 'metadata' field (defaults to {}).

    Returns:
        JSON ``{"success": True, "message": ...}`` once the turn is stored.

    Raises:
        HTTPException: 503 when the memory service has not been initialized,
            400 when either required field is missing or empty, 500 when
            storing fails.
    """
    try:
        if not global_memory_service:
            raise HTTPException(status_code=503, detail="Memory service not initialized")

        user_input = request.get("user_input")
        assistant_response = request.get("assistant_response")
        metadata = request.get("metadata", {})

        if not user_input or not assistant_response:
            raise HTTPException(status_code=400, detail="Both user_input and assistant_response are required")

        await global_memory_service.store_conversation(
            user_input=user_input,
            assistant_response=assistant_response,
            metadata=metadata
        )

        # Reaching this point means the store call did not raise; failures are
        # reported through the exception handlers below, so the response is
        # always the success payload.
        return JSONResponse(content={
            "success": True,
            "message": "Conversation stored successfully"
        })
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to store conversation: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to store conversation: {str(e)}")

@api_router.post("/memory/search")
async def search_memories(request: dict):
    """
    Search through stored memories

    Responds with HTTP 503 when the memory service is not initialized, HTTP 400
    when 'query' is missing or empty, and HTTP 500 when the search fails.

    Args:
        request: JSON body containing 'query' and optional 'limit' fields
            ('limit' defaults to 3)
    """
    try:
        if not global_memory_service:
            raise HTTPException(status_code=503, detail="Memory service not initialized")

        search_query = request.get("query")
        max_results = request.get("limit", 3)

        if not search_query:
            raise HTTPException(status_code=400, detail="Query field is required")

        matches = await global_memory_service.search_memory(query=search_query, limit=max_results)

        response_body = {
            "query": search_query,
            "limit": max_results,
            "results": matches,
            "count": len(matches)
        }
        return JSONResponse(content=response_body)
    except HTTPException:
        # Keep the 503/400 raised above intact
        raise
    except Exception as exc:
        logger.error(f"Failed to search memories: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to search memories: {str(exc)}")

@api_router.post("/memory/context")
async def get_context(request: dict):
    """
    Get relevant context for current input

    'current_input' is required and echoed back in the response; the context
    lookup itself is only given the turn limit. Responds with HTTP 503 when the
    memory service is not initialized, HTTP 400 when 'current_input' is missing
    or empty, and HTTP 500 when the lookup fails.

    Args:
        request: JSON body containing 'current_input' and optional 'max_turns'
            fields ('max_turns' defaults to 5)
    """
    try:
        if not global_memory_service:
            raise HTTPException(status_code=503, detail="Memory service not initialized")

        latest_input = request.get("current_input")
        turn_limit = request.get("max_turns", 5)

        if not latest_input:
            raise HTTPException(status_code=400, detail="current_input field is required")

        turns = await global_memory_service.get_conversation_context(limit=turn_limit)

        response_body = {
            "current_input": latest_input,
            "max_turns": turn_limit,
            "context": turns,
            "count": len(turns)
        }
        return JSONResponse(content=response_body)
    except HTTPException:
        # Keep the 503/400 raised above intact
        raise
    except Exception as exc:
        logger.error(f"Failed to get context: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to get context: {str(exc)}")

@api_router.post("/memory/test")
async def test_memory_system():
    """
    Run a comprehensive test of the memory system

    Stores three fixed sample conversations (tagged with metadata
    ``{"test": True}``), searches for "programming", fetches recent
    conversation context and memory statistics, and returns everything in one
    JSON report.

    Raises:
        HTTPException: 503 when the memory service has not been initialized,
            500 when any step of the test fails.
    """
    try:
        if not global_memory_service:
            raise HTTPException(status_code=503, detail="Memory service not initialized")

        # Test data
        test_conversations = [
            {
                "user_input": "Hello, my name is John and I love programming in Python",
                "assistant_response": "Nice to meet you John! Python is a great programming language. What kind of projects do you work on?"
            },
            {
                "user_input": "I work on AI and machine learning projects",
                "assistant_response": "That's fascinating! AI and ML are exciting fields. Are you using any specific frameworks like TensorFlow or PyTorch?"
            },
            {
                "user_input": "I prefer PyTorch for deep learning",
                "assistant_response": "Excellent choice! PyTorch has great flexibility for research and development. What's your current project about?"
            }
        ]

        # Store test conversations
        stored_count = 0
        for conv in test_conversations:
            await global_memory_service.store_conversation(
                user_input=conv["user_input"],
                assistant_response=conv["assistant_response"],
                metadata={"test": True}
            )
            stored_count += 1

        # Test search functionality
        search_results = await global_memory_service.search_memory("programming", limit=5)

        # Test context retrieval
        context_results = await global_memory_service.get_conversation_context(limit=3)

        # Get final stats
        final_stats = await global_memory_service.get_memory_stats()

        return JSONResponse(content={
            "test_status": "completed",
            "conversations_stored": stored_count,
            "total_test_conversations": len(test_conversations),
            "search_results": {
                "query": "programming",
                "count": len(search_results),
                "results": search_results
            },
            "context_results": {
                # Fixed label only: the context call above takes just a limit
                "query": "What do you know about me?",
                "count": len(context_results),
                "results": context_results
            },
            "memory_stats": final_stats
        })
    except HTTPException:
        # Pass the 503 through unchanged; without this clause the generic
        # handler below would rewrap it as a 500.
        raise
    except Exception as e:
        logger.error(f"Memory test failed: {e}")
        raise HTTPException(status_code=500, detail=f"Memory test failed: {str(e)}")

# FastAPI app initialization.
# Interactive API docs are served at /docs and /redoc.
app = FastAPI(
    title="Echo Mind Backend",
    description="Real-time AI avatar backend with voice processing and tool execution",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# Include API router: every route declared on api_router is exposed under
# the /api/v1 prefix (e.g. /api/v1/health).
app.include_router(router=api_router, prefix="/api/v1")

# CORS middleware.
# NOTE(review): any origin, method and header is allowed, with credentials
# enabled. This looks intended for development only; confirm and restrict
# the origin list before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add direct WebSocket endpoint for easier frontend access
@app.websocket("/ws/audio")
async def websocket_audio_direct(websocket: WebSocket) -> None:
    """
    Direct WebSocket endpoint for audio processing (same as /api/v1/audio-stream)

    Accepts the connection, builds a memory service and an audio pipeline for
    it, keeps the pipeline registered in ``active_audio_pipelines`` while it
    runs, and tries to close the socket with code 1011 if anything fails.

    Args:
        websocket: The incoming WebSocket connection.
    """
    logger.info("WebSocket connection established for audio processing")

    try:
        await websocket.accept()

        # Initialize the simplified audio pipeline with meMOS memory service
        # Always create a fresh meMOS service for WebSocket to ensure proper connection
        memory_service = MemoryService(
            use_memos=True,
            memos_url=Config.MEMOS_REST_URL,
            memos_api_key=Config.MEMOS_API_KEY
        )
        logger.info(f"WebSocket (/ws/audio) using memory service: {memory_service.memory_type}")

        audio_pipeline = EchoMindAudioPipeline(websocket=websocket, memory_service=memory_service)

        # Store pipeline for session tracking
        pipeline_id = id(audio_pipeline)
        active_audio_pipelines[pipeline_id] = audio_pipeline

        try:
            # Run the audio processing pipeline
            await audio_pipeline.run()
        finally:
            # Clean up pipeline tracking
            active_audio_pipelines.pop(pipeline_id, None)

    except Exception as e:
        logger.error(f"WebSocket error: {e}")
        if websocket.client_state != websocket.client_state.DISCONNECTED:
            # A close frame may carry at most 125 bytes: 2 for the status code
            # plus up to 123 for the reason. Trim on the encoded form and drop
            # any multi-byte character cut in half.
            reason = f"Internal server error: {str(e)}".encode("utf-8")[:123].decode("utf-8", errors="ignore")
            try:
                await websocket.close(code=1011, reason=reason)
            except Exception as close_error:
                # Closing is best-effort here; the original failure is already logged.
                logger.warning(f"Failed to close WebSocket after error: {close_error}")

@app.on_event("startup")
async def startup_event():
    """Create the shared memory service on startup and log how it came up."""
    global global_memory_service

    for banner in (
        "Echo Mind Backend starting up...",
        "Environment validation passed",
        "All required API keys are configured",
        "🧠 Initializing memory service with memOS REST API...",
    ):
        logger.info(banner)

    # The shared service is always configured for the memOS REST API
    global_memory_service = MemoryService(
        use_memos=True,
        memos_url=Config.MEMOS_REST_URL,
        memos_api_key=Config.MEMOS_API_KEY
    )

    # Fetching the stats doubles as a check that the service responds
    memory_stats = await global_memory_service.get_memory_stats()
    logger.info(f"✅ Memory service initialized: {memory_stats['memory_type']}")

    storage_note = (
        "💾 Persistent storage enabled"
        if memory_stats.get('persistent_storage')
        else "⚠️ Using fallback in-memory storage"
    )
    logger.info(storage_note)

@app.on_event("shutdown")
async def shutdown_event():
    """Application shutdown event

    Asks every registered audio pipeline to clean up its sessions, then
    empties the registry. A failure in one pipeline is logged as a warning and
    does not stop the remaining pipelines from being cleaned up.
    """
    logger.info("Echo Mind Backend shutting down...")

    # Clean up all active sessions before shutdown
    if active_audio_pipelines:
        logger.info(f"Cleaning up {len(active_audio_pipelines)} active audio pipelines...")
        # Iterate over a snapshot: the WebSocket handlers pop their pipeline
        # from the registry when they finish, which can happen while a cleanup
        # is awaited and would break iteration over the live dict.
        for pipeline in list(active_audio_pipelines.values()):
            try:
                await pipeline.cleanup_sessions()
            except Exception as e:
                logger.warning(f"Error during pipeline cleanup: {e}")

        active_audio_pipelines.clear()
        logger.info("All audio pipelines cleaned up")

if __name__ == "__main__":
    # Listen address and worker count come from the environment, with
    # defaults for local use
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", 8001))
    workers = int(os.getenv("WORKERS", 1))

    logger.info(f"Starting Echo Mind Backend on {host}:{port} with {workers} workers")

    # Auto-reload is switched on only when ENVIRONMENT is "development"
    server_options = {
        "host": host,
        "port": port,
        "workers": workers,
        "log_level": "info",
        "reload": os.getenv("ENVIRONMENT") == "development",
    }
    uvicorn.run("main:app", **server_options)