From 2fd158cd211103da57870cbbbfcd4af4aba4fc05 Mon Sep 17 00:00:00 2001 From: OpenClaw Assistant Date: Mon, 9 Mar 2026 20:23:39 +0800 Subject: [PATCH 1/3] fix: resolve multiple code quality issues - Rename stage3_memory_retrivel.py to stage3_memory_retrieval.py (typo fix) - Replace == None with is None (Python anti-pattern) - Replace != True with is not True (Python anti-pattern) - Replace bare except with except Exception - Remove duplicate 'rrf' entry in docstring - Remove unused MongoDB init script volume mount from docker-compose.yaml - Add missing env template setup step in STARTER_KIT.md quick start Fixes: #115, #113, #107, #97, #91, #90, #86 --- demo/utils/simple_memory_manager.py | 1 - docker-compose.yaml | 1 - docs/STARTER_KIT.md | 3 +++ docs/dev_docs/getting_started.md | 2 +- evaluation/src/adapters/evermemos/README.md | 4 ++-- ...age3_memory_retrivel.py => stage3_memory_retrieval.py} | 0 evaluation/src/adapters/evermemos_adapter.py | 8 ++++---- src/biz_layer/mem_db_operations.py | 2 +- src/biz_layer/mem_memorize.py | 6 +++--- .../persistence/repository/core_memory_raw_repository.py | 2 +- .../repository/group_profile_raw_repository.py | 2 +- .../group_user_profile_memory_raw_repository.py | 2 +- .../repository/episodic_memory_milvus_repository.py | 2 +- tests/test_conv_memcell_extractor.py | 2 +- tests/test_group_user_profile_memory_raw_repository.py | 2 +- 15 files changed, 20 insertions(+), 19 deletions(-) rename evaluation/src/adapters/evermemos/{stage3_memory_retrivel.py => stage3_memory_retrieval.py} (100%) diff --git a/demo/utils/simple_memory_manager.py b/demo/utils/simple_memory_manager.py index f5526650..b0b2908e 100644 --- a/demo/utils/simple_memory_manager.py +++ b/demo/utils/simple_memory_manager.py @@ -244,7 +244,6 @@ async def search( query: Query text top_k: Number of results to return (default: 3) mode: Retrieval mode (default: "rrf") - - "rrf": RRF fusion (recommended) - "keyword": Keyword retrieval (BM25) - "vector": Vector retrieval - 
"hybrid": Keyword + Vector + Rerank diff --git a/docker-compose.yaml b/docker-compose.yaml index 29407030..373f860b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -14,7 +14,6 @@ services: - "27017:27017" volumes: - mongodb_data:/data/db - - ./docker/mongodb/init:/docker-entrypoint-initdb.d networks: - memsys-network healthcheck: diff --git a/docs/STARTER_KIT.md b/docs/STARTER_KIT.md index 7d322b8d..2f5b9950 100644 --- a/docs/STARTER_KIT.md +++ b/docs/STARTER_KIT.md @@ -31,6 +31,9 @@ Welcome to the EverMemOS Competition! This starter kit will help you get up and git clone https://github.com/EverMind-AI/EverMemOS.git cd EverMemOS +# Copy environment template and configure (replace with your API keys) +cp env.template .env + # Start all services with Docker docker compose up -d diff --git a/docs/dev_docs/getting_started.md b/docs/dev_docs/getting_started.md index 695f8501..007dd94b 100644 --- a/docs/dev_docs/getting_started.md +++ b/docs/dev_docs/getting_started.md @@ -273,7 +273,7 @@ uv run python src/bootstrap.py evaluation/locomo_evaluation/stage1_memcells_extr # Run other evaluation stages uv run python src/bootstrap.py evaluation/locomo_evaluation/stage2_index_building.py -uv run python src/bootstrap.py evaluation/locomo_evaluation/stage3_memory_retrivel.py +uv run python src/bootstrap.py evaluation/locomo_evaluation/stage3_memory_retrieval.py ``` #### 2. 
Run Demo Scripts diff --git a/evaluation/src/adapters/evermemos/README.md b/evaluation/src/adapters/evermemos/README.md index 90d76da6..8ba13474 100644 --- a/evaluation/src/adapters/evermemos/README.md +++ b/evaluation/src/adapters/evermemos/README.md @@ -21,7 +21,7 @@ locomo_evaluation/ │ └── answer_prompts.py # Answer generation ├── stage1_memcells_extraction.py # Stage 1: Extract MemCells ├── stage2_index_building.py # Stage 2: Build indexes -├── stage3_memory_retrivel.py # Stage 3: Retrieve memories +├── stage3_memory_retrieval.py # Stage 3: Retrieve memories ├── stage4_response.py # Stage 4: Generate responses ├── stage5_eval.py # Stage 5: Evaluate results └── tools/ # Utility tools @@ -73,7 +73,7 @@ python evaluation/locomo_evaluation/stage1_memcells_extraction.py python evaluation/locomo_evaluation/stage2_index_building.py # Stage 3: Retrieve memories -python evaluation/locomo_evaluation/stage3_memory_retrivel.py +python evaluation/locomo_evaluation/stage3_memory_retrieval.py # Stage 4: Generate responses python evaluation/locomo_evaluation/stage4_response.py diff --git a/evaluation/src/adapters/evermemos/stage3_memory_retrivel.py b/evaluation/src/adapters/evermemos/stage3_memory_retrieval.py similarity index 100% rename from evaluation/src/adapters/evermemos/stage3_memory_retrivel.py rename to evaluation/src/adapters/evermemos/stage3_memory_retrieval.py diff --git a/evaluation/src/adapters/evermemos_adapter.py b/evaluation/src/adapters/evermemos_adapter.py index e0f78c55..c2959401 100644 --- a/evaluation/src/adapters/evermemos_adapter.py +++ b/evaluation/src/adapters/evermemos_adapter.py @@ -30,7 +30,7 @@ from evaluation.src.adapters.evermemos import ( stage1_memcells_extraction, stage2_index_building, - stage3_memory_retrivel, + stage3_memory_retrieval, stage4_response, ) @@ -497,7 +497,7 @@ async def search( if retrieval_mode == "agentic": # Agentic retrieval - top_results, metadata = await stage3_memory_retrivel.agentic_retrieval( + top_results, metadata 
= await stage3_memory_retrieval.agentic_retrieval( query=query, config=exp_config, llm_provider=self.llm_provider, @@ -508,7 +508,7 @@ async def search( ) elif retrieval_mode == "lightweight": # Lightweight retrieval - top_results, metadata = await stage3_memory_retrivel.lightweight_retrieval( + top_results, metadata = await stage3_memory_retrieval.lightweight_retrieval( query=query, emb_index=emb_index, bm25=bm25, @@ -517,7 +517,7 @@ async def search( ) else: # Default to hybrid retrieval - top_results = await stage3_memory_retrivel.hybrid_search_with_rrf( + top_results = await stage3_memory_retrieval.hybrid_search_with_rrf( query=query, emb_index=emb_index, bm25=bm25, diff --git a/src/biz_layer/mem_db_operations.py b/src/biz_layer/mem_db_operations.py index 66519a42..03800bfd 100644 --- a/src/biz_layer/mem_db_operations.py +++ b/src/biz_layer/mem_db_operations.py @@ -143,7 +143,7 @@ def _convert_timestamp_to_time( try: dt = from_iso_format(timestamp) return to_iso_format(dt) - except: + except Exception: # If parsing fails, return string directly return timestamp else: diff --git a/src/biz_layer/mem_memorize.py b/src/biz_layer/mem_memorize.py index e951a13c..89b4902a 100644 --- a/src/biz_layer/mem_memorize.py +++ b/src/biz_layer/mem_memorize.py @@ -1290,7 +1290,7 @@ async def memorize(request: MemorizeRequest) -> int: # ===== Preprocess and get historical data ===== if request.raw_data_type == RawDataType.CONVERSATION: request = await preprocess_conv_request(request, current_time) - if request == None: + if request is None: logger.warning(f"[mem_memorize] preprocess_conv_request returned None") return 0 @@ -1326,7 +1326,7 @@ async def memorize(request: MemorizeRequest) -> int: ) logger.debug(f"[mem_memorize] Extracting MemCell took: {time.perf_counter() - memcell_start} seconds") - if memcell_result == None: + if memcell_result is None: logger.warning(f"[mem_memorize] Skipped extracting MemCell") return 0 @@ -1345,7 +1345,7 @@ async def memorize(request: 
MemorizeRequest) -> int: ) logger.info("=" * 80) - if memcell == None: + if memcell is None: # No boundary detected, confirm current messages to accumulation (sync_status: -1 -> 0) await conversation_data_repo.save_conversation_data( request.new_raw_data_list, request.group_id diff --git a/src/infra_layer/adapters/out/persistence/repository/core_memory_raw_repository.py b/src/infra_layer/adapters/out/persistence/repository/core_memory_raw_repository.py index 8c532d48..3998ea0d 100644 --- a/src/infra_layer/adapters/out/persistence/repository/core_memory_raw_repository.py +++ b/src/infra_layer/adapters/out/persistence/repository/core_memory_raw_repository.py @@ -56,7 +56,7 @@ async def ensure_latest( ).update_many({"$set": {"is_latest": False}}) # Update the latest version's is_latest to True - if latest_version.is_latest != True: + if latest_version.is_latest is not True: latest_version.is_latest = True await latest_version.save(session=session) logger.debug( diff --git a/src/infra_layer/adapters/out/persistence/repository/group_profile_raw_repository.py b/src/infra_layer/adapters/out/persistence/repository/group_profile_raw_repository.py index 0935bb94..4d16a01c 100644 --- a/src/infra_layer/adapters/out/persistence/repository/group_profile_raw_repository.py +++ b/src/infra_layer/adapters/out/persistence/repository/group_profile_raw_repository.py @@ -59,7 +59,7 @@ async def ensure_latest( ).update_many({"$set": {"is_latest": False}}) # Update the latest version's is_latest to True - if latest_version.is_latest != True: + if latest_version.is_latest is not True: latest_version.is_latest = True await latest_version.save(session=session) logger.debug( diff --git a/src/infra_layer/adapters/out/persistence/repository/group_user_profile_memory_raw_repository.py b/src/infra_layer/adapters/out/persistence/repository/group_user_profile_memory_raw_repository.py index f0bfbbdf..094a4165 100644 --- 
a/src/infra_layer/adapters/out/persistence/repository/group_user_profile_memory_raw_repository.py +++ b/src/infra_layer/adapters/out/persistence/repository/group_user_profile_memory_raw_repository.py @@ -83,7 +83,7 @@ async def ensure_latest( ).update_many({"$set": {"is_latest": False}}) # Update the latest version's is_latest to True - if latest_version.is_latest != True: + if latest_version.is_latest is not True: latest_version.is_latest = True await latest_version.save(session=session) logger.debug( diff --git a/src/infra_layer/adapters/out/search/repository/episodic_memory_milvus_repository.py b/src/infra_layer/adapters/out/search/repository/episodic_memory_milvus_repository.py index 4aad056b..7b1e7131 100644 --- a/src/infra_layer/adapters/out/search/repository/episodic_memory_milvus_repository.py +++ b/src/infra_layer/adapters/out/search/repository/episodic_memory_milvus_repository.py @@ -124,7 +124,7 @@ async def create_and_save_episodic_memory( metadata_json = metadata try: metadata_dict = json.loads(metadata) - except: + except Exception: metadata_dict = {} # Prepare entity data diff --git a/tests/test_conv_memcell_extractor.py b/tests/test_conv_memcell_extractor.py index db24dbf8..f466a918 100644 --- a/tests/test_conv_memcell_extractor.py +++ b/tests/test_conv_memcell_extractor.py @@ -37,7 +37,7 @@ def get_llm_provider() -> LLMProvider: try: # Try to get from DI container return get_bean_by_type(LLMProvider) - except: + except Exception: # If not found in DI container, create directly logger.info("LLMProvider not found in DI container, creating directly...") return LLMProvider("openai") diff --git a/tests/test_group_user_profile_memory_raw_repository.py b/tests/test_group_user_profile_memory_raw_repository.py index 1ec4dc6e..40687265 100644 --- a/tests/test_group_user_profile_memory_raw_repository.py +++ b/tests/test_group_user_profile_memory_raw_repository.py @@ -528,7 +528,7 @@ async def test_batch_get_by_user_groups(): for user_id, group_id, _ in 
test_data: try: await repo.delete_by_user_group(user_id, group_id) - except: + except Exception: pass raise From ccc28a739e0763590e4d9ff4efe4b535ea9232a1 Mon Sep 17 00:00:00 2001 From: OpenClaw Assistant Date: Wed, 11 Mar 2026 08:12:32 +0800 Subject: [PATCH 2/3] fix: BM25/Embedding index filename mismatch with --from-conv/--to-conv slicing When using sliced runs (e.g. --from-conv 234 --to-conv 264), the index files were being saved with sequential indices (0, 1, 2...) but search was looking up with global conversation IDs (234, 235, 263), causing 'BM25 index not found' errors. Changes: - stage2_index_building.py: Use conversation_ids to name index files with extracted numeric IDs (e.g., 'bm25_index_conv_234.pkl') - evermemos_adapter.py: - Pass conversation_ids to stage2 for proper file naming - Fix conv_id_to_index mapping to map conversation_id -> extracted numeric ID (not sequential index) - Update _check_missing_indexes to use proper file naming - Save conversation_index_mapping.json for debugging This ensures index files and search lookups use consistent IDs. --- evaluation/src/adapters/evermemos/config.py | 4 ++ .../evermemos/stage2_index_building.py | 30 ++++++-- evaluation/src/adapters/evermemos_adapter.py | 68 ++++++++++++++++--- 3 files changed, 90 insertions(+), 12 deletions(-) diff --git a/evaluation/src/adapters/evermemos/config.py b/evaluation/src/adapters/evermemos/config.py index cd381bb4..21b6a187 100644 --- a/evaluation/src/adapters/evermemos/config.py +++ b/evaluation/src/adapters/evermemos/config.py @@ -96,3 +96,7 @@ class ExperimentConfig: max_retries: int = 5 max_concurrent_requests: int = 10 + + # Conversation IDs for index building (needed for --from-conv/--to-conv slicing) + # This maps sequential indices (0, 1, 2...) 
to actual conversation IDs + conversation_ids: list = None  # None instead of []: a mutable default would be rejected on a dataclass field diff --git a/evaluation/src/adapters/evermemos/stage2_index_building.py b/evaluation/src/adapters/evermemos/stage2_index_building.py index f49afcb3..41587c8d 100644 --- a/evaluation/src/adapters/evermemos/stage2_index_building.py +++ b/evaluation/src/adapters/evermemos/stage2_index_building.py @@ -126,8 +126,19 @@ def build_bm25_index( print(f"Reading data from: {data_dir}") + # Get conversation IDs for proper file naming + # If conversation_ids is provided, use them; otherwise fall back to sequential indices + conversation_ids = getattr(config, 'conversation_ids', []) + for i in range(config.num_conv): - file_path = data_dir / f"memcell_list_conv_{i}.json" + # Use conversation_id if available, otherwise use sequential index + if conversation_ids and i < len(conversation_ids): + # Extract numeric ID from conversation_id (e.g., "locomo_234" -> "234") + conv_id = conversation_ids[i].split("_")[-1] if "_" in conversation_ids[i] else conversation_ids[i] + else: + conv_id = str(i) + + file_path = data_dir / f"memcell_list_conv_{conv_id}.json" if not file_path.exists(): print(f"Warning: File not found, skipping: {file_path}") continue @@ -161,7 +172,7 @@ def build_bm25_index( # --- Saving the Index --- index_data = {"bm25": bm25, "docs": original_docs} - output_path = bm25_save_dir / f"bm25_index_conv_{i}.pkl" + output_path = bm25_save_dir / f"bm25_index_conv_{conv_id}.pkl" print(f"Saving index to: {output_path}") with open(output_path, "wb") as f: pickle.dump(index_data, f) @@ -190,8 +201,19 @@ async def build_emb_index(config: ExperimentConfig, data_dir: Path, emb_save_dir import time # For performance statistics + # Get conversation IDs for proper file naming + # If conversation_ids is provided, use them; otherwise fall back to sequential indices + conversation_ids = getattr(config, 'conversation_ids', []) + for i in range(config.num_conv): - file_path = data_dir / f"memcell_list_conv_{i}.json" + # Use 
conversation_id if available, otherwise use sequential index + if conversation_ids and i < len(conversation_ids): + # Extract numeric ID from conversation_id (e.g., "locomo_234" -> "234") + conv_id = conversation_ids[i].split("_")[-1] if "_" in conversation_ids[i] else conversation_ids[i] + else: + conv_id = str(i) + + file_path = data_dir / f"memcell_list_conv_{conv_id}.json" if not file_path.exists(): print(f"Warning: File not found, skipping: {file_path}") continue @@ -365,7 +387,7 @@ async def process_batch_with_retry( # }, # ... # ] - output_path = emb_save_dir / f"embedding_index_conv_{i}.pkl" + output_path = emb_save_dir / f"embedding_index_conv_{conv_id}.pkl" emb_save_dir.mkdir(parents=True, exist_ok=True) print(f"Saving embeddings to: {output_path}") with open(output_path, "wb") as f: diff --git a/evaluation/src/adapters/evermemos_adapter.py b/evaluation/src/adapters/evermemos_adapter.py index c2959401..432dbe72 100644 --- a/evaluation/src/adapters/evermemos_adapter.py +++ b/evaluation/src/adapters/evermemos_adapter.py @@ -102,7 +102,8 @@ def _extract_conv_index(conversation_id: str) -> str: return conversation_id def _check_missing_indexes( - self, index_dir: Path, num_conv: int, index_type: str = "bm25" + self, index_dir: Path, num_conv: int, index_type: str = "bm25", + conversation_ids: List[str] = None ) -> List[int]: """ Check for missing index files. 
@@ -111,6 +112,7 @@ def _check_missing_indexes( index_dir: Index directory num_conv: Total number of conversations index_type: Index type ("bm25" or "embedding") + conversation_ids: List of conversation IDs for proper file naming Returns: List of conversation indices with missing indexes @@ -118,10 +120,16 @@ def _check_missing_indexes( missing_indexes = [] for i in range(num_conv): + # Use extracted numeric ID for file naming if conversation_ids provided + if conversation_ids and i < len(conversation_ids): + conv_id = self._extract_conv_index(conversation_ids[i]) + else: + conv_id = str(i) + if index_type == "bm25": - index_file = index_dir / f"bm25_index_conv_{i}.pkl" + index_file = index_dir / f"bm25_index_conv_{conv_id}.pkl" else: # embedding - index_file = index_dir / f"embedding_index_conv_{i}.pkl" + index_file = index_dir / f"embedding_index_conv_{conv_id}.pkl" if not index_file.exists(): missing_indexes.append(i) @@ -363,10 +371,14 @@ async def run_with_completion(conv_id, task): # Call stage2 implementation to build indexes exp_config = self._convert_config_to_experiment_config() exp_config.num_conv = len(conversations) # Set conversation count + # Pass conversation IDs for proper index file naming (supports --from-conv/--to-conv slicing) + conversation_ids_list = [conv.conversation_id for conv in conversations] + exp_config.conversation_ids = conversation_ids_list # Smart skip logic: check existing index files bm25_need_build = self._check_missing_indexes( - index_dir=bm25_index_dir, num_conv=len(conversations), index_type="bm25" + index_dir=bm25_index_dir, num_conv=len(conversations), index_type="bm25", + conversation_ids=conversation_ids_list ) emb_need_build = [] @@ -376,6 +388,7 @@ async def run_with_completion(conv_id, task): index_dir=emb_index_dir, num_conv=len(conversations), index_type="embedding", + conversation_ids=conversation_ids_list ) # Statistics @@ -424,12 +437,29 @@ async def run_with_completion(conv_id, task): # ========== Plan A: 
Return index metadata (lazy loading) ========== # Don't load indexes into memory, only return paths and metadata + + # Build mapping from conversation_id to extracted numeric ID + # This is needed because when using --from-conv/--to-conv slicing: + # - Index files are saved with extracted numeric IDs (e.g., "234", "235"...) + # - But conversation_ids still contain original IDs (e.g., "locomo_exp_user_234") + # - We need to map conversation_id -> extracted numeric ID (not sequential index!) + conv_id_to_index = { + conv.conversation_id: self._extract_conv_index(conv.conversation_id) + for idx, conv in enumerate(conversations) + } + + # Save mapping to a JSON file for persistence across stages + mapping_file = output_dir / "conversation_index_mapping.json" + with open(mapping_file, "w") as f: + json.dump(conv_id_to_index, f, indent=2) + index_metadata = { "type": "lazy_load", # Mark as lazy loading "memcells_dir": str(memcells_dir), "bm25_index_dir": str(bm25_index_dir), "emb_index_dir": str(emb_index_dir), "conversation_ids": [conv.conversation_id for conv in conversations], + "conv_id_to_index": conv_id_to_index, # Add mapping for search stage "use_hybrid_search": use_hybrid, "total_conversations": len(conversations), } @@ -454,16 +484,29 @@ async def search( Search stage: Retrieve relevant MemCells. Lazy loading: Load indexes from files on demand (memory-friendly). + + Fix for --from-conv/--to-conv slicing: + - When building indexes, files are saved with sequential indices (0, 1, 2...) 
+ - But conversation_id still contains original ID (e.g., "locomo_234") + - Use the mapping (conv_id_to_index) to find the correct sequential index """ # Lazy loading - read indexes from files bm25_index_dir = Path(index["bm25_index_dir"]) emb_index_dir = Path(index["emb_index_dir"]) - # Extract numeric index from conversation_id to find index files - # Example: conversation_id = "locomo_0" -> conv_index = "0" - conv_index = self._extract_conv_index(conversation_id) + # Get the sequential index from the mapping + # This mapping was created in add() stage and maps conversation_id -> sequential index + conv_id_to_index = index.get("conv_id_to_index", {}) + + if conversation_id in conv_id_to_index: + # Use the mapping to get sequential index + conv_index = conv_id_to_index[conversation_id] + else: + # Fallback: extract index from conversation_id (legacy behavior) + # This handles cases where the mapping is not available (e.g., old index files) + conv_index = self._extract_conv_index(conversation_id) - # Load BM25 index on demand (using numeric index) + # Load BM25 index on demand (using sequential index) bm25_file = bm25_index_dir / f"bm25_index_conv_{conv_index}.pkl" if not bm25_file.exists(): return SearchResult( @@ -682,12 +725,21 @@ def build_lazy_index( Returns: Index metadata dict """ + # Build mapping from conversation_id to extracted numeric ID + # This is needed for --from-conv/--to-conv slicing support + # Index files are named with extracted numeric IDs (e.g., "234", not sequential 0) + conv_id_to_index = { + conv.conversation_id: self._extract_conv_index(conv.conversation_id) + for idx, conv in enumerate(conversations) + } + return { "type": "lazy_load", "memcells_dir": str(output_dir / "memcells"), "bm25_index_dir": str(output_dir / "bm25_index"), "emb_index_dir": str(output_dir / "vectors"), "conversation_ids": [conv.conversation_id for conv in conversations], + "conv_id_to_index": conv_id_to_index, # Add mapping for search stage "use_hybrid_search": 
True, "total_conversations": len(conversations), } From d707743e11aefbc00f1166d77d2a0cf8a0fb8d1d Mon Sep 17 00:00:00 2001 From: Spark Date: Thu, 12 Mar 2026 20:13:05 +0800 Subject: [PATCH 3/3] fix(api): add full parameter to GET /api/v1/memories for full episode content This commit addresses issue #131 by adding a 'full' query parameter to the GET /api/v1/memories endpoint. When full=True, the response includes the complete episode field which is not returned by default for backward compatibility. Changes: - Add 'full' parameter to FetchMemRequest DTO - Add 'episode' field to EpisodicMemoryModel (optional, returned only when full=True) - Update find_memories method to accept 'full' parameter - Update _convert_episodic_memory to conditionally include episode content - Update memory_manager to pass 'full' parameter to fetch service This allows external benchmarks and third-party integrations to access the full episodic memory content for auditing and verification purposes. See: https://github.com/EverMind-AI/EverMemOS/issues/131 --- src/agentic_layer/fetch_mem_service.py | 46 ++++++++++++++++---------- src/agentic_layer/memory_manager.py | 1 + src/api_specs/dtos/memory.py | 6 ++++ src/api_specs/memory_models.py | 26 ++++++++++++++- 4 files changed, 61 insertions(+), 18 deletions(-) diff --git a/src/agentic_layer/fetch_mem_service.py b/src/agentic_layer/fetch_mem_service.py index 02c1e0c6..9502d45c 100644 --- a/src/agentic_layer/fetch_mem_service.py +++ b/src/agentic_layer/fetch_mem_service.py @@ -82,6 +82,7 @@ async def find_memories( end_time: Optional[str] = None, version_range: Optional[Tuple[Optional[str], Optional[str]]] = None, limit: int = 10, + full: bool = False, ) -> FetchMemResponse: """ Find memories by user ID and optional filters @@ -94,6 +95,7 @@ async def find_memories( end_time: End time for time range filtering (optional) version_range: Version range (start, end), closed interval [start, end] limit: Limit on number of returned items + full: When 
True, returns full episode content for episodic memory Returns: Memory query response @@ -345,13 +347,14 @@ def _convert_core_memory( ) def _convert_episodic_memory( - self, episodic_memory, user_details_cache: dict = None + self, episodic_memory, user_details_cache: dict = None, full: bool = False ) -> EpisodicMemoryModel: """Convert episodic memory document to model Args: episodic_memory: Episodic memory document user_details_cache: User details cache for batch metadata creation + full: When True, includes the full episode content """ # Create metadata with user details from cache user_info = ( @@ -369,25 +372,32 @@ def _convert_episodic_memory( phone=user_info.get('phone'), ) - return EpisodicMemoryModel( - id=str(episodic_memory.id), - user_id=episodic_memory.user_id, - episode_id=str(episodic_memory.event_id), - title=episodic_memory.subject, - summary=episodic_memory.summary, - participants=episodic_memory.participants or [], - location=( + # Build the model - include episode only when full=True + model_kwargs = { + "id": str(episodic_memory.id), + "user_id": episodic_memory.user_id, + "episode_id": str(episodic_memory.event_id), + "title": episodic_memory.subject, + "summary": episodic_memory.summary, + "participants": episodic_memory.participants or [], + "location": ( episodic_memory.extend.get("location", "") if episodic_memory.extend else "" ), - key_events=episodic_memory.keywords or [], - group_id=episodic_memory.group_id, - group_name=episodic_memory.group_name, - created_at=episodic_memory.created_at, - updated_at=episodic_memory.updated_at, - metadata=metadata, - ) + "key_events": episodic_memory.keywords or [], + "group_id": episodic_memory.group_id, + "group_name": episodic_memory.group_name, + "created_at": episodic_memory.created_at, + "updated_at": episodic_memory.updated_at, + "metadata": metadata, + } + + # Add episode field when full=True (backward compatible) + if full: + model_kwargs["episode"] = getattr(episodic_memory, "episode", None) + 
+ return EpisodicMemoryModel(**model_kwargs) def _convert_behavior_history(self, behavior) -> BehaviorHistoryModel: """Convert behavior history document to model""" @@ -522,6 +532,7 @@ async def find_memories( end_time: Optional[str] = None, version_range: Optional[Tuple[Optional[str], Optional[str]]] = None, limit: int = 10, + full: bool = False, ) -> FetchMemResponse: """ Find memories by user ID and optional filters @@ -535,6 +546,7 @@ async def find_memories( version_range: Version range (start, end), closed interval [start, end]. If not provided or None, get the latest version (ordered by version descending) limit: Limit on number of returned items + full: When True, returns full episode content for episodic memory Returns: Memory query response @@ -609,7 +621,7 @@ async def find_memories( memories = [ self._convert_episodic_memory( - mem, user_details_cache=user_details_cache + mem, user_details_cache=user_details_cache, full=full ) for mem in episodic_memories ] diff --git a/src/agentic_layer/memory_manager.py b/src/agentic_layer/memory_manager.py index df42b05a..7feede4a 100644 --- a/src/agentic_layer/memory_manager.py +++ b/src/agentic_layer/memory_manager.py @@ -170,6 +170,7 @@ async def fetch_mem(self, request: FetchMemRequest) -> FetchMemResponse: end_time=request.end_time, version_range=request.version_range, limit=request.limit, + full=request.full, ) # Note: response.metadata already contains complete employee information diff --git a/src/api_specs/dtos/memory.py b/src/api_specs/dtos/memory.py index 4b5a6f57..a3e85601 100644 --- a/src/api_specs/dtos/memory.py +++ b/src/api_specs/dtos/memory.py @@ -525,6 +525,12 @@ class FetchMemRequest(BaseModel): description="End time for time range filtering (ISO 8601 format)", examples=["2024-12-31T23:59:59"], ) + full: Optional[bool] = Field( + default=False, + description="When set to True, returns the full episode content for episodic memory. 
" + "This includes the complete episode field which is not returned by default for backward compatibility.", + examples=[False], + ) model_config = {"arbitrary_types_allowed": True} diff --git a/src/api_specs/memory_models.py b/src/api_specs/memory_models.py index 6439a153..dce1d054 100644 --- a/src/api_specs/memory_models.py +++ b/src/api_specs/memory_models.py @@ -221,13 +221,37 @@ class PreferenceModel: @dataclass class EpisodicMemoryModel: - """Episodic memory model""" + """Episodic memory model + + Attributes: + id: Unique identifier for the memory + user_id: User ID who owns this memory + episode_id: Same as id, kept for compatibility + title: Memory title/subject + summary: Brief summary of the episode (may fallback to content[:200] if no LLM summary available) + episode: Full episodic content (returned only when full=True in request) + timestamp: When the episode occurred + participants: List of participants involved + location: Where the episode took place + start_time: Start time of the episode + end_time: End time of the episode + key_events: List of key events extracted + group_id: Group ID (for group memories) + group_name: Group name + created_at: When the memory was created + updated_at: When the memory was last updated + metadata: Additional metadata + extend: Extended fields + memcell_event_id_list: List of source memcell event IDs + subject: Memory subject (same as title) + """ id: str user_id: str episode_id: str # Same as id, no difference, kept for compatibility title: str summary: str + episode: Optional[str] = None # Full episode content (returned only when full=True) timestamp: Optional[datetime] = None participants: List[str] = field(default_factory=list) location: Optional[str] = None