Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion demo/utils/simple_memory_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,6 @@ async def search(
query: Query text
top_k: Number of results to return (default: 3)
mode: Retrieval mode (default: "rrf")
- "rrf": RRF fusion (recommended)
- "keyword": Keyword retrieval (BM25)
- "vector": Vector retrieval
- "hybrid": Keyword + Vector + Rerank
Expand Down
1 change: 0 additions & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ services:
- "27017:27017"
volumes:
- mongodb_data:/data/db
- ./docker/mongodb/init:/docker-entrypoint-initdb.d
networks:
- memsys-network
healthcheck:
Expand Down
3 changes: 3 additions & 0 deletions docs/STARTER_KIT.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ Welcome to the EverMemOS Competition! This starter kit will help you get up and
git clone https://github.com/EverMind-AI/EverMemOS.git
cd EverMemOS

# Copy environment template and configure (replace with your API keys)
cp env.template .env

# Start all services with Docker
docker compose up -d

Expand Down
2 changes: 1 addition & 1 deletion docs/dev_docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ uv run python src/bootstrap.py evaluation/locomo_evaluation/stage1_memcells_extr

# Run other evaluation stages
uv run python src/bootstrap.py evaluation/locomo_evaluation/stage2_index_building.py
uv run python src/bootstrap.py evaluation/locomo_evaluation/stage3_memory_retrivel.py
uv run python src/bootstrap.py evaluation/locomo_evaluation/stage3_memory_retrieval.py
```

#### 2. Run Demo Scripts
Expand Down
4 changes: 2 additions & 2 deletions evaluation/src/adapters/evermemos/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ locomo_evaluation/
│ └── answer_prompts.py # Answer generation
├── stage1_memcells_extraction.py # Stage 1: Extract MemCells
├── stage2_index_building.py # Stage 2: Build indexes
├── stage3_memory_retrivel.py # Stage 3: Retrieve memories
├── stage3_memory_retrieval.py # Stage 3: Retrieve memories
├── stage4_response.py # Stage 4: Generate responses
├── stage5_eval.py # Stage 5: Evaluate results
└── tools/ # Utility tools
Expand Down Expand Up @@ -73,7 +73,7 @@ python evaluation/locomo_evaluation/stage1_memcells_extraction.py
python evaluation/locomo_evaluation/stage2_index_building.py

# Stage 3: Retrieve memories
python evaluation/locomo_evaluation/stage3_memory_retrivel.py
python evaluation/locomo_evaluation/stage3_memory_retrieval.py

# Stage 4: Generate responses
python evaluation/locomo_evaluation/stage4_response.py
Expand Down
4 changes: 4 additions & 0 deletions evaluation/src/adapters/evermemos/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,7 @@ class ExperimentConfig:

max_retries: int = 5
max_concurrent_requests: int = 10

# Conversation IDs for index building (needed for --from-conv/--to-conv slicing)
# This maps sequential indices (0, 1, 2...) to actual conversation IDs
conversation_ids: list = []
30 changes: 26 additions & 4 deletions evaluation/src/adapters/evermemos/stage2_index_building.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,19 @@ def build_bm25_index(

print(f"Reading data from: {data_dir}")

# Get conversation IDs for proper file naming
# If conversation_ids is provided, use them; otherwise fall back to sequential indices
conversation_ids = getattr(config, 'conversation_ids', [])

for i in range(config.num_conv):
file_path = data_dir / f"memcell_list_conv_{i}.json"
# Use conversation_id if available, otherwise use sequential index
if conversation_ids and i < len(conversation_ids):
# Extract numeric ID from conversation_id (e.g., "locomo_234" -> "234")
conv_id = conversation_ids[i].split("_")[-1] if "_" in conversation_ids[i] else conversation_ids[i]
else:
conv_id = str(i)

file_path = data_dir / f"memcell_list_conv_{conv_id}.json"
if not file_path.exists():
print(f"Warning: File not found, skipping: {file_path}")
continue
Expand Down Expand Up @@ -161,7 +172,7 @@ def build_bm25_index(
# --- Saving the Index ---
index_data = {"bm25": bm25, "docs": original_docs}

output_path = bm25_save_dir / f"bm25_index_conv_{i}.pkl"
output_path = bm25_save_dir / f"bm25_index_conv_{conv_id}.pkl"
print(f"Saving index to: {output_path}")
with open(output_path, "wb") as f:
pickle.dump(index_data, f)
Expand Down Expand Up @@ -190,8 +201,19 @@ async def build_emb_index(config: ExperimentConfig, data_dir: Path, emb_save_dir

import time # For performance statistics

# Get conversation IDs for proper file naming
# If conversation_ids is provided, use them; otherwise fall back to sequential indices
conversation_ids = getattr(config, 'conversation_ids', [])

for i in range(config.num_conv):
file_path = data_dir / f"memcell_list_conv_{i}.json"
# Use conversation_id if available, otherwise use sequential index
if conversation_ids and i < len(conversation_ids):
# Extract numeric ID from conversation_id (e.g., "locomo_234" -> "234")
conv_id = conversation_ids[i].split("_")[-1] if "_" in conversation_ids[i] else conversation_ids[i]
else:
conv_id = str(i)

file_path = data_dir / f"memcell_list_conv_{conv_id}.json"
if not file_path.exists():
print(f"Warning: File not found, skipping: {file_path}")
continue
Expand Down Expand Up @@ -365,7 +387,7 @@ async def process_batch_with_retry(
# },
# ...
# ]
output_path = emb_save_dir / f"embedding_index_conv_{i}.pkl"
output_path = emb_save_dir / f"embedding_index_conv_{conv_id}.pkl"
emb_save_dir.mkdir(parents=True, exist_ok=True)
print(f"Saving embeddings to: {output_path}")
with open(output_path, "wb") as f:
Expand Down
76 changes: 64 additions & 12 deletions evaluation/src/adapters/evermemos_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from evaluation.src.adapters.evermemos import (
stage1_memcells_extraction,
stage2_index_building,
stage3_memory_retrivel,
stage3_memory_retrieval,
stage4_response,
)

Expand Down Expand Up @@ -102,7 +102,8 @@ def _extract_conv_index(conversation_id: str) -> str:
return conversation_id

def _check_missing_indexes(
self, index_dir: Path, num_conv: int, index_type: str = "bm25"
self, index_dir: Path, num_conv: int, index_type: str = "bm25",
conversation_ids: List[str] = None
) -> List[int]:
"""
Check for missing index files.
Expand All @@ -111,17 +112,24 @@ def _check_missing_indexes(
index_dir: Index directory
num_conv: Total number of conversations
index_type: Index type ("bm25" or "embedding")
conversation_ids: List of conversation IDs for proper file naming

Returns:
List of conversation indices with missing indexes
"""
missing_indexes = []

for i in range(num_conv):
# Use extracted numeric ID for file naming if conversation_ids provided
if conversation_ids and i < len(conversation_ids):
conv_id = self._extract_conv_index(conversation_ids[i])
else:
conv_id = str(i)

if index_type == "bm25":
index_file = index_dir / f"bm25_index_conv_{i}.pkl"
index_file = index_dir / f"bm25_index_conv_{conv_id}.pkl"
else: # embedding
index_file = index_dir / f"embedding_index_conv_{i}.pkl"
index_file = index_dir / f"embedding_index_conv_{conv_id}.pkl"

if not index_file.exists():
missing_indexes.append(i)
Expand Down Expand Up @@ -363,10 +371,14 @@ async def run_with_completion(conv_id, task):
# Call stage2 implementation to build indexes
exp_config = self._convert_config_to_experiment_config()
exp_config.num_conv = len(conversations) # Set conversation count
# Pass conversation IDs for proper index file naming (supports --from-conv/--to-conv slicing)
conversation_ids_list = [conv.conversation_id for conv in conversations]
exp_config.conversation_ids = conversation_ids_list

# Smart skip logic: check existing index files
bm25_need_build = self._check_missing_indexes(
index_dir=bm25_index_dir, num_conv=len(conversations), index_type="bm25"
index_dir=bm25_index_dir, num_conv=len(conversations), index_type="bm25",
conversation_ids=conversation_ids_list
)

emb_need_build = []
Expand All @@ -376,6 +388,7 @@ async def run_with_completion(conv_id, task):
index_dir=emb_index_dir,
num_conv=len(conversations),
index_type="embedding",
conversation_ids=conversation_ids_list
)

# Statistics
Expand Down Expand Up @@ -424,12 +437,29 @@ async def run_with_completion(conv_id, task):

# ========== Plan A: Return index metadata (lazy loading) ==========
# Don't load indexes into memory, only return paths and metadata

# Build mapping from conversation_id to extracted numeric ID
# This is needed because when using --from-conv/--to-conv slicing:
# - Index files are saved with extracted numeric IDs (e.g., "234", "235"...)
# - But conversation_ids still contain original IDs (e.g., "locomo_exp_user_234")
# - We need to map conversation_id -> extracted numeric ID (not sequential index!)
conv_id_to_index = {
conv.conversation_id: self._extract_conv_index(conv.conversation_id)
for idx, conv in enumerate(conversations)
}

# Save mapping to a JSON file for persistence across stages
mapping_file = output_dir / "conversation_index_mapping.json"
with open(mapping_file, "w") as f:
json.dump(conv_id_to_index, f, indent=2)

index_metadata = {
"type": "lazy_load", # Mark as lazy loading
"memcells_dir": str(memcells_dir),
"bm25_index_dir": str(bm25_index_dir),
"emb_index_dir": str(emb_index_dir),
"conversation_ids": [conv.conversation_id for conv in conversations],
"conv_id_to_index": conv_id_to_index, # Add mapping for search stage
"use_hybrid_search": use_hybrid,
"total_conversations": len(conversations),
}
Expand All @@ -454,16 +484,29 @@ async def search(
Search stage: Retrieve relevant MemCells.

Lazy loading: Load indexes from files on demand (memory-friendly).

Fix for --from-conv/--to-conv slicing:
- When building indexes, files are named with the extracted numeric ID (e.g., "234", "235"...)
- The conversation_id still contains the original ID (e.g., "locomo_234")
- Use the mapping (conv_id_to_index) to find the correct numeric ID
"""
# Lazy loading - read indexes from files
bm25_index_dir = Path(index["bm25_index_dir"])
emb_index_dir = Path(index["emb_index_dir"])

# Extract numeric index from conversation_id to find index files
# Example: conversation_id = "locomo_0" -> conv_index = "0"
conv_index = self._extract_conv_index(conversation_id)
# Get the extracted numeric ID from the mapping
# This mapping was created in add() stage and maps conversation_id -> extracted numeric ID
conv_id_to_index = index.get("conv_id_to_index", {})

if conversation_id in conv_id_to_index:
# Use the mapping to get the extracted numeric ID
conv_index = conv_id_to_index[conversation_id]
else:
# Fallback: extract index from conversation_id (legacy behavior)
# This handles cases where the mapping is not available (e.g., old index files)
conv_index = self._extract_conv_index(conversation_id)

# Load BM25 index on demand (using numeric index)
# Load BM25 index on demand (using the extracted numeric ID)
bm25_file = bm25_index_dir / f"bm25_index_conv_{conv_index}.pkl"
if not bm25_file.exists():
return SearchResult(
Expand Down Expand Up @@ -497,7 +540,7 @@ async def search(

if retrieval_mode == "agentic":
# Agentic retrieval
top_results, metadata = await stage3_memory_retrivel.agentic_retrieval(
top_results, metadata = await stage3_memory_retrieval.agentic_retrieval(
query=query,
config=exp_config,
llm_provider=self.llm_provider,
Expand All @@ -508,7 +551,7 @@ async def search(
)
elif retrieval_mode == "lightweight":
# Lightweight retrieval
top_results, metadata = await stage3_memory_retrivel.lightweight_retrieval(
top_results, metadata = await stage3_memory_retrieval.lightweight_retrieval(
query=query,
emb_index=emb_index,
bm25=bm25,
Expand All @@ -517,7 +560,7 @@ async def search(
)
else:
# Default to hybrid retrieval
top_results = await stage3_memory_retrivel.hybrid_search_with_rrf(
top_results = await stage3_memory_retrieval.hybrid_search_with_rrf(
query=query,
emb_index=emb_index,
bm25=bm25,
Expand Down Expand Up @@ -682,12 +725,21 @@ def build_lazy_index(
Returns:
Index metadata dict
"""
# Build mapping from conversation_id to extracted numeric ID
# This is needed for --from-conv/--to-conv slicing support
# Index files are named with extracted numeric IDs (e.g., "234", not sequential 0)
conv_id_to_index = {
conv.conversation_id: self._extract_conv_index(conv.conversation_id)
for idx, conv in enumerate(conversations)
}

return {
"type": "lazy_load",
"memcells_dir": str(output_dir / "memcells"),
"bm25_index_dir": str(output_dir / "bm25_index"),
"emb_index_dir": str(output_dir / "vectors"),
"conversation_ids": [conv.conversation_id for conv in conversations],
"conv_id_to_index": conv_id_to_index, # Add mapping for search stage
"use_hybrid_search": True,
"total_conversations": len(conversations),
}
Loading