From 6b8392fd2470fd033ca11492e04de582ff30db76 Mon Sep 17 00:00:00 2001 From: Ronit Kiran Murmu Date: Mon, 2 Mar 2026 14:58:59 +0530 Subject: [PATCH 1/4] added CRUD for Chorma Vector store --- backend/main/src/store/DBVector.py | 643 +++++++++++++++++++++++++++++ backend/pyproject.toml | 6 + backend/tests/test_db_vector.py | 165 ++++++++ 3 files changed, 814 insertions(+) create mode 100644 backend/main/src/store/DBVector.py create mode 100644 backend/tests/test_db_vector.py diff --git a/backend/main/src/store/DBVector.py b/backend/main/src/store/DBVector.py new file mode 100644 index 0000000..068342f --- /dev/null +++ b/backend/main/src/store/DBVector.py @@ -0,0 +1,643 @@ +""" +ChromaDB Vector Store CRUD Manager for Deep Researcher. +Mirrors the design of `SQLiteManager` in `DBManager.py`. +Exports a singleton `db_vector_manager` for SDK-style import. +""" + +from pathlib import Path +from typing import Any, Dict, List, Optional, Union +import logging +import sys + +# ------------------------------------------------------------------ +# Path bootstrap +# ------------------------------------------------------------------ +BASE_DIR = Path(__file__).parent # .../store/ +src_dir = BASE_DIR.parent # .../src/ + +for _p in [str(src_dir), str(src_dir.parent)]: + if _p not in sys.path: + sys.path.append(_p) + +from utils.DRLogger import dr_logger # noqa: E402 + +logging.basicConfig(level=logging.INFO) +_std_logger = logging.getLogger(__name__) + +try: + import chromadb + _CHROMA_AVAILABLE = True +except ImportError: + _CHROMA_AVAILABLE = False + + +class DBVectorManager: + """ + ## Description + + Reusable CRUD manager for a ChromaDB persistent vector collection. + All methods accept structured Python arguments — no raw query strings. + All methods return `{"success": bool, "message": str, "data": any | None}`. + + ## Parameters + + - `persist_directory` (`Union[str, Path]`) + - Description: Path where ChromaDB stores data on disk. + - Constraints: Must be a writable directory path. + - Example: `"backend/main/src/store/database/chroma_store"` + + - `collection_name` (`str`) + - Description: Name of the ChromaDB collection to manage. + - Constraints: Non-empty string; alphanumeric + underscores. + - Example: `"research_documents"` + + ## Returns + + `None` + + ## Raises + + - `RuntimeError` — when `chromadb` is not installed. + + ## Side Effects + + - Opens or creates a persistent ChromaDB client at `persist_directory`. + - Gets or creates the collection automatically on instantiation. + + ## Debug Notes + + - Install chromadb via `uv add chromadb` if import fails. + + ## Customization + + - Pass a custom `embedding_function` to `get_or_create_collection` for non-default embeddings. + """ + + def __init__(self, persist_directory: Union[str, Path], collection_name: str) -> None: + if not _CHROMA_AVAILABLE: + raise RuntimeError("chromadb is not installed. Run: uv add chromadb") + + self.persist_directory = str(persist_directory) + self.collection_name = collection_name + self._client = chromadb.PersistentClient(path=self.persist_directory) + self._collection = self._client.get_or_create_collection(name=self.collection_name) + + def _log(self, level: str, message: str, urgency: str = "none") -> None: + """ + ## Description + + Emits a log entry to the standard Python logger and `DRLogger`. + + ## Parameters + + - `level` (`str`) — `"info"` or `"error"`. + - `message` (`str`) — Event description. + - `urgency` (`str`) — `"none"`, `"moderate"`, or `"critical"`. Defaults to `"none"`. + + ## Returns + + `None` + + ## Raises + + - `None` — DRLogger failures are silently caught. + + ## Side Effects + + - Writes to terminal and to `logs.db.sqlite3` via `DRLogger`. + + ## Debug Notes + + - DRLogger failures are printed to terminal but do not raise. + + ## Customization + + - Extend with `"warning"` level support if needed. + """ + if level == "error": + _std_logger.error(message) + else: + _std_logger.info(message) + + try: + dr_logger.log( + log_type="error" if level == "error" else "info", + message=message, + origin="system", + module="DB", + urgency=urgency, # type: ignore + app_version="1.0", + ) + except Exception as e: + _std_logger.error(f"DRLogger internal failure in DBVectorManager: {e}") + + def add( + self, + ids: List[str], + documents: Optional[List[str]] = None, + metadatas: Optional[List[Dict[str, Any]]] = None, + embeddings: Optional[List[List[float]]] = None, + ) -> Dict[str, Any]: + """ + ## Description + + Adds one or more documents to the collection. Equivalent to INSERT. + + ## Parameters + + - `ids` (`List[str]`) + - Description: Unique string identifiers, one per document. + - Constraints: Non-empty. Each ID must be unique within the collection. + - Example: `["doc-001", "doc-002"]` + + - `documents` (`Optional[List[str]]`) + - Description: Raw text content; ChromaDB auto-embeds. + - Constraints: Length must match `ids`. + - Example: `["Deep learning is a subset of ML."]` + + - `metadatas` (`Optional[List[Dict[str, Any]]]`) + - Description: Key-value metadata for filtering. + - Constraints: Values must be `str`, `int`, `float`, or `bool`. + - Example: `[{"source": "arxiv", "year": 2024}]` + + - `embeddings` (`Optional[List[List[float]]]`) + - Description: Pre-computed vectors; skips ChromaDB's default embedding. + - Constraints: Length must match `ids`. + - Example: `[[0.1, 0.2, ...]]` + + ## Returns + + `dict` + + ```json + { "success": true, "message": "2 document(s) added to collection '...'", "data": { "count": 2 } } + { "success": false, "message": "ID already exists.", "data": null } + ``` + + ## Raises + + - `None` — all exceptions caught internally. + + ## Side Effects + + - Persists documents to disk. Logs via `DRLogger`. + + ## Debug Notes + + - Duplicate IDs raise `IDAlreadyExistsError`. Use `update()` to modify existing docs. + + ## Customization + + - For upsert semantics, extend to call `_collection.upsert()`. + """ + if not ids: + return {"success": False, "message": "ids must be a non-empty list.", "data": None} + + try: + # Check for existing IDs — chromadb 1.5.2+ silently ignores duplicates + existing = self._collection.get(ids=ids) + if existing.get("ids"): + found = existing["ids"][0] + return {"success": False, "message": f"ID '{found}' already exists.", "data": None} + + kwargs: Dict[str, Any] = {"ids": ids} + if documents is not None: + kwargs["documents"] = documents + if metadatas is not None: + kwargs["metadatas"] = metadatas + if embeddings is not None: + kwargs["embeddings"] = embeddings + + self._collection.add(**kwargs) + count = len(ids) + self._log("info", f"Added {count} document(s) to '{self.collection_name}'.") + return { + "success": True, + "message": f"{count} document(s) added to collection '{self.collection_name}'", + "data": {"count": count}, + } + except Exception as e: + self._log("error", f"Error adding to '{self.collection_name}': {e}", "moderate") + return {"success": False, "message": str(e), "data": None} + + def fetch_all( + self, + where: Optional[Dict[str, Any]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + ) -> Dict[str, Any]: + """ + ## Description + + Retrieves documents from the collection with optional metadata filtering. + Equivalent to SELECT * (with optional WHERE). + + ## Parameters + + - `where` (`Optional[Dict[str, Any]]`) + - Description: Metadata equality filter. `None` fetches all. + - Constraints: Values must be `str`, `int`, `float`, or `bool`. + - Example: `{"source": "arxiv"}` + + - `limit` (`Optional[int]`) + - Description: Max documents to return. + - Constraints: Must be >= 1 if provided. + - Example: `50` + + - `offset` (`Optional[int]`) + - Description: Documents to skip (for pagination). + - Constraints: Must be >= 0 if provided. + - Example: `10` + + ## Returns + + `dict` + + ```json + { + "success": true, + "message": "Fetched 3 document(s) from collection '...'", + "data": { "ids": [...], "documents": [...], "metadatas": [...], "embeddings": null } + } + ``` + + ## Raises + + - `None` — all exceptions caught internally. + + ## Side Effects + + - None (read-only). + + ## Debug Notes + + - `embeddings` are excluded from the response by default (heavy payload). + + ## Customization + + - Add `"embeddings"` to the `include` list inside `.get()` if raw vectors are needed. + """ + try: + kwargs: Dict[str, Any] = {"include": ["documents", "metadatas"]} + if where is not None: + kwargs["where"] = where + if limit is not None: + kwargs["limit"] = limit + if offset is not None: + kwargs["offset"] = offset + + result = self._collection.get(**kwargs) + ids = result.get("ids", []) + self._log("info", f"Fetched {len(ids)} document(s) from '{self.collection_name}'.") + return { + "success": True, + "message": f"Fetched {len(ids)} document(s) from collection '{self.collection_name}'", + "data": { + "ids": ids, + "documents": result.get("documents"), + "metadatas": result.get("metadatas"), + "embeddings": None, + }, + } + except Exception as e: + self._log("error", f"Error fetching from '{self.collection_name}': {e}", "moderate") + return {"success": False, "message": str(e), "data": None} + + def fetch_one(self, id: str) -> Dict[str, Any]: + """ + ## Description + + Retrieves a single document by its unique string ID. + Equivalent to SELECT * WHERE id = '...' LIMIT 1. + + ## Parameters + + - `id` (`str`) + - Description: Unique identifier of the document to retrieve. + - Constraints: Non-empty string previously used in `add()`. + - Example: `"doc-001"` + + ## Returns + + `dict` + + ```json + { "success": true, "message": "Document 'doc-001' fetched successfully", "data": { "id": "doc-001", "document": "...", "metadata": {} } } + { "success": true, "message": "Document 'doc-001' not found in collection", "data": null } + { "success": false, "message": "ChromaDB error", "data": null } + ``` + + ## Raises + + - `None` — all exceptions caught internally. + + ## Side Effects + + - None (read-only). + + ## Debug Notes + + - Missing ID → `success: true, data: null` (not an error — document simply doesn't exist). + + ## Customization + + - Add `"embeddings"` to the `include` list to return raw vectors. + """ + if not id: + return {"success": False, "message": "id must be a non-empty string.", "data": None} + + try: + result = self._collection.get(ids=[id], include=["documents", "metadatas"]) + ids = result.get("ids", []) + + if not ids: + return { + "success": True, + "message": f"Document '{id}' not found in collection '{self.collection_name}'", + "data": None, + } + + docs = result.get("documents") or [None] + metas = result.get("metadatas") or [None] + self._log("info", f"Document '{id}' fetched from '{self.collection_name}'.") + return { + "success": True, + "message": f"Document '{id}' fetched successfully", + "data": {"id": ids[0], "document": docs[0], "metadata": metas[0]}, + } + except Exception as e: + self._log("error", f"Error fetching '{id}' from '{self.collection_name}': {e}", "moderate") + return {"success": False, "message": str(e), "data": None} + + def update( + self, + ids: List[str], + documents: Optional[List[str]] = None, + metadatas: Optional[List[Dict[str, Any]]] = None, + embeddings: Optional[List[List[float]]] = None, + ) -> Dict[str, Any]: + """ + ## Description + + Updates existing documents by their IDs. Equivalent to UPDATE WHERE id IN (...). + Only the fields provided are overwritten. + + ## Parameters + + - `ids` (`List[str]`) + - Description: IDs of documents to update. + - Constraints: Non-empty. IDs should already exist in the collection. + - Example: `["doc-001"]` + + - `documents` (`Optional[List[str]]`) + - Description: Replacement text; triggers re-embedding. + - Constraints: Length must match `ids`. + - Example: `["Updated text."]` + + - `metadatas` (`Optional[List[Dict[str, Any]]]`) + - Description: Replacement metadata (full overwrite, not a merge). + - Constraints: Values must be `str`, `int`, `float`, or `bool`. + - Example: `[{"version": 2}]` + + - `embeddings` (`Optional[List[List[float]]]`) + - Description: Replacement embedding vectors. + - Constraints: Length must match `ids`. + - Example: `[[0.5, 0.6, ...]]` + + ## Returns + + `dict` + + ```json + { "success": true, "message": "1 document(s) updated in collection '...'", "data": { "count": 1 } } + { "success": false, "message": "Update requires at least one of: documents, metadatas, embeddings.", "data": null } + ``` + + ## Raises + + - `None` — all exceptions caught internally. + + ## Side Effects + + - Overwrites document fields in persistent storage. Logs via `DRLogger`. + + ## Debug Notes + + - Non-existent IDs are silently skipped by ChromaDB — no error is raised. + + ## Customization + + - For upsert (add if missing), call `_collection.upsert()` directly. + """ + if not ids: + return {"success": False, "message": "ids must be a non-empty list.", "data": None} + + if documents is None and metadatas is None and embeddings is None: + return { + "success": False, + "message": "Update requires at least one of: documents, metadatas, embeddings.", + "data": None, + } + + try: + kwargs: Dict[str, Any] = {"ids": ids} + if documents is not None: + kwargs["documents"] = documents + if metadatas is not None: + kwargs["metadatas"] = metadatas + if embeddings is not None: + kwargs["embeddings"] = embeddings + + self._collection.update(**kwargs) + count = len(ids) + self._log("info", f"Updated {count} document(s) in '{self.collection_name}'.") + return { + "success": True, + "message": f"{count} document(s) updated in collection '{self.collection_name}'", + "data": {"count": count}, + } + except Exception as e: + self._log("error", f"Error updating '{self.collection_name}': {e}", "moderate") + return {"success": False, "message": str(e), "data": None} + + def delete(self, ids: List[str]) -> Dict[str, Any]: + """ + ## Description + + Permanently removes documents by their IDs. + Equivalent to DELETE WHERE id IN (...). + `ids` is required — there is no "delete all" semantic by design. + + ## Parameters + + - `ids` (`List[str]`) + - Description: IDs of documents to delete. + - Constraints: Non-empty list. Non-existent IDs are silently skipped. + - Example: `["doc-001", "doc-002"]` + + ## Returns + + `dict` + + ```json + { "success": true, "message": "2 document(s) deleted from collection '...'", "data": { "count": 2 } } + { "success": false, "message": "delete() requires a non-empty ids list.", "data": null } + ``` + + ## Raises + + - `None` — all exceptions caught internally. + + ## Side Effects + + - Irreversibly removes vectors and metadata from storage. Logs via `DRLogger`. + + ## Debug Notes + + - Deletion is permanent. Use metadata soft-delete if recoverability is needed. + + ## Customization + + - For metadata-based bulk deletions, extend to call `_collection.delete(where={...})`. + """ + if not ids: + return { + "success": False, + "message": "delete() requires a non-empty ids list.", + "data": None, + } + + try: + self._collection.delete(ids=ids) + count = len(ids) + self._log("info", f"Deleted {count} document(s) from '{self.collection_name}'.") + return { + "success": True, + "message": f"{count} document(s) deleted from collection '{self.collection_name}'", + "data": {"count": count}, + } + except Exception as e: + self._log("error", f"Error deleting from '{self.collection_name}': {e}", "moderate") + return {"success": False, "message": str(e), "data": None} + + def collection_exists(self) -> Dict[str, Any]: + """ + ## Description + + Health-check: verifies the collection is accessible and returns its document count. + + ## Parameters + + - `None` + + ## Returns + + `dict` + + ```json + { "success": true, "message": "Collection '...' exists and is accessible", "data": { "collection_name": "...", "count": 42 } } + { "success": false, "message": "Collection '...' is not accessible: ...", "data": null } + ``` + + ## Raises + + - `None` — all exceptions caught internally. + + ## Side Effects + + - None (read-only). + + ## Debug Notes + + - `count: 0` means the collection is empty, not missing. + + ## Customization + + - Use `count` to drive pagination decisions in `fetch_all()`. + """ + try: + count = self._collection.count() + return { + "success": True, + "message": f"Collection '{self.collection_name}' exists and is accessible", + "data": {"collection_name": self.collection_name, "count": count}, + } + except Exception as e: + self._log("error", f"Collection '{self.collection_name}' not accessible: {e}", "critical") + return { + "success": False, + "message": f"Collection '{self.collection_name}' is not accessible: {e}", + "data": None, + } + + +def _initialize_chroma_store() -> None: + """ + ## Description + + Ensures `database/chroma_store/` exists on disk. Runs on module import. + + ## Parameters + + - `None` + + ## Returns + + `None` + + ## Raises + + - `None` — failures are logged and swallowed so import always succeeds. + + ## Side Effects + + - Creates `store/database/chroma_store/` if absent. Logs via `DRLogger`. + + ## Debug Notes + + - Verify the process has write access to `backend/main/src/store/database/`. + + ## Customization + + - Adjust the `chroma_store_dir` path if the project layout changes. + """ + chroma_store_dir = BASE_DIR / "database" / "chroma_store" + try: + chroma_store_dir.mkdir(parents=True, exist_ok=True) + _std_logger.info(f"ChromaDB storage directory ensured: {chroma_store_dir}") + dr_logger.log( + log_type="info", + message=f"ChromaDB storage directory ensured: {chroma_store_dir}", + origin="system", + module="DB", + urgency="none", + app_version="1.0", + ) + except Exception as e: + _std_logger.error(f"Failed to initialize ChromaDB store directory: {e}") + try: + dr_logger.log( + log_type="error", + message=f"Failed to initialize ChromaDB store directory: {e}", + origin="system", + module="DB", + urgency="critical", + app_version="1.0", + ) + except Exception: + pass + + +# Run on import +_initialize_chroma_store() + +# Singleton export — SDK-style, mirrors DBManager pattern +_chroma_store_path = BASE_DIR / "database" / "chroma_store" + +if _CHROMA_AVAILABLE: + db_vector_manager = DBVectorManager( + persist_directory=_chroma_store_path, + collection_name="research_documents", + ) +else: + db_vector_manager = None # type: ignore diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 597bbdc..05055d6 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -5,7 +5,13 @@ description = "This is an Backend for Deep Researcher, a research assistant that readme = "README.md" requires-python = ">=3.12" dependencies = [ + "chromadb>=0.6.0", "google-genai>=1.65.0", "ollama>=0.6.1", "python-dotenv>=1.2.1", ] + +[dependency-groups] +dev = [ + "pytest>=9.0.2", +] diff --git a/backend/tests/test_db_vector.py b/backend/tests/test_db_vector.py new file mode 100644 index 0000000..84e15ed --- /dev/null +++ b/backend/tests/test_db_vector.py @@ -0,0 +1,165 @@ +# --------------------------------------------------------------------------- +# Test the actual implementation by subclassing DBVectorManager and injecting +# an ephemeral (in-memory) client — avoids writing any files to disk. +# --------------------------------------------------------------------------- +import unittest +from unittest.mock import patch, MagicMock +import chromadb +from main.src.store.DBVector import DBVectorManager + + +class _InMemoryDBVectorManager(DBVectorManager): + """ + Subclass that forces use of an EphemeralClient for testing. + Inherits all CRUD logic from the real DBVectorManager. + """ + + def __init__(self, collection_name: str) -> None: + # Skip the parent __init__ which tries to create PersistentClient + self.collection_name = collection_name + self._client = chromadb.EphemeralClient() + self._collection = self._client.get_or_create_collection( + name=self.collection_name + ) + + # Note: CRUD methods (add, fetch_all, fetch_one, update, delete, collection_exists) + # are all inherited from DBVectorManager and will be tested here. + + +# =========================================================================== +# Test Cases +# =========================================================================== + +class TestDBVectorManager(unittest.TestCase): + """Unit tests for DBVectorManager CRUD operations using in-memory ChromaDB.""" + + def setUp(self) -> None: + # Mock dr_logger to avoid persistent side effects + self.patcher = patch("main.src.store.DBVector.dr_logger") + self.mock_logger = self.patcher.start() + + # Unique collection name per test to ensure isolation + import uuid + test_uuid = uuid.uuid4().hex[:8] + self.manager = _InMemoryDBVectorManager(f"test_coll_{test_uuid}") + + def tearDown(self) -> None: + self.patcher.stop() + + # ----------------------------------------------------------------------- + + def test_add_and_fetch_one(self) -> None: + """Add a single document then retrieve it by ID.""" + response = self.manager.add( + ids=["doc-001"], + documents=["Deep learning is a subset of machine learning."], + metadatas=[{"source": "arxiv", "year": 2024}], + ) + self.assertTrue(response["success"]) + self.assertEqual(response["data"]["count"], 1) + + fetch_response = self.manager.fetch_one("doc-001") + self.assertTrue(fetch_response["success"]) + doc = fetch_response["data"] + self.assertIsNotNone(doc) + self.assertEqual(doc["id"], "doc-001") + self.assertIn("machine learning", doc["document"]) + self.assertEqual(doc["metadata"]["source"], "arxiv") + + def test_fetch_all(self) -> None: + """Add multiple documents then fetch all.""" + self.manager.add(ids=["a"], documents=["Alpha doc"], metadatas=[{"tag": "a"}]) + self.manager.add(ids=["b"], documents=["Beta doc"], metadatas=[{"tag": "b"}]) + self.manager.add(ids=["c"], documents=["Gamma doc"], metadatas=[{"tag": "c"}]) + + response = self.manager.fetch_all() + self.assertTrue(response["success"]) + data = response["data"] + self.assertEqual(len(data["ids"]), 3) + self.assertIn("a", data["ids"]) + self.assertIn("b", data["ids"]) + self.assertIn("c", data["ids"]) + + def test_fetch_all_with_where_filter(self) -> None: + """fetch_all with a metadata where-filter returns only matching docs.""" + self.manager.add(ids=["x"], documents=["X doc"], metadatas=[{"env": "prod"}]) + self.manager.add(ids=["y"], documents=["Y doc"], metadatas=[{"env": "dev"}]) + + response = self.manager.fetch_all(where={"env": "prod"}) + self.assertTrue(response["success"]) + self.assertEqual(response["data"]["ids"], ["x"]) + + def test_update(self) -> None: + """Add a document, update its text and metadata, then verify.""" + self.manager.add( + ids=["upd-1"], + documents=["Original text."], + metadatas=[{"version": 1}], + ) + + update_response = self.manager.update( + ids=["upd-1"], + documents=["Updated text."], + metadatas=[{"version": 2}], + ) + self.assertTrue(update_response["success"]) + self.assertEqual(update_response["data"]["count"], 1) + + fetch_response = self.manager.fetch_one("upd-1") + self.assertTrue(fetch_response["success"]) + doc = fetch_response["data"] + self.assertEqual(doc["document"], "Updated text.") + self.assertEqual(doc["metadata"]["version"], 2) + + def test_delete(self) -> None: + """Add a document, delete it, then confirm it is gone.""" + self.manager.add(ids=["del-1"], documents=["To be deleted."]) + + delete_response = self.manager.delete(ids=["del-1"]) + self.assertTrue(delete_response["success"]) + self.assertEqual(delete_response["data"]["count"], 1) + + fetch_response = self.manager.fetch_one("del-1") + self.assertTrue(fetch_response["success"]) # success=True (document simply absent) + self.assertIsNone(fetch_response["data"]) # data=None confirms it is gone + + def test_add_duplicate_id_fails(self) -> None: + """Adding a document with an already-existing ID returns success=False.""" + self.manager.add(ids=["dup-1"], documents=["First insert."]) + + response = self.manager.add(ids=["dup-1"], documents=["Duplicate insert."]) + self.assertFalse(response["success"]) + self.assertIsNone(response["data"]) + self.assertIsNotNone(response["message"]) # Error message must be present + + def test_fetch_one_not_found(self) -> None: + """Fetching a non-existent ID returns success=True with data=None.""" + response = self.manager.fetch_one("ghost-id-99999") + self.assertTrue(response["success"]) + self.assertIsNone(response["data"]) + self.assertIn("not found", response["message"].lower()) + + def test_update_requires_payload(self) -> None: + """update() with no documents/metadatas/embeddings returns success=False.""" + self.manager.add(ids=["empty-upd"], documents=["Some text."]) + response = self.manager.update(ids=["empty-upd"]) # nothing to update + self.assertFalse(response["success"]) + self.assertIn("requires at least one of", response["message"]) + + def test_delete_empty_ids_fails(self) -> None: + """delete() called with an empty list returns success=False.""" + response = self.manager.delete(ids=[]) + self.assertFalse(response["success"]) + self.assertIn("non-empty ids list", response["message"]) + + def test_collection_exists(self) -> None: + """collection_exists() returns success=True with a count field.""" + self.manager.add(ids=["ce-1"], documents=["Existence check doc."]) + response = self.manager.collection_exists() + self.assertTrue(response["success"]) + self.assertIn("count", response["data"]) + self.assertGreaterEqual(response["data"]["count"], 1) + + +if __name__ == "__main__": + unittest.main() From bbe4cf64ab5820e5c5bc31c1e0bd0552517b50b7 Mon Sep 17 00:00:00 2001 From: Ronit Kiran Murmu Date: Mon, 2 Mar 2026 19:36:26 +0530 Subject: [PATCH 2/4] Addressing PR comments: Logging update, imports clean up, version management --- backend/main/src/store/DBVector.py | 80 ++++++++++++++------- backend/main/src/utils/versionManagement.py | 5 ++ backend/tests/test_db_vector.py | 5 ++ 3 files changed, 66 insertions(+), 24 deletions(-) diff --git a/backend/main/src/store/DBVector.py b/backend/main/src/store/DBVector.py index 068342f..7e7be04 100644 --- a/backend/main/src/store/DBVector.py +++ b/backend/main/src/store/DBVector.py @@ -19,10 +19,8 @@ if _p not in sys.path: sys.path.append(_p) -from utils.DRLogger import dr_logger # noqa: E402 - -logging.basicConfig(level=logging.INFO) -_std_logger = logging.getLogger(__name__) +from main.src.utils.DRLogger import dr_logger +from main.src.utils.versionManagement import getAppVersion try: import chromadb @@ -30,6 +28,9 @@ except ImportError: _CHROMA_AVAILABLE = False +logging.basicConfig(level=logging.INFO) +_std_logger = logging.getLogger(__name__) + class DBVectorManager: """ @@ -75,7 +76,20 @@ class DBVectorManager: def __init__(self, persist_directory: Union[str, Path], collection_name: str) -> None: if not _CHROMA_AVAILABLE: - raise RuntimeError("chromadb is not installed. Run: uv add chromadb") + error_msg = "chromadb is not installed. Run: uv add chromadb" + _std_logger.error(error_msg) + try: + dr_logger.log( + log_type="error", + message=error_msg, + origin="system", + module="DB", + urgency="critical", + app_version=getAppVersion(), + ) + except Exception: + pass + raise RuntimeError(error_msg) self.persist_directory = str(persist_directory) self.collection_name = collection_name @@ -90,7 +104,7 @@ def _log(self, level: str, message: str, urgency: str = "none") -> None: ## Parameters - - `level` (`str`) — `"info"` or `"error"`. + - `level` (`str`) — `"info"`, `"success"`, or `"error"`. - `message` (`str`) — Event description. - `urgency` (`str`) — `"none"`, `"moderate"`, or `"critical"`. Defaults to `"none"`. @@ -121,12 +135,12 @@ def _log(self, level: str, message: str, urgency: str = "none") -> None: try: dr_logger.log( - log_type="error" if level == "error" else "info", + log_type=level, message=message, origin="system", module="DB", urgency=urgency, # type: ignore - app_version="1.0", + app_version=getAppVersion(), ) except Exception as e: _std_logger.error(f"DRLogger internal failure in DBVectorManager: {e}") @@ -191,14 +205,18 @@ def add( - For upsert semantics, extend to call `_collection.upsert()`. """ if not ids: - return {"success": False, "message": "ids must be a non-empty list.", "data": None} + msg = "ids must be a non-empty list." + self._log("error", msg, "moderate") + return {"success": False, "message": msg, "data": None} try: # Check for existing IDs — chromadb 1.5.2+ silently ignores duplicates existing = self._collection.get(ids=ids) if existing.get("ids"): found = existing["ids"][0] - return {"success": False, "message": f"ID '{found}' already exists.", "data": None} + msg = f"ID '{found}' already exists." + self._log("error", msg, "moderate") + return {"success": False, "message": msg, "data": None} kwargs: Dict[str, Any] = {"ids": ids} if documents is not None: @@ -210,7 +228,7 @@ def add( self._collection.add(**kwargs) count = len(ids) - self._log("info", f"Added {count} document(s) to '{self.collection_name}'.") + self._log("success", f"Added {count} document(s) to '{self.collection_name}'.") return { "success": True, "message": f"{count} document(s) added to collection '{self.collection_name}'", @@ -288,7 +306,7 @@ def fetch_all( result = self._collection.get(**kwargs) ids = result.get("ids", []) - self._log("info", f"Fetched {len(ids)} document(s) from '{self.collection_name}'.") + self._log("success", f"Fetched {len(ids)} document(s) from '{self.collection_name}'.") return { "success": True, "message": f"Fetched {len(ids)} document(s) from collection '{self.collection_name}'", @@ -344,22 +362,26 @@ def fetch_one(self, id: str) -> Dict[str, Any]: - Add `"embeddings"` to the `include` list to return raw vectors. """ if not id: - return {"success": False, "message": "id must be a non-empty string.", "data": None} + msg = "id must be a non-empty string." + self._log("error", msg, "moderate") + return {"success": False, "message": msg, "data": None} try: result = self._collection.get(ids=[id], include=["documents", "metadatas"]) ids = result.get("ids", []) if not ids: + msg = f"Document '{id}' not found in collection '{self.collection_name}'" + self._log("info", msg) # Not an error, just not found return { "success": True, - "message": f"Document '{id}' not found in collection '{self.collection_name}'", + "message": msg, "data": None, } docs = result.get("documents") or [None] metas = result.get("metadatas") or [None] - self._log("info", f"Document '{id}' fetched from '{self.collection_name}'.") + self._log("success", f"Document '{id}' fetched from '{self.collection_name}'.") return { "success": True, "message": f"Document '{id}' fetched successfully", @@ -430,12 +452,16 @@ def update( - For upsert (add if missing), call `_collection.upsert()` directly. """ if not ids: - return {"success": False, "message": "ids must be a non-empty list.", "data": None} + msg = "ids must be a non-empty list." + self._log("error", msg, "moderate") + return {"success": False, "message": msg, "data": None} if documents is None and metadatas is None and embeddings is None: + msg = "Update requires at least one of: documents, metadatas, embeddings." + self._log("error", msg, "moderate") return { "success": False, - "message": "Update requires at least one of: documents, metadatas, embeddings.", + "message": msg, "data": None, } @@ -450,7 +476,7 @@ def update( self._collection.update(**kwargs) count = len(ids) - self._log("info", f"Updated {count} document(s) in '{self.collection_name}'.") + self._log("success", f"Updated {count} document(s) in '{self.collection_name}'.") return { "success": True, "message": f"{count} document(s) updated in collection '{self.collection_name}'", @@ -501,16 +527,18 @@ def delete(self, ids: List[str]) -> Dict[str, Any]: - For metadata-based bulk deletions, extend to call `_collection.delete(where={...})`. """ if not ids: + msg = "delete() requires a non-empty ids list." + self._log("error", msg, "moderate") return { "success": False, - "message": "delete() requires a non-empty ids list.", + "message": msg, "data": None, } try: self._collection.delete(ids=ids) count = len(ids) - self._log("info", f"Deleted {count} document(s) from '{self.collection_name}'.") + self._log("success", f"Deleted {count} document(s) from '{self.collection_name}'.") return { "success": True, "message": f"{count} document(s) deleted from collection '{self.collection_name}'", @@ -606,12 +634,12 @@ def _initialize_chroma_store() -> None: chroma_store_dir.mkdir(parents=True, exist_ok=True) _std_logger.info(f"ChromaDB storage directory ensured: {chroma_store_dir}") dr_logger.log( - log_type="info", + log_type="success", message=f"ChromaDB storage directory ensured: {chroma_store_dir}", origin="system", module="DB", urgency="none", - app_version="1.0", + app_version=getAppVersion(), ) except Exception as e: _std_logger.error(f"Failed to initialize ChromaDB store directory: {e}") @@ -622,19 +650,23 @@ def _initialize_chroma_store() -> None: origin="system", module="DB", urgency="critical", - app_version="1.0", + app_version=getAppVersion(), ) except Exception: pass # Run on import -_initialize_chroma_store() +if not any("unittest" in arg for arg in sys.argv) and not any("pytest" in arg for arg in sys.argv): + _initialize_chroma_store() # Singleton export — SDK-style, mirrors DBManager pattern _chroma_store_path = BASE_DIR / "database" / "chroma_store" if _CHROMA_AVAILABLE: + # This instantiation triggers two things: + # 1. chromadb.PersistentClient(...) -> Creates the folder/database if missing. + # 2. get_or_create_collection(...) -> Creates the specific collection if missing. db_vector_manager = DBVectorManager( persist_directory=_chroma_store_path, collection_name="research_documents", diff --git a/backend/main/src/utils/versionManagement.py b/backend/main/src/utils/versionManagement.py index 3d3fec6..10711a4 100644 --- a/backend/main/src/utils/versionManagement.py +++ b/backend/main/src/utils/versionManagement.py @@ -13,6 +13,11 @@ LOG_TAGS = ["VERSION_MANAGEMENT"] +def getAppVersion(): + """Public function to get the current application version.""" + return _get_version() + + def _get_version(): return f"{MAJOR_CHANGE}.{NEW_FEATURE}.{MINOR_BUGFIXES}" diff --git a/backend/tests/test_db_vector.py b/backend/tests/test_db_vector.py index 84e15ed..1dc2275 100644 --- a/backend/tests/test_db_vector.py +++ b/backend/tests/test_db_vector.py @@ -46,6 +46,11 @@ def setUp(self) -> None: def tearDown(self) -> None: self.patcher.stop() + def test_log(self) -> None: + """Verify that logs are emitted.""" + self.manager._log("info", "Test log message") + self.mock_logger.log.assert_called() + # ----------------------------------------------------------------------- def test_add_and_fetch_one(self) -> None: From 942038ef59bba511db73072ee76127904840f86d Mon Sep 17 00:00:00 2001 From: Ronit Kiran Murmu Date: Mon, 2 Mar 2026 19:59:04 +0530 Subject: [PATCH 3/4] Refactor: Extract global _log_db_event function --- backend/main/src/store/DBVector.py | 104 ++++++++++------------------- 1 file changed, 36 insertions(+), 68 deletions(-) diff --git a/backend/main/src/store/DBVector.py b/backend/main/src/store/DBVector.py index 7e7be04..7667420 100644 --- a/backend/main/src/store/DBVector.py +++ b/backend/main/src/store/DBVector.py @@ -32,6 +32,36 @@ _std_logger = logging.getLogger(__name__) +def _log_db_event(level: str, message: str, urgency: str = "none") -> None: + """ + ## Description + + Global helper to emit log entries to the standard Python logger and `DRLogger`. + + ## Parameters + + - `level` (`str`) — `"info"`, `"success"`, or `"error"`. + - `message` (`str`) — Event description. + - `urgency` (`str`) — `"none"`, `"moderate"`, or `"critical"`. Defaults to `"none"`. + """ + if level == "error": + _std_logger.error(message) + else: + _std_logger.info(message) + + try: + dr_logger.log( + log_type=level, + message=message, + origin="system", + module="DB", + urgency=urgency, # type: ignore + app_version=getAppVersion(), + ) + except Exception as e: + _std_logger.error(f"DRLogger internal failure in DBVectorManager: {e}") + + class DBVectorManager: """ ## Description @@ -79,14 +109,7 @@ def __init__(self, persist_directory: Union[str, Path], collection_name: str) -> error_msg = "chromadb is not installed. Run: uv add chromadb" _std_logger.error(error_msg) try: - dr_logger.log( - log_type="error", - message=error_msg, - origin="system", - module="DB", - urgency="critical", - app_version=getAppVersion(), - ) + _log_db_event("error", error_msg, "critical") except Exception: pass raise RuntimeError(error_msg) @@ -100,50 +123,10 @@ def _log(self, level: str, message: str, urgency: str = "none") -> None: """ ## Description - Emits a log entry to the standard Python logger and `DRLogger`. - - ## Parameters - - - `level` (`str`) — `"info"`, `"success"`, or `"error"`. - - `message` (`str`) — Event description. - - `urgency` (`str`) — `"none"`, `"moderate"`, or `"critical"`. Defaults to `"none"`. - - ## Returns - - `None` - - ## Raises - - - `None` — DRLogger failures are silently caught. - - ## Side Effects - - - Writes to terminal and to `logs.db.sqlite3` via `DRLogger`. - - ## Debug Notes - - - DRLogger failures are printed to terminal but do not raise. - - ## Customization - - - Extend with `"warning"` level support if needed. + Instance-level wrapper for global `_log_db_event`. + Preserves existing API for methods within this class. """ - if level == "error": - _std_logger.error(message) - else: - _std_logger.info(message) - - try: - dr_logger.log( - log_type=level, - message=message, - origin="system", - module="DB", - urgency=urgency, # type: ignore - app_version=getAppVersion(), - ) - except Exception as e: - _std_logger.error(f"DRLogger internal failure in DBVectorManager: {e}") + _log_db_event(level, message, urgency) def add( self, @@ -632,26 +615,11 @@ def _initialize_chroma_store() -> None: chroma_store_dir = BASE_DIR / "database" / "chroma_store" try: chroma_store_dir.mkdir(parents=True, exist_ok=True) - _std_logger.info(f"ChromaDB storage directory ensured: {chroma_store_dir}") - dr_logger.log( - log_type="success", - message=f"ChromaDB storage directory ensured: {chroma_store_dir}", - origin="system", - module="DB", - urgency="none", - app_version=getAppVersion(), - ) + _log_db_event("success", f"ChromaDB storage directory ensured: {chroma_store_dir}") except Exception as e: _std_logger.error(f"Failed to initialize ChromaDB store directory: {e}") try: - dr_logger.log( - log_type="error", - message=f"Failed to initialize ChromaDB store directory: {e}", - origin="system", - module="DB", - urgency="critical", - app_version=getAppVersion(), - ) + _log_db_event("error", f"Failed to initialize ChromaDB store directory: {e}", "critical") except Exception: pass From 5ab717f3541c6441730e58fdd9bbec73943456b6 Mon Sep 17 00:00:00 2001 From: Ronit Kiran Murmu Date: Wed, 4 Mar 2026 20:40:06 +0530 Subject: [PATCH 4/4] updated CRUD for Chorma Vector store --- backend/main/src/store/DBVector.py | 141 +++++++++++++---------------- backend/tests/test_db_vector.py | 5 +- 2 files changed, 69 insertions(+), 77 deletions(-) diff --git a/backend/main/src/store/DBVector.py b/backend/main/src/store/DBVector.py index 7667420..4554e87 100644 --- a/backend/main/src/store/DBVector.py +++ b/backend/main/src/store/DBVector.py @@ -8,7 +8,9 @@ from typing import Any, Dict, List, Optional, Union import logging import sys - +import chromadb +from main.src.utils.DRLogger import dr_logger, LogType +from main.src.utils.versionManagement import getAppVersion # ------------------------------------------------------------------ # Path bootstrap # ------------------------------------------------------------------ @@ -19,43 +21,23 @@ if _p not in sys.path: sys.path.append(_p) -from main.src.utils.DRLogger import dr_logger -from main.src.utils.versionManagement import getAppVersion - -try: - import chromadb - _CHROMA_AVAILABLE = True -except ImportError: - _CHROMA_AVAILABLE = False - logging.basicConfig(level=logging.INFO) _std_logger = logging.getLogger(__name__) - -def _log_db_event(level: str, message: str, urgency: str = "none") -> None: - """ - ## Description - - Global helper to emit log entries to the standard Python logger and `DRLogger`. - - ## Parameters - - - `level` (`str`) — `"info"`, `"success"`, or `"error"`. - - `message` (`str`) — Event description. - - `urgency` (`str`) — `"none"`, `"moderate"`, or `"critical"`. Defaults to `"none"`. - """ +def global_log(level: LogType, message: str, urgency: str = "none") -> None: if level == "error": _std_logger.error(message) + elif level == "warning": + _std_logger.warning(message) else: _std_logger.info(message) - try: dr_logger.log( log_type=level, message=message, origin="system", module="DB", - urgency=urgency, # type: ignore + urgency=urgency, app_version=getAppVersion(), ) except Exception as e: @@ -88,7 +70,7 @@ class DBVectorManager: ## Raises - - `RuntimeError` — when `chromadb` is not installed. + - `None` ## Side Effects @@ -97,7 +79,7 @@ class DBVectorManager: ## Debug Notes - - Install chromadb via `uv add chromadb` if import fails. + - Ensure `chromadb` is installed via `uv add chromadb`. ## Customization @@ -105,28 +87,12 @@ class DBVectorManager: """ def __init__(self, persist_directory: Union[str, Path], collection_name: str) -> None: - if not _CHROMA_AVAILABLE: - error_msg = "chromadb is not installed. Run: uv add chromadb" - _std_logger.error(error_msg) - try: - _log_db_event("error", error_msg, "critical") - except Exception: - pass - raise RuntimeError(error_msg) - self.persist_directory = str(persist_directory) self.collection_name = collection_name + global_log("info", f"Initializing DBVectorManager for '{self.collection_name}' at '{self.persist_directory}'") self._client = chromadb.PersistentClient(path=self.persist_directory) self._collection = self._client.get_or_create_collection(name=self.collection_name) - - def _log(self, level: str, message: str, urgency: str = "none") -> None: - """ - ## Description - - Instance-level wrapper for global `_log_db_event`. - Preserves existing API for methods within this class. - """ - _log_db_event(level, message, urgency) + global_log("success", f"DBVectorManager initialized for '{self.collection_name}'") def add( self, @@ -187,9 +153,11 @@ def add( - For upsert semantics, extend to call `_collection.upsert()`. """ + global_log("info", f"Executing add() for {len(ids) if ids else 0} document(s) to '{self.collection_name}'.") + if not ids: msg = "ids must be a non-empty list." - self._log("error", msg, "moderate") + global_log("error", msg, "moderate") return {"success": False, "message": msg, "data": None} try: @@ -198,7 +166,7 @@ def add( if existing.get("ids"): found = existing["ids"][0] msg = f"ID '{found}' already exists." - self._log("error", msg, "moderate") + global_log("error", msg, "moderate") return {"success": False, "message": msg, "data": None} kwargs: Dict[str, Any] = {"ids": ids} @@ -211,14 +179,14 @@ def add( self._collection.add(**kwargs) count = len(ids) - self._log("success", f"Added {count} document(s) to '{self.collection_name}'.") + global_log("success", f"Added {count} document(s) to '{self.collection_name}'.") return { "success": True, "message": f"{count} document(s) added to collection '{self.collection_name}'", "data": {"count": count}, } except Exception as e: - self._log("error", f"Error adding to '{self.collection_name}': {e}", "moderate") + global_log("error", f"Error adding to '{self.collection_name}': {e}", "moderate") return {"success": False, "message": str(e), "data": None} def fetch_all( @@ -278,6 +246,7 @@ def fetch_all( - Add `"embeddings"` to the `include` list inside `.get()` if raw vectors are needed. """ + global_log("info", f"Executing fetch_all() on '{self.collection_name}'. limit={limit}, offset={offset}") try: kwargs: Dict[str, Any] = {"include": ["documents", "metadatas"]} if where is not None: @@ -289,7 +258,7 @@ def fetch_all( result = self._collection.get(**kwargs) ids = result.get("ids", []) - self._log("success", f"Fetched {len(ids)} document(s) from '{self.collection_name}'.") + global_log("success", f"Fetched {len(ids)} document(s) from '{self.collection_name}'.") return { "success": True, "message": f"Fetched {len(ids)} document(s) from collection '{self.collection_name}'", @@ -301,7 +270,7 @@ def fetch_all( }, } except Exception as e: - self._log("error", f"Error fetching from '{self.collection_name}': {e}", "moderate") + global_log("error", f"Error fetching from '{self.collection_name}': {e}", "moderate") return {"success": False, "message": str(e), "data": None} def fetch_one(self, id: str) -> Dict[str, Any]: @@ -344,9 +313,11 @@ def fetch_one(self, id: str) -> Dict[str, Any]: - Add `"embeddings"` to the `include` list to return raw vectors. """ + global_log("info", f"Executing fetch_one() for '{id}' on '{self.collection_name}'.") + if not id: msg = "id must be a non-empty string." - self._log("error", msg, "moderate") + global_log("error", msg, "moderate") return {"success": False, "message": msg, "data": None} try: @@ -355,7 +326,7 @@ def fetch_one(self, id: str) -> Dict[str, Any]: if not ids: msg = f"Document '{id}' not found in collection '{self.collection_name}'" - self._log("info", msg) # Not an error, just not found + global_log("info", msg) # Not an error, just not found return { "success": True, "message": msg, @@ -364,14 +335,14 @@ def fetch_one(self, id: str) -> Dict[str, Any]: docs = result.get("documents") or [None] metas = result.get("metadatas") or [None] - self._log("success", f"Document '{id}' fetched from '{self.collection_name}'.") + global_log("success", f"Document '{id}' fetched from '{self.collection_name}'.") return { "success": True, "message": f"Document '{id}' fetched successfully", "data": {"id": ids[0], "document": docs[0], "metadata": metas[0]}, } except Exception as e: - self._log("error", f"Error fetching '{id}' from '{self.collection_name}': {e}", "moderate") + global_log("error", f"Error fetching '{id}' from '{self.collection_name}': {e}", "moderate") return {"success": False, "message": str(e), "data": None} def update( @@ -434,14 +405,16 @@ def update( - For upsert (add if missing), call `_collection.upsert()` directly. """ + global_log("info", f"Executing update() for {len(ids) if ids else 0} document(s) in '{self.collection_name}'.") + if not ids: msg = "ids must be a non-empty list." - self._log("error", msg, "moderate") + global_log("error", msg, "moderate") return {"success": False, "message": msg, "data": None} if documents is None and metadatas is None and embeddings is None: msg = "Update requires at least one of: documents, metadatas, embeddings." - self._log("error", msg, "moderate") + global_log("error", msg, "moderate") return { "success": False, "message": msg, @@ -459,14 +432,14 @@ def update( self._collection.update(**kwargs) count = len(ids) - self._log("success", f"Updated {count} document(s) in '{self.collection_name}'.") + global_log("success", f"Updated {count} document(s) in '{self.collection_name}'.") return { "success": True, "message": f"{count} document(s) updated in collection '{self.collection_name}'", "data": {"count": count}, } except Exception as e: - self._log("error", f"Error updating '{self.collection_name}': {e}", "moderate") + global_log("error", f"Error updating '{self.collection_name}': {e}", "moderate") return {"success": False, "message": str(e), "data": None} def delete(self, ids: List[str]) -> Dict[str, Any]: @@ -509,9 +482,11 @@ def delete(self, ids: List[str]) -> Dict[str, Any]: - For metadata-based bulk deletions, extend to call `_collection.delete(where={...})`. """ + global_log("info", f"Executing delete() for {len(ids) if ids else 0} document(s) from '{self.collection_name}'.") + if not ids: msg = "delete() requires a non-empty ids list." - self._log("error", msg, "moderate") + global_log("error", msg, "moderate") return { "success": False, "message": msg, @@ -521,14 +496,14 @@ def delete(self, ids: List[str]) -> Dict[str, Any]: try: self._collection.delete(ids=ids) count = len(ids) - self._log("success", f"Deleted {count} document(s) from '{self.collection_name}'.") + global_log("success", f"Deleted {count} document(s) from '{self.collection_name}'.") return { "success": True, "message": f"{count} document(s) deleted from collection '{self.collection_name}'", "data": {"count": count}, } except Exception as e: - self._log("error", f"Error deleting from '{self.collection_name}': {e}", "moderate") + global_log("error", f"Error deleting from '{self.collection_name}': {e}", "moderate") return {"success": False, "message": str(e), "data": None} def collection_exists(self) -> Dict[str, Any]: @@ -566,6 +541,8 @@ def collection_exists(self) -> Dict[str, Any]: - Use `count` to drive pagination decisions in `fetch_all()`. """ + global_log("info", f"Executing collection_exists() for '{self.collection_name}'.") + try: count = self._collection.count() return { @@ -574,7 +551,7 @@ def collection_exists(self) -> Dict[str, Any]: "data": {"collection_name": self.collection_name, "count": count}, } except Exception as e: - self._log("error", f"Collection '{self.collection_name}' not accessible: {e}", "critical") + global_log("error", f"Collection '{self.collection_name}' not accessible: {e}", "critical") return { "success": False, "message": f"Collection '{self.collection_name}' is not accessible: {e}", @@ -615,29 +592,41 @@ def _initialize_chroma_store() -> None: chroma_store_dir = BASE_DIR / "database" / "chroma_store" try: chroma_store_dir.mkdir(parents=True, exist_ok=True) - _log_db_event("success", f"ChromaDB storage directory ensured: {chroma_store_dir}") + _std_logger.info(f"ChromaDB storage directory ensured: {chroma_store_dir}") + dr_logger.log( + log_type="success", + message=f"ChromaDB storage directory ensured: {chroma_store_dir}", + origin="system", + module="DB", + urgency="none", + app_version=getAppVersion(), + ) except Exception as e: _std_logger.error(f"Failed to initialize ChromaDB store directory: {e}") try: - _log_db_event("error", f"Failed to initialize ChromaDB store directory: {e}", "critical") + dr_logger.log( + log_type="error", + message=f"Failed to initialize ChromaDB store directory: {e}", + origin="system", + module="DB", + urgency="critical", + app_version=getAppVersion(), + ) except Exception: pass # Run on import if not any("unittest" in arg for arg in sys.argv) and not any("pytest" in arg for arg in sys.argv): - _initialize_chroma_store() + _initialize_chroma_store() # Singleton export — SDK-style, mirrors DBManager pattern _chroma_store_path = BASE_DIR / "database" / "chroma_store" -if _CHROMA_AVAILABLE: - # This instantiation triggers two things: - # 1. chromadb.PersistentClient(...) -> Creates the folder/database if missing. - # 2. get_or_create_collection(...) -> Creates the specific collection if missing. - db_vector_manager = DBVectorManager( - persist_directory=_chroma_store_path, - collection_name="research_documents", - ) -else: - db_vector_manager = None # type: ignore +# This instantiation triggers two things: +# 1. chromadb.PersistentClient(...) -> Creates the folder/database if missing. +# 2. get_or_create_collection(...) -> Creates the specific collection if missing. +db_vector_manager = DBVectorManager( + persist_directory=_chroma_store_path, + collection_name="research_documents", +) diff --git a/backend/tests/test_db_vector.py b/backend/tests/test_db_vector.py index 1dc2275..5dc3348 100644 --- a/backend/tests/test_db_vector.py +++ b/backend/tests/test_db_vector.py @@ -37,6 +37,8 @@ def setUp(self) -> None: # Mock dr_logger to avoid persistent side effects self.patcher = patch("main.src.store.DBVector.dr_logger") self.mock_logger = self.patcher.start() + self.patcher_global = patch("main.src.store.DBVector.global_log") + self.mock_global_log = self.patcher_global.start() # Unique collection name per test to ensure isolation import uuid @@ -45,11 +47,12 @@ def setUp(self) -> None: def tearDown(self) -> None: self.patcher.stop() + self.patcher_global.stop() def test_log(self) -> None: """Verify that logs are emitted.""" self.manager._log("info", "Test log message") - self.mock_logger.log.assert_called() + self.mock_global_log.assert_called() # -----------------------------------------------------------------------