GoodEd · jayprakash1 · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,29 @@
+name: Tests  # CI workflow: type-check src/rag with mypy, then run the selected pytest modules
+
+on:
+  pull_request:
+    branches: [main]  # only pull requests whose base branch is main trigger this workflow
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4  # check out the repository so later steps can see the sources
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so YAML keeps it a string; same version as python_version in mypy.ini
+
+      - uses: actions/cache@v4
+        with:
+          path: ~/.cache/pip  # pip's download cache directory on the Linux runner
+          key: pip-${{ hashFiles('requirements.txt') }}  # key changes whenever requirements.txt changes
+          restore-keys: pip-  # on an exact-key miss, fall back to the most recent cache with this prefix
+
+      - run: pip install -r requirements.txt  # also installs mypy and pytest, which are listed in requirements.txt
+
+      - name: Type check
+        run: mypy --config-file mypy.ini  # files to check come from the `files` setting in mypy.ini
+
+      - name: Unit and integration tests
+        run: pytest tests/test_prompt_builder.py tests/test_composite_vector_store.py tests/test_neet_rag_integration.py -v  # runs only these three modules, not the whole tests/ directory
diff --git a/mypy.ini b/mypy.ini
@@ -0,0 +1,15 @@
+[mypy]
+files = src/rag/
+python_version = 3.11
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+check_untyped_defs = true
+ignore_missing_imports = true
+follow_imports = silent
+
+[mypy-src.processors.*]
+ignore_errors = true
+
+[mypy-src.utils.*]
+ignore_errors = true
diff --git a/requirements.txt b/requirements.txt
@@ -45,3 +45,10 @@ redis>=4.0.0
 boto3>=1.34.0
 markdownify>=0.11.0
 pandas>=2.0.0
+
+# Type checking (dev)
+mypy>=1.10.0
+
+# Testing
+pytest>=8.0.0
+pytest-cov>=5.0.0
diff --git a/src/rag/llm_manager.py b/src/rag/llm_manager.py
@@ -1,4 +1,4 @@
-from typing import Optional, Dict, Any, List
+from typing import Optional, Dict, Any, List, cast
 import os
 import base64
 
@@ -10,13 +10,13 @@ def __init__(
         model: str = "llama3.2",
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
-    ):
+    ) -> None:
         self.provider = provider
         self.model = model
-        self.llm = None
+        self.llm: Optional[Any] = None
         self._initialize_llm(api_key, base_url)
 
-    def _initialize_llm(self, api_key: Optional[str], base_url: Optional[str]):
+    def _initialize_llm(self, api_key: Optional[str], base_url: Optional[str]) -> None:
         if self.provider == "ollama":
             try:
                 from langchain_community.llms import Ollama
@@ -33,7 +33,7 @@ def _initialize_llm(self, api_key: Optional[str], base_url: Optional[str]):
 
                 self.llm = ChatOpenAI(
                     model=self.model,
-                    api_key=api_key or os.getenv("OPENAI_API_KEY"),
+                    api_key=api_key or os.getenv("OPENAI_API_KEY"),  # type: ignore[arg-type]  # LangChain accepts raw str API keys at runtime
                     base_url=base_url or os.getenv("OPENAI_BASE_URL"),
                     temperature=0.7,
                 )
@@ -44,9 +44,9 @@ def _initialize_llm(self, api_key: Optional[str], base_url: Optional[str]):
             try:
                 from langchain_anthropic import ChatAnthropic
 
-                self.llm = ChatAnthropic(
+                self.llm = ChatAnthropic(  # type: ignore[call-arg]  # Runtime accepts model kwarg; stubs may differ
                     model=self.model,
-                    api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
+                    api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),  # type: ignore[arg-type]  # LangChain accepts raw str API keys at runtime
                     temperature=0.7,
                 )
             except ImportError:
@@ -115,7 +115,7 @@ def generate(
 
         response = self.llm.invoke(prompt)
         if hasattr(response, "content"):
-            return response.content
+            return cast(str, response.content)
         return str(response)
 
     def extract_image_context(

diff --git a/src/rag/neet_rag.py b/src/rag/neet_rag.py
@@ -1,4 +1,4 @@
-from typing import Dict, Any, List, Optional, Union
+from typing import Dict, Any, List, Optional, Union, Tuple
 from pathlib import Path
 import os
 import re
@@ -57,6 +57,7 @@ def __init__(
         youtube_subdir = os.path.join(resolved_persist_dir, "youtube")
         csv_subdir = os.path.join(resolved_persist_dir, "csv")
         has_split_indexes = os.path.isdir(youtube_subdir) or os.path.isdir(csv_subdir)
+        self.vector_manager: Union[VectorStoreManager, CompositeVectorStoreManager]
 
         if has_split_indexes:
             self.vector_manager = build_composite_manager(
@@ -80,7 +81,7 @@ def __init__(
         self.prompt_builder = RAGPromptBuilder()
         self._vectorstore_loaded = False
         self.logger = logging.getLogger(__name__)
-        self._source_manager = None
+        self._source_manager: Optional[Any] = None
         self._source_title_cache: Dict[str, str] = {}
 
     @staticmethod
@@ -97,7 +98,7 @@ def _is_meaningful_title(title: str) -> bool:
             return False
         return True
 
-    def _get_source_manager(self):
+    def _get_source_manager(self) -> Optional[Any]:
         if self._source_manager is not None:
             return self._source_manager
         try:
@@ -360,6 +361,7 @@ def _dedupe_docs(self, docs: List[Document]) -> List[Document]:
         deduped = []
         seen = set()
         for doc in docs:
+            key: Tuple[Any, ...]
             source_type = doc.metadata.get("source_type") or doc.metadata.get(
                 "content_type", ""
             )
@@ -464,8 +466,8 @@ def _retrieve_docs_blended(self, question: str, top_k: int) -> List[Document]:
 
     @staticmethod
     def _is_youtube_doc(doc: Document) -> bool:
-        source_type = doc.metadata.get("source_type") or doc.metadata.get(
-            "content_type", ""
+        source_type = str(
+            doc.metadata.get("source_type") or doc.metadata.get("content_type", "")
         )
         return source_type == "youtube"
 
@@ -756,6 +758,6 @@ def get_stats(self) -> Dict[str, Any]:
         except Exception as e:
             return {"error": str(e)}
 
-    def reset_knowledge_base(self):
+    def reset_knowledge_base(self) -> None:
         self.vector_manager.delete_collection()
         self._vectorstore_loaded = False