Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# CI workflow: runs the test suite (with coverage) across supported Python
# versions, and lints with ruff. Indentation reconstructed from the GitHub
# Actions schema — the scraped diff had lost all leading whitespace.
name: Test

on:
  push:
    branches: [main]
  pull_request:

jobs:
  # Test job: matrix over every supported CPython version.
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.10', '3.11', '3.12']

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .[dev]

      - name: Run tests with coverage
        run: |
          pytest tests/ -v --cov=src/document_analysis_mcp --cov-report=term --cov-report=xml

      # Upload coverage only once (newest interpreter) to avoid duplicate reports.
      - name: Upload coverage to Codecov
        if: matrix.python-version == '3.12'
        uses: codecov/codecov-action@v4
        with:
          files: coverage.xml
          fail_ci_if_error: false

  # Lint job: single-version ruff check + format verification.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install ruff
        run: pip install ruff

      - name: Run ruff check
        run: ruff check src/ tests/

      - name: Run ruff format check
        run: ruff format --check src/ tests/
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ dependencies = [
dev = [
"pytest>=8.0.0",
"pytest-asyncio>=0.23.0",
"pytest-cov>=4.0.0",
"reportlab>=4.0.0",
"ruff>=0.5.0",
]
Expand Down
7 changes: 2 additions & 5 deletions src/document_analysis_mcp/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,7 @@ def ensure_cache_dir_exists(self) -> Path:
"""
if self.cache_dir.exists():
if not self.cache_dir.is_dir():
raise ValueError(
f"Cache path exists but is not a directory: {self.cache_dir}"
)
raise ValueError(f"Cache path exists but is not a directory: {self.cache_dir}")
return self.cache_dir

try:
Expand Down Expand Up @@ -167,8 +165,7 @@ def validate_required(self) -> None:
"""
if not self.has_api_key:
raise ValueError(
"ANTHROPIC_API_KEY is required. "
"Set it in your environment or .env file."
"ANTHROPIC_API_KEY is required. Set it in your environment or .env file."
)


Expand Down
4 changes: 3 additions & 1 deletion src/document_analysis_mcp/processors/chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,9 @@ def chunk_text(
max_pages = 3
text = _extract_pages(text, list(range(1, max_pages + 1)))
estimated_tokens = estimate_tokens(text)
logger.info("QUICK strategy: Limited to %d pages, %d estimated tokens", max_pages, estimated_tokens)
logger.info(
"QUICK strategy: Limited to %d pages, %d estimated tokens", max_pages, estimated_tokens
)

elif strategy == ChunkingStrategy.COMPREHENSIVE:
# Comprehensive: Full document, moderate chunk size
Expand Down
10 changes: 4 additions & 6 deletions src/document_analysis_mcp/processors/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,7 @@ def analyze_chunk(
user_message = f"{prompt}\n\n---\n\nDocument Content:\n{chunk}"

# Build messages list
messages: list[dict[str, Any]] = [
{"role": "user", "content": user_message}
]
messages: list[dict[str, Any]] = [{"role": "user", "content": user_message}]

# Log the request (truncate chunk for readability)
estimated_input = estimate_tokens(user_message)
Expand Down Expand Up @@ -292,7 +290,9 @@ def _synthesize_summary(
except (APIConnectionError, APIStatusError) as e:
logger.error("Failed to synthesize summary: %s", e)
# Fall back to concatenated summaries
return "\n\n".join(f"**Section {i}:**\n{a}" for i, a in enumerate(chunk_analyses, start=1))
return "\n\n".join(
f"**Section {i}:**\n{a}" for i, a in enumerate(chunk_analyses, start=1)
)

def _get_chunk_prompt(self, strategy: ChunkingStrategy) -> str:
"""Get the appropriate chunk analysis prompt for a strategy.
Expand All @@ -311,7 +311,6 @@ def _get_chunk_prompt(self, strategy: ChunkingStrategy) -> str:
- Critical information

Keep your response concise (2-3 paragraphs).""",

ChunkingStrategy.COMPREHENSIVE: """Analyze this document section thoroughly.
Include:
- Main topics and themes
Expand All @@ -320,7 +319,6 @@ def _get_chunk_prompt(self, strategy: ChunkingStrategy) -> str:
- Relationships between concepts

Provide a structured analysis with clear organization.""",

ChunkingStrategy.DEEP: """Perform a detailed analysis of this document section.
Cover:
- Primary and secondary themes
Expand Down
24 changes: 6 additions & 18 deletions src/document_analysis_mcp/processors/text_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,10 @@ def _extract_with_pdfplumber(
total_words += len(page_content.text.split())
tables_count += len(page_content.tables)
except (PDFSyntaxError, PSEOF) as e:
logger.warning(
"PDF syntax error on page %d: %s", page_num, e
)
logger.warning("PDF syntax error on page %d: %s", page_num, e)
pages.append(PageContent(page_number=page_num, text=""))
except (ValueError, TypeError, AttributeError) as e:
logger.warning(
"Content extraction error on page %d: %s", page_num, e
)
logger.warning("Content extraction error on page %d: %s", page_num, e)
pages.append(PageContent(page_number=page_num, text=""))

except (PDFSyntaxError, PSEOF) as e:
Expand Down Expand Up @@ -194,9 +190,7 @@ def _extract_with_pdfplumber(
success=True,
)

def _extract_pdfplumber_page(
self, page: pdfplumber.page.Page, page_num: int
) -> PageContent:
def _extract_pdfplumber_page(self, page: pdfplumber.page.Page, page_num: int) -> PageContent:
"""Extract content from a single pdfplumber page.

Args:
Expand Down Expand Up @@ -229,9 +223,7 @@ def _extract_pdfplumber_page(
tables=tables,
)

def _extract_pdfplumber_metadata(
self, pdf: pdfplumber.PDF
) -> DocumentMetadata:
def _extract_pdfplumber_metadata(self, pdf: pdfplumber.PDF) -> DocumentMetadata:
"""Extract metadata from pdfplumber PDF object.

Args:
Expand Down Expand Up @@ -298,9 +290,7 @@ def _extract_with_pypdf2(
total_pages,
)

for page_num, page in enumerate(
reader.pages[:pages_to_process], start=1
):
for page_num, page in enumerate(reader.pages[:pages_to_process], start=1):
try:
text = page.extract_text() or ""
page_content = PageContent(
Expand All @@ -312,9 +302,7 @@ def _extract_with_pypdf2(
total_chars += page_content.char_count
total_words += len(text.split())
except PdfReadError as e:
logger.warning(
"PDF read error on page %d with PyPDF2: %s", page_num, e
)
logger.warning("PDF read error on page %d with PyPDF2: %s", page_num, e)
pages.append(PageContent(page_number=page_num, text=""))
except (ValueError, TypeError, AttributeError) as e:
logger.warning(
Expand Down
11 changes: 6 additions & 5 deletions src/document_analysis_mcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,7 @@ def _setup_logging(level: str) -> None:
Args:
level: Logging level string (DEBUG, INFO, WARNING, ERROR).
"""
log_format = (
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(
level=getattr(logging, level.upper(), logging.INFO),
format=log_format,
Expand Down Expand Up @@ -212,8 +210,11 @@ def main() -> None:
logger.info("API Key Configured: %s", settings.has_api_key)
logger.info("Default Model: %s", settings.default_model)
logger.info("Classification Model: %s", settings.classification_model)
logger.info("Health endpoint available at: http://%s:%d/health",
settings.doc_analysis_host, settings.doc_analysis_port)
logger.info(
"Health endpoint available at: http://%s:%d/health",
settings.doc_analysis_host,
settings.doc_analysis_port,
)

# Ensure cache directory exists
settings.ensure_cache_dir_exists()
Expand Down
10 changes: 6 additions & 4 deletions src/document_analysis_mcp/tools/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,12 @@ def pdf_extract_full(
tables = []
for page in result.pages:
for table in page.tables:
tables.append({
"page": page.page_number,
"content": table,
})
tables.append(
{
"page": page.page_number,
"content": table,
}
)
if tables:
response["tables"] = tables

Expand Down
14 changes: 4 additions & 10 deletions tests/test_chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
splitting for LLM analysis with proper boundary detection.
"""

import pytest

from document_analysis_mcp.processors.chunker import (
CHARS_PER_TOKEN,
MODEL_LIMITS,
Expand Down Expand Up @@ -78,15 +76,15 @@ def test_paragraph_break_preferred(self):
break_pos = _find_break_point(text, 55, 0)
# The position should be right after the paragraph break
assert break_pos == 43
assert text[break_pos - 2:break_pos] == "\n\n"
assert text[break_pos - 2 : break_pos] == "\n\n"

def test_sentence_break_fallback(self):
"""Test that sentence breaks are used when no paragraph break found."""
text = "First sentence. Second sentence. Third sentence."
# Position where paragraph break won't be found in first half
break_pos = _find_break_point(text, 35, 0)
# Should end at a sentence boundary
assert text[break_pos - 2:break_pos] == ". "
assert text[break_pos - 2 : break_pos] == ". "

def test_word_break_fallback(self):
"""Test that word breaks are used when no sentence break found."""
Expand Down Expand Up @@ -176,9 +174,7 @@ def test_small_text_single_chunk(self):
def test_quick_strategy_limits_pages(self):
"""Test that QUICK strategy limits to first 3 pages."""
# Create text with page markers
pages = "\n\n".join(
f"[Page {i}]\nContent for page {i}." for i in range(1, 6)
)
pages = "\n\n".join(f"[Page {i}]\nContent for page {i}." for i in range(1, 6))
chunks = chunk_text(pages, ChunkingStrategy.QUICK, total_pages=5)

# Should only contain first 3 pages
Expand All @@ -191,9 +187,7 @@ def test_quick_strategy_limits_pages(self):

def test_comprehensive_strategy_processes_all(self):
"""Test that COMPREHENSIVE processes entire document."""
pages = "\n\n".join(
f"[Page {i}]\nContent for page {i}." for i in range(1, 6)
)
pages = "\n\n".join(f"[Page {i}]\nContent for page {i}." for i in range(1, 6))
chunks = chunk_text(pages, ChunkingStrategy.COMPREHENSIVE, total_pages=5)

combined = " ".join(chunks)
Expand Down
Loading