Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "document-analysis-mcp"
version = "0.3.0"
version = "0.4.0"
description = "General-purpose Document Analysis MCP server for PDF processing"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
45 changes: 44 additions & 1 deletion src/document_analysis_mcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
from document_analysis_mcp.config import get_settings
from document_analysis_mcp.tools.classify import pdf_classify
from document_analysis_mcp.tools.extract import pdf_extract_full
from document_analysis_mcp.tools.kb_ingest import pdf_kb_ingest
from document_analysis_mcp.tools.ocr import pdf_ocr
from document_analysis_mcp.tools.structure import pdf_extract_structure
from document_analysis_mcp.tracking import get_tracker

# Server version - should match pyproject.toml
__version__ = "0.3.0"
__version__ = "0.4.0"

# Track server startup time for uptime calculation
_startup_time: datetime | None = None
Expand Down Expand Up @@ -260,6 +261,48 @@ def pdf_extract_structure_tool(
)


@mcp.tool()
def pdf_kb_ingest_tool(
    pdf_content: str,
    title: str | None = None,
    source_url: str | None = None,
    content_type: str | None = None,
    max_chunk_size: int = 4000,
    max_file_size_mb: float = 50.0,
    use_cache: bool = True,
) -> dict[str, Any]:
    """Ingest a PDF into the knowledge bank in a single call.

    Bundles extraction, classification, and chunking into one operation
    intended for batch KB processing; the result is shaped so that
    knowledge-bank-tools can consume it directly.

    Args:
        pdf_content: Base64-encoded PDF content.
        title: Document title; extracted from the PDF when omitted.
        source_url: Source URL for the document.
        content_type: Document type (research_paper, technical_doc,
            financial_report, legal_doc, manual, other); auto-classified
            when omitted.
        max_chunk_size: Maximum characters per chunk (default 4000).
        max_file_size_mb: Maximum allowed file size in megabytes.
        use_cache: Whether to reuse cached results for previously
            processed documents.

    Returns:
        Dictionary containing success, title, content_type, chunks (with
        text, page_numbers, word_count), metadata (page_count, has_tables,
        has_toc), and processing_stats.
    """
    # Collect the tool arguments once and forward them unchanged to the
    # underlying implementation; this wrapper only exposes it via MCP.
    ingest_kwargs = {
        "pdf_content": pdf_content,
        "title": title,
        "source_url": source_url,
        "content_type": content_type,
        "max_chunk_size": max_chunk_size,
        "max_file_size_mb": max_file_size_mb,
        "use_cache": use_cache,
    }
    return pdf_kb_ingest(**ingest_kwargs)


@mcp.tool()
def cache_stats() -> dict[str, Any]:
"""Get cache statistics and usage information.
Expand Down
7 changes: 7 additions & 0 deletions src/document_analysis_mcp/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
PDF_EXTRACT_FULL_METADATA,
pdf_extract_full,
)
from document_analysis_mcp.tools.kb_ingest import (
PDF_KB_INGEST_METADATA,
pdf_kb_ingest,
)

__all__ = [
# Extract tool
Expand All @@ -18,4 +22,7 @@
"pdf_classify",
"PDF_CLASSIFY_METADATA",
"DocumentType",
# KB Ingest tool
"pdf_kb_ingest",
"PDF_KB_INGEST_METADATA",
]
Loading