Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions api/agents/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""
Agents Package
==============

This package will contain:
- LangGraph agent implementations
- Agent state management
- Tool orchestration
"""
104 changes: 104 additions & 0 deletions api/agents/chief_of_staff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
Chief of Staff Agent - LangGraph Implementation
================================================

This file will contain:
- Agent state definition (TypedDict for conversation state)
- Graph nodes:
  * retrieval_node: retrieves context from the vector DB
  * reasoning_node: performs reasoning with the LLM
  * tool_selection_node: decides which tool to use
  * tool_execution_node: executes the selected tool
  * response_generation_node: generates the final response
- Conditional edges (decision branching logic)
- LLM integration (GPT-4/Gemini/Claude)
- Tool calling logic
- Memory management (conversation history)
- Error handling and fallback strategies
- Streaming support (real-time responses)
"""

from typing import TypedDict, Annotated, Sequence
from langgraph.graph import StateGraph, END


class AgentState(TypedDict, total=False):
    """
    State shared between LangGraph nodes during one agent run.

    ``total=False`` keeps every key optional so nodes can populate the
    state incrementally — and an empty dict remains valid, which keeps
    this backward-compatible with the previous field-less definition.
    """

    # Conversation messages accumulated so far (user + assistant turns).
    messages: list
    # Context retrieved from the vector DB by retrieval_node.
    context: str
    # Record of tool invocations made during this run.
    tools_called: list
    # The final user-facing answer produced by response_generation_node.
    final_response: str


class ChiefOfStaffAgent:
    """
    Main agent class - handles user queries end to end.

    Orchestrates a LangGraph pipeline: retrieve context, reason about the
    query, optionally select and execute a tool, then generate a response.
    Every node method below is a stub to be filled in (see TODOs).
    """

    def __init__(self):
        """
        Initialize the agent with its LLM, tools, and compiled graph.
        """
        # TODO: Initialize LangGraph, LLM, tools
        pass

    def retrieval_node(self, state: AgentState) -> AgentState:
        """
        Retrieve relevant context from the vector DB and add it to the state.
        """
        # TODO: Implement retrieval logic
        pass

    def reasoning_node(self, state: AgentState) -> AgentState:
        """
        Run LLM reasoning over the state - decides what should happen next.
        """
        # TODO: Implement reasoning logic
        pass

    def tool_selection_node(self, state: AgentState) -> AgentState:
        """
        Decide which tool (if any) should be used for this query.
        """
        # TODO: Implement tool selection
        pass

    def tool_execution_node(self, state: AgentState) -> AgentState:
        """
        Execute the selected tool and record its result in the state.
        """
        # TODO: Implement tool execution
        pass

    def response_generation_node(self, state: AgentState) -> AgentState:
        """
        Generate the final user-facing response from the accumulated state.
        """
        # TODO: Implement response generation
        pass

    def should_use_tool(self, state: AgentState) -> str:
        """
        Conditional edge - returns the name of the next node depending on
        whether a tool call is needed.
        """
        # TODO: Implement decision logic
        pass

    def build_graph(self) -> StateGraph:
        """
        Build and compile the LangGraph graph wiring the nodes above.
        """
        # TODO: Build and compile graph
        pass

    async def run(self, query: str, conversation_history: list | None = None) -> dict:
        """
        Main entry point - processes a user query.

        Args:
            query: The user's question.
            conversation_history: Prior chat messages, if any (defaults to None).

        Returns:
            dict: The agent's response payload (exact shape TBD - see TODO).
        """
        # TODO: Implement main execution logic
        pass
25 changes: 25 additions & 0 deletions api/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Configuration Management
========================

This file will contain:
- Environment variable loading (from the .env file)
- API key storage (OpenAI, Google, Pinecone, etc.)
- Database connection strings
- Vector DB settings (Chroma/Pinecone config)
- LLM model names and parameters
- Document processing settings (chunk size, overlap)
- File upload paths and limits
- A Pydantic Settings class for type-safe config
"""

from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """
    Type-safe application settings.

    Values are read from process environment variables and, when present,
    from a local ``.env`` file — per the module header's stated design
    (python-dotenv is already a declared dependency).
    """

    # Read a local .env file if one exists; ignore unknown variables so a
    # partially-populated environment doesn't crash startup.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # TODO: Add configuration fields here (API keys, DB URLs, model names, ...)


# Module-level singleton so the rest of the app can `from config import settings`.
settings = Settings()
132 changes: 129 additions & 3 deletions api/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,132 @@
def main():
    """Placeholder entry point that prints a greeting from the api package."""
    greeting = "Hello from api!"
    print(greeting)
"""
FastAPI Application - Main Entry Point
=======================================

This file will contain:
- FastAPI app initialization
- CORS middleware setup
- API endpoints:
  * POST /ingest - upload documents
  * POST /query - ask user questions
  * GET /health - health check endpoint
  * GET /documents - list uploaded documents
  * DELETE /documents/{id} - delete a document
- Lifespan events (startup/shutdown)
- Vector DB connection initialization
- Error handlers (global exception handling)
- Request logging
- Rate limiting (future)
- Authentication middleware (future)
"""

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager


@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Handle application startup and shutdown events.

    Everything before ``yield`` runs once at startup; everything after it
    runs once at shutdown (FastAPI's lifespan protocol).
    """
    # Startup
    print("🚀 Starting Memnox API...")
    # TODO: Initialize vector DB, load models, etc.

    yield

    # Shutdown
    print("👋 Shutting down Memnox API...")
    # TODO: Cleanup resources


# Application instance; `lifespan` wires up the startup/shutdown hooks above.
app = FastAPI(
    title="Memnox API",
    description="Chief of Staff AI - RAG Engine with LangGraph Agent",
    version="0.1.0",
    lifespan=lifespan,
)

# CORS middleware
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers under the CORS spec and is unsafe for production —
# replace with an explicit origin allow-list before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # TODO: Configure properly for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
async def root():
    """
    Root endpoint - returns basic API information.
    """
    payload = {
        "message": "Memnox API - Chief of Staff AI",
        "version": "0.1.0",
        "status": "running",
    }
    return payload


@app.get("/health")
async def health_check():
    """
    Liveness probe for the service.
    """
    # TODO: Check vector DB connection, LLM availability
    report = {"status": "healthy"}
    return report


@app.post("/ingest")
async def ingest_document(file: UploadFile = File(...)):
    """
    Upload and ingest a document.

    Planned pipeline:
    1. Validate the file
    2. Extract its text (ingestion.py)
    3. Chunk + embed + store (shadow_librarian.py)
    4. Return a success response
    """
    # TODO: Implement document ingestion
    reply = {"message": "Document ingestion endpoint - Coming soon"}
    return reply


@app.post("/query")
async def query_agent(query: str):
    """
    Answer a user query via the Chief of Staff agent.

    Planned pipeline:
    1. Validate the query
    2. Retrieve context from the retrieval service
    3. Invoke the agent (chief_of_staff.py)
    4. Return the response
    """
    # TODO: Implement query handling
    reply = {"message": "Query endpoint - Coming soon"}
    return reply


@app.get("/documents")
async def list_documents():
    """
    Return the list of uploaded documents.
    """
    # TODO: Implement document listing
    reply = {"message": "Document listing - Coming soon"}
    return reply


@app.delete("/documents/{document_id}")
async def delete_document(document_id: str):
    """
    Delete a document (including its entries in the vector DB).
    """
    # TODO: Implement document deletion
    reply = {"message": "Document deletion - Coming soon"}
    return reply


if __name__ == "__main__":
    # Dev-server entry point: start uvicorn directly. The leftover scaffold
    # call to main() (the old "Hello from api!" stub) was removed — it only
    # printed a greeting before the server started and no longer belongs here.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
9 changes: 9 additions & 0 deletions api/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""
Pydantic Models Package
=======================

This package will contain:
- Request/Response schemas for FastAPI endpoints
- Data validation models
- Type definitions
"""
21 changes: 21 additions & 0 deletions api/models/schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""
API Request/Response Schemas
=============================

This file will contain:
- IngestRequest: request model for document upload
- IngestResponse: upload success/failure response
- QueryRequest: request model for user questions
- QueryResponse: AI answer + sources + metadata
- Document: document metadata structure
- ChatMessage: conversation-history entries
- ToolCall: agent tool-execution tracking
- AgentState: LangGraph state definition
"""

from pydantic import BaseModel
from typing import List, Optional, Dict, Any
from datetime import datetime


# TODO: Add Pydantic models here
4 changes: 4 additions & 0 deletions api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@ description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"chromadb>=1.5.2",
"fastapi>=0.135.1",
"langchain>=1.2.10",
"langchain-community>=0.4.1",
"langchain-core>=1.2.16",
"langchain-google-genai>=4.2.1",
"langgraph>=1.0.10",
"langsmith>=0.7.9",
"llama-index>=0.14.15",
"pydantic-settings>=2.13.1",
"python-dotenv>=1.2.2",
]
10 changes: 10 additions & 0 deletions api/services/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""
Services Package
================

This package will contain:
- Core business logic
- Document processing services
- RAG engine implementation
- Retrieval services
"""
54 changes: 54 additions & 0 deletions api/services/ingestion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
Document Ingestion Service
===========================

This file will contain:
- PDF document loading (using PyPDF2/pdfplumber)
- DOCX document loading (using python-docx)
- TXT file loading
- File validation (size and type checks)
- Text extraction and cleaning
- Metadata extraction (filename, upload date, author, file type)
- Background task handling (Celery integration in the future)
- Error handling for corrupt files
- Batch processing support
- File storage management
"""


async def load_pdf(file_path: str) -> dict:
    """
    Extract text from a PDF file.

    Args:
        file_path: Path to the PDF on disk.

    Returns:
        dict: {
            'text': extracted_text,
            'metadata': {filename, pages, etc}
        }
    """
    # TODO: Implement PDF loading (PyPDF2/pdfplumber per the module header)
    pass


async def load_docx(file_path: str) -> dict:
    """
    Extract text from a DOCX file.

    Args:
        file_path: Path to the DOCX on disk.

    Returns:
        dict: presumably mirrors load_pdf's shape ({'text', 'metadata'}) —
        confirm once implemented.
    """
    # TODO: Implement DOCX loading (python-docx per the module header)
    pass


async def load_txt(file_path: str) -> dict:
    """
    Load a plain-text file and return its contents plus basic metadata.

    Args:
        file_path: Path to the .txt file on disk.

    Returns:
        dict: {'text': file contents,
               'metadata': {'filename', 'file_type', 'size_bytes'}}
        (matches the {'text', 'metadata'} shape documented on load_pdf).

    Raises:
        FileNotFoundError: If the path does not exist.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    path = Path(file_path)
    # Plain text needs no parsing library — read it directly as UTF-8.
    text = path.read_text(encoding="utf-8")
    return {
        "text": text,
        "metadata": {
            "filename": path.name,
            "file_type": "txt",
            "size_bytes": path.stat().st_size,
        },
    }


async def ingest_document(file_path: str, file_type: str) -> dict:
    """
    Main ingestion entry point - dispatches to the loader for the file type.

    Args:
        file_path: Path to the uploaded file on disk.
        file_type: File type/extension, e.g. "pdf", ".docx", "TXT"
                   (case-insensitive; a leading dot is tolerated).

    Returns:
        dict: The selected loader's result ({'text': ..., 'metadata': ...}).

    Raises:
        ValueError: If the file type has no registered loader.
    """
    # Normalize "PDF", ".pdf", etc. down to a bare lowercase extension.
    suffix = file_type.lower().lstrip(".")
    # Validate before dispatching so unsupported types fail fast with a
    # clear error instead of an obscure failure inside a loader.
    if suffix == "pdf":
        return await load_pdf(file_path)
    if suffix == "docx":
        return await load_docx(file_path)
    if suffix == "txt":
        return await load_txt(file_path)
    raise ValueError(f"Unsupported file type: {file_type}")
Loading