diff --git a/mem_mcp_server/utils/summarizer.py b/mem_mcp_server/utils/summarizer.py index 392c09c..465294a 100644 --- a/mem_mcp_server/utils/summarizer.py +++ b/mem_mcp_server/utils/summarizer.py @@ -27,7 +27,7 @@ class HTTPOpenAISummarizer: HTTP-based OpenAI summarizer that doesn't depend on the openai package """ - def __init__(self, api_key: str = None, model: str = "gpt-4o-mini"): + def __init__(self, api_key: str = None, model: str = "qwen2:0.5b"): """ Initialize the HTTP OpenAI summarizer @@ -48,7 +48,8 @@ def __init__(self, api_key: str = None, model: str = "gpt-4o-mini"): self.api_key = api_key or os.environ.get("OPENAI_API_KEY") self.model = model - self.api_url = "https://api.openai.com/v1/chat/completions" + self.api_url = "http://127.0.0.1:11434/v1/chat/completions" + # self.api_url = "https://api.openai.com/v1/chat/completions" LOGGER.info(f"API key set: {'Yes' if self.api_key else 'No'}") @@ -125,7 +126,7 @@ def generate_summary(self, context: str) -> str: def _get_json_system_prompt(self) -> str: """Get optimized system prompt that ensures JSON output""" - return f"""You are an expert development assistant specializing in analyzing commit history and creating detailed project summaries. + return f"""You are an expert development assistant specializing in analyzing commit history and creating detailed project summaries. You must respond with a valid JSON object following this exact schema: @@ -134,7 +135,7 @@ def _get_json_system_prompt(self) -> str: Instructions for analysis: 1. Analyze each commit chronologically 2. Extract user requests and intents behind changes -3. Identify technical decisions and code patterns +3. Identify technical decisions and code patterns 4. Document file changes with specific details 5. Note errors encountered and how they were resolved 6. 
logger = logging.getLogger(__name__)


class LLMClient:
    """Client for interacting with LLM APIs through ``litellm``.

    ``litellm`` is imported lazily in ``__init__``. When the import fails the
    client is still constructed, but ``available`` is ``False`` and every
    query returns an error dictionary instead of raising.

    Attributes:
        models: Model names supplied by the caller.
        api_key: API key supplied by the caller, or ``None``.
        litellm: The imported ``litellm`` module, or ``None`` if unavailable.
        available: Whether ``litellm`` could be imported.
    """

    # Used by query_multiple when no model is passed and ``models`` is empty.
    DEFAULT_MODEL = "gpt-4o-mini"

    def __init__(self, models: List[str], api_key: Optional[str] = None):
        """
        Initialize LLM client.

        Args:
            models: List of model names to use
            api_key: Optional API key (will use environment variables if not provided)
        """
        self.models = models
        self.api_key = api_key

        try:
            import litellm
        except ImportError:
            logger.warning(
                "litellm not installed. Install with: pip install litellm\n"
                "Document generation will use fallback mode."
            )
            self.litellm = None
            self.available = False
            return

        self.litellm = litellm
        self.available = True

        # Configure litellm
        if api_key:
            litellm.api_key = api_key

        # Suppress verbose logging
        litellm.set_verbose = False

    def query_single(
        self,
        model: str,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 4000,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Query a single LLM model.

        Args:
            model: Model name
            prompt: User prompt
            system_prompt: Optional system prompt
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            **kwargs: Additional arguments for litellm

        Returns:
            On success a dictionary with 'content' (always a string), 'model'
            and 'usage' (``None`` when the response carries no usage data).
            On failure a dictionary with 'error' and an empty 'content'.
            This method does not raise for provider errors.
        """
        if not self.available or not self.litellm:
            return {
                'error': 'LLM client not available',
                'content': ''
            }

        messages: List[Dict[str, str]] = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            response = self.litellm.completion(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                **kwargs
            )
            # Log instead of print(): stdout may be a protocol channel.
            logger.debug("LLM response from %s: %r", model, response)
            content = response.choices[0].message.content
        except Exception as e:
            logger.error(f"Error calling LLM {model}: {e}")
            return {
                'error': str(e),
                'content': ''
            }

        return {
            # Providers may return None content; callers treat it as text.
            'content': content if content is not None else '',
            'model': model,
            # A missing usage attribute must not discard a good completion.
            'usage': getattr(response, 'usage', None),
        }

    def query_multiple(
        self,
        prompts: List[str],
        model: Optional[str] = None,
        system_prompt: Optional[str] = None,
        **kwargs
    ) -> List[Dict[str, Any]]:
        """
        Query LLM with multiple prompts, one request per prompt, in order.

        Args:
            prompts: List of prompts
            model: Model to use (default: first model in list)
            system_prompt: Optional system prompt
            **kwargs: Additional arguments

        Returns:
            List of response dictionaries, one per prompt
        """
        if not model:
            model = self.models[0] if self.models else self.DEFAULT_MODEL

        return [
            self.query_single(
                model=model,
                prompt=prompt,
                system_prompt=system_prompt,
                **kwargs
            )
            for prompt in prompts
        ]
logger = logging.getLogger(__name__)


@dataclass
class FunctionInfo:
    """Information about a function or method."""

    name: str
    # Parameters in signature order, each optionally followed by
    # ": <annotation>"; variadic ones keep their "*" / "**" prefix.
    params: List[str]
    return_type: Optional[str] = None
    docstring: Optional[str] = None
    decorators: List[str] = field(default_factory=list)
    is_async: bool = False
    line_start: int = 0
    line_end: int = 0
    # Unparsed source of the first few body statements.
    body_preview: str = ""


@dataclass
class ClassInfo:
    """Information about a class."""

    name: str
    bases: List[str]
    methods: List[FunctionInfo]
    attributes: List[str]
    docstring: Optional[str] = None
    decorators: List[str] = field(default_factory=list)
    line_start: int = 0
    line_end: int = 0


@dataclass
class ModuleInfo:
    """Information about a module/file."""

    file_path: str
    imports: List[str]
    functions: List[FunctionInfo]
    classes: List[ClassInfo]
    docstring: Optional[str] = None
    # Top-level package name of every import found in the file.
    dependencies: Set[str] = field(default_factory=set)
    loc: int = 0  # Lines of code


class CodeAnalyzer:
    """Analyzes Python code to extract structural information."""

    # Number of leading body statements kept in FunctionInfo.body_preview.
    _PREVIEW_STATEMENTS = 3

    def __init__(self, project_path: str):
        """
        Initialize code analyzer.

        Args:
            project_path: Path to the project root
        """
        self.project_path = Path(project_path)

    def analyze_file(self, file_path: str) -> Optional[ModuleInfo]:
        """
        Analyze a single Python file.

        Args:
            file_path: Path to the file to analyze

        Returns:
            ModuleInfo object, or None if the file is missing or cannot be
            read or parsed (the failure is logged, not raised)
        """
        try:
            source_path = Path(file_path)
            if not source_path.exists():
                logger.warning(f"File not found: {file_path}")
                return None

            code = source_path.read_text(encoding='utf-8')
            tree = ast.parse(code, filename=str(source_path))

            module_info = ModuleInfo(
                file_path=str(source_path),
                imports=[],
                functions=[],
                classes=[],
                docstring=ast.get_docstring(tree),
                loc=len(code.splitlines())
            )

            # Imports are collected from the whole tree, including nested ones.
            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        module_info.imports.append(alias.name)
                        module_info.dependencies.add(alias.name.split('.')[0])
                elif isinstance(node, ast.ImportFrom) and node.module:
                    module_info.imports.append(node.module)
                    module_info.dependencies.add(node.module.split('.')[0])

            # Only top-level functions and classes are recorded on the module.
            for node in tree.body:
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    module_info.functions.append(self._extract_function(node))
                elif isinstance(node, ast.ClassDef):
                    module_info.classes.append(self._extract_class(node))

            return module_info

        except Exception as e:
            # Best-effort boundary: one unreadable file must not abort a scan.
            logger.error(f"Error analyzing file {file_path}: {e}")
            return None

    def analyze_directory(
        self,
        directory: str,
        extensions: Optional[List[str]] = None,
        exclude_patterns: Optional[List[str]] = None
    ) -> List[ModuleInfo]:
        """
        Analyze all matching files below a directory.

        Args:
            directory: Directory to analyze
            extensions: File extensions to include (default: ['.py'])
            exclude_patterns: Substrings that exclude a file when found in its
                path relative to ``directory`` (e.g., ['test_', '__pycache__'])

        Returns:
            List of ModuleInfo objects, in sorted path order per extension
        """
        if extensions is None:
            extensions = ['.py']
        if exclude_patterns is None:
            exclude_patterns = ['__pycache__', '.git', '.venv', 'venv', 'node_modules']

        directory_path = Path(directory)
        modules = []

        for ext in extensions:
            for file_path in sorted(directory_path.rglob(f'*{ext}')):
                # Match only the part below ``directory``. Matching the
                # absolute path would exclude everything whenever an ancestor
                # directory happens to contain a pattern such as "venv".
                relative = str(file_path.relative_to(directory_path))
                if any(pattern in relative for pattern in exclude_patterns):
                    continue

                module_info = self.analyze_file(str(file_path))
                if module_info:
                    modules.append(module_info)

        return modules

    def analyze_files(self, file_paths: List[str]) -> List[ModuleInfo]:
        """
        Analyze a list of files, skipping those that cannot be analyzed.

        Args:
            file_paths: List of file paths to analyze

        Returns:
            List of ModuleInfo objects
        """
        analyzed = (self.analyze_file(file_path) for file_path in file_paths)
        return [module_info for module_info in analyzed if module_info]

    @staticmethod
    def _format_param(arg: ast.arg, prefix: str = "") -> str:
        """Render one parameter as ``[prefix]name[: annotation]``."""
        text = prefix + arg.arg
        if arg.annotation:
            text += f": {ast.unparse(arg.annotation)}"
        return text

    def _extract_function(
        self, node: Union[ast.FunctionDef, ast.AsyncFunctionDef]
    ) -> FunctionInfo:
        """Extract information from a function/method node."""
        # Cover every parameter kind, not just plain positional ones.
        args = node.args
        params = [self._format_param(a) for a in (*args.posonlyargs, *args.args)]
        if args.vararg:
            params.append(self._format_param(args.vararg, "*"))
        params.extend(self._format_param(a) for a in args.kwonlyargs)
        if args.kwarg:
            params.append(self._format_param(args.kwarg, "**"))

        return_type = ast.unparse(node.returns) if node.returns else None
        decorators = [ast.unparse(dec) for dec in node.decorator_list]

        # Body preview: the first few statements, skipping any that fail to unparse.
        body_lines = []
        for stmt in node.body[:self._PREVIEW_STATEMENTS]:
            try:
                body_lines.append(ast.unparse(stmt))
            except Exception as e:
                logger.debug(f"Could not unparse statement in {node.name}: {e}")
        body_preview = '\n'.join(body_lines)

        return FunctionInfo(
            name=node.name,
            params=params,
            return_type=return_type,
            docstring=ast.get_docstring(node),
            decorators=decorators,
            is_async=isinstance(node, ast.AsyncFunctionDef),
            line_start=node.lineno,
            line_end=node.end_lineno or node.lineno,
            body_preview=body_preview
        )

    def _extract_class(self, node: ast.ClassDef) -> ClassInfo:
        """Extract information from a class node."""
        bases = [ast.unparse(base) for base in node.bases]

        methods = [
            self._extract_function(item)
            for item in node.body
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
        ]

        # Attributes: class-level assignments to simple names only.
        attributes = []
        for item in node.body:
            if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
                attr_name = item.target.id
                if item.annotation:
                    attr_name += f": {ast.unparse(item.annotation)}"
                attributes.append(attr_name)
            elif isinstance(item, ast.Assign):
                attributes.extend(
                    target.id for target in item.targets
                    if isinstance(target, ast.Name)
                )

        decorators = [ast.unparse(dec) for dec in node.decorator_list]

        return ClassInfo(
            name=node.name,
            bases=bases,
            methods=methods,
            attributes=attributes,
            docstring=ast.get_docstring(node),
            decorators=decorators,
            line_start=node.lineno,
            line_end=node.end_lineno or node.lineno
        )

    def get_dependencies(self, modules: List[ModuleInfo]) -> Dict[str, Set[str]]:
        """
        Build dependency graph from modules.

        An import counts as internal when ``<project_path>/<dotted/as/path>.py``
        exists on disk.

        Args:
            modules: List of analyzed modules

        Returns:
            Dictionary mapping file paths to their dependencies
        """
        dependencies = {}

        for module in modules:
            deps = set()
            for imp in module.imports:
                imp_path = self.project_path / (imp.replace('.', '/') + '.py')
                if imp_path.exists():
                    deps.add(str(imp_path))
            dependencies[module.file_path] = deps

        return dependencies

    def get_call_graph(self, modules: List[ModuleInfo]) -> Dict[str, List[str]]:
        """
        Build function call graph (simplified).

        Only each function's ``body_preview`` is scanned, so calls beyond the
        previewed statements are not seen.

        Args:
            modules: List of analyzed modules

        Returns:
            Dictionary mapping function names to called functions; functions
            with no detected calls are omitted
        """
        # Collect all known names: "func" and "Class.method".
        all_functions = set()
        for module in modules:
            all_functions.update(func.name for func in module.functions)
            for cls in module.classes:
                all_functions.update(f"{cls.name}.{method.name}" for method in cls.methods)

        call_graph = {}
        for module in modules:
            for func in module.functions:
                calls = self._find_function_calls(func.body_preview, all_functions)
                if calls:
                    call_graph[func.name] = calls

            for cls in module.classes:
                for method in cls.methods:
                    calls = self._find_function_calls(method.body_preview, all_functions)
                    if calls:
                        call_graph[f"{cls.name}.{method.name}"] = calls

        return call_graph

    def _find_function_calls(self, code: str, known_functions: Set[str]) -> List[str]:
        """Return the known function names called in *code*, sorted by name."""
        # Sorted so the result does not depend on set iteration order.
        return [
            func_name
            for func_name in sorted(known_functions)
            if re.search(rf'\b{re.escape(func_name)}\s*\(', code)
        ]

    def generate_summary(self, modules: List[ModuleInfo]) -> Dict[str, Any]:
        """
        Generate summary statistics from analyzed modules.

        Args:
            modules: List of analyzed modules

        Returns:
            Summary dictionary
        """
        total_loc = sum(m.loc for m in modules)
        total_functions = sum(len(m.functions) for m in modules)
        total_classes = sum(len(m.classes) for m in modules)
        total_methods = sum(
            len(cls.methods) for m in modules for cls in m.classes
        )

        all_deps = set()
        for module in modules:
            all_deps.update(module.dependencies)

        # Entry points: docstring mentions "entry point", or a top-level
        # ``main`` function exists. Each file is listed once even if both apply.
        entry_points = []
        for module in modules:
            doc_says_entry = bool(module.docstring) and 'entry point' in module.docstring.lower()
            has_main = any(func.name == 'main' for func in module.functions)
            if (doc_says_entry or has_main) and module.file_path not in entry_points:
                entry_points.append(module.file_path)

        return {
            'total_files': len(modules),
            'total_loc': total_loc,
            'total_functions': total_functions,
            'total_classes': total_classes,
            'total_methods': total_methods,
            'external_dependencies': sorted(all_deps),
            'entry_points': entry_points,
            'avg_loc_per_file': total_loc // len(modules) if modules else 0,
        }
logger = logging.getLogger(__name__)


class DiagramType(Enum):
    """Types of Mermaid diagrams."""

    FLOWCHART = "flowchart"
    CLASS_DIAGRAM = "classDiagram"
    SEQUENCE = "sequenceDiagram"
    ER_DIAGRAM = "erDiagram"
    STATE = "stateDiagram"
    GRAPH = "graph"


class DiagramGenerator:
    """Generates Mermaid diagrams from code analysis.

    Every ``generate_*`` method returns the diagram as a string wrapped in a
    ```` ```mermaid ```` fenced block.
    """

    # Letters B..Y are used as flowchart step ids; "A" is Start, "Z" is End.
    _MAX_LETTER_STEPS = 24
    # Whole words that mark a flowchart step as a terminal (rounded) node.
    _TERMINAL_WORDS = frozenset({"end", "ends", "return", "returns"})

    def __init__(self, llm_client: Optional["LLMClient"] = None):
        """
        Initialize diagram generator.

        Args:
            llm_client: Optional LLM client for intelligent diagram generation
        """
        self.llm_client = llm_client

    @staticmethod
    def _escape_label(text: str) -> str:
        """Escape double quotes so *text* can sit inside a quoted Mermaid label."""
        return str(text).replace('"', "#quot;")

    @staticmethod
    def _display_name(file_path: str) -> str:
        """Return the file name of *file_path* without a trailing ``.py``."""
        name = file_path.replace("\\", "/").split("/")[-1]
        return name[:-3] if name.endswith(".py") else name

    @staticmethod
    def _module_key(file_path: str) -> str:
        """Return *file_path* as a '/'-separated path without ``.py`` or ``/__init__``."""
        key = file_path.replace("\\", "/")
        if key.endswith(".py"):
            key = key[:-3]
        if key.endswith("/__init__"):
            key = key[:-len("/__init__")]
        return key

    @staticmethod
    def _extract_mermaid_block(content: str) -> str:
        """Return *content* reduced to one fenced mermaid block, or '' if empty."""
        text = content.strip()
        if not text:
            return ""

        marker = "```mermaid"
        start = text.find(marker)
        if start != -1:
            # Keep only the fenced diagram, dropping any prose around it.
            body_start = start + len(marker)
            end = text.find("```", body_start)
            code = text[body_start:end] if end != -1 else text[body_start:]
        elif "```" in text:
            code = text.split("```")[1]
        else:
            code = text

        code = code.strip()
        return f"```mermaid\n{code}\n```" if code else ""

    def generate_architecture_diagram(
        self,
        modules: List["ModuleInfo"],
        title: str = "System Architecture"
    ) -> str:
        """
        Generate architecture diagram showing module relationships.

        Modules are grouped into one subgraph per parent directory. An edge
        ``A --> B`` is drawn when one of A's imports, written as a path,
        equals B's path or is a trailing part of it.

        Args:
            modules: List of analyzed modules
            title: Diagram title

        Returns:
            Mermaid diagram as string
        """
        lines = [
            "```mermaid",
            "graph TB",
            f'    title["{self._escape_label(title)}"]',
            "",
        ]

        # Group modules by parent directory name.
        module_groups: Dict[str, List["ModuleInfo"]] = {}
        for module in modules:
            parts = module.file_path.replace("\\", "/").split("/")
            group = (parts[-2] if len(parts) > 1 else "") or "root"
            module_groups.setdefault(group, []).append(module)

        node_id = 0
        node_map: Dict[str, str] = {}
        for group_index, (group_name, group_modules) in enumerate(module_groups.items(), start=1):
            # Synthetic subgraph id: directory names may contain characters
            # that are not valid in Mermaid ids or that clash with node ids.
            lines.append(f'    subgraph grp{group_index}["{self._escape_label(group_name)}"]')

            for module in group_modules:
                node_id += 1
                node_name = f"mod{node_id}"
                node_map[module.file_path] = node_name

                module_name = self._display_name(module.file_path)
                label = f"{module_name}\\n{len(module.classes)}C {len(module.functions)}F"
                lines.append(f'        {node_name}["{self._escape_label(label)}"]')

            lines.append("    end")
            lines.append("")

        lines.append("    %% Dependencies")
        keyed_nodes = [
            (self._module_key(path), node_name) for path, node_name in node_map.items()
        ]
        seen_edges = set()
        for module in modules:
            source_node = node_map[module.file_path]
            for imported in module.imports:
                candidate = imported.replace(".", "/")
                for key, target_node in keyed_nodes:
                    # Match on whole path components only, never on raw
                    # substrings ("os" must not hit "cosmos.py").
                    if key != candidate and not key.endswith("/" + candidate):
                        continue
                    edge = (source_node, target_node)
                    if target_node == source_node or edge in seen_edges:
                        continue
                    seen_edges.add(edge)
                    lines.append(f"    {source_node} --> {target_node}")

        lines.append("")
        lines.append("    style title fill:#f9f,stroke:#333,stroke-width:2px")
        lines.append("```")

        return "\n".join(lines)

    def generate_class_diagram(
        self,
        classes: List["ClassInfo"],
        title: str = "Class Diagram"
    ) -> str:
        """
        Generate UML class diagram.

        At most 10 attributes and 10 methods are listed per class.

        Args:
            classes: List of class information
            title: Diagram title

        Returns:
            Mermaid diagram as string
        """
        lines = ["```mermaid", "classDiagram", f"    %% {title}", ""]

        for cls in classes:
            lines.append(f"    class {cls.name} {{")

            for attr in cls.attributes[:10]:  # Limit to 10
                lines.append(f"        +{attr}")

            for method in cls.methods[:10]:  # Limit to 10
                params = ", ".join(method.params) if method.params else ""
                return_type = f" {method.return_type}" if method.return_type else ""
                lines.append(f"        +{method.name}({params}){return_type}")

            lines.append("    }")
            lines.append("")

            for base in cls.bases:
                # Reduce "pkg.Base[T]" or "make_base(x)" to a bare identifier.
                base_name = base.split('[')[0].split('(')[0].split('.')[-1].strip()
                if base_name:
                    lines.append(f"    {base_name} <|-- {cls.name}")

        lines.append("```")

        return "\n".join(lines)

    def generate_dependency_graph(
        self,
        dependencies: Dict[str, Set[str]],
        title: str = "Dependency Graph"
    ) -> str:
        """
        Generate dependency graph showing module dependencies.

        Args:
            dependencies: Dictionary mapping modules to their dependencies
            title: Diagram title

        Returns:
            Mermaid diagram as string
        """
        lines = [
            "```mermaid",
            "graph LR",
            f'    title["{self._escape_label(title)}"]',
            "",
        ]

        all_modules = set(dependencies.keys())
        for deps in dependencies.values():
            all_modules.update(deps)

        node_map = {}
        for idx, module in enumerate(sorted(all_modules)):
            node_id = f"M{idx}"
            node_map[module] = node_id
            lines.append(f'    {node_id}["{self._escape_label(self._display_name(module))}"]')

        lines.append("")

        for source, deps in dependencies.items():
            source_id = node_map[source]
            # Sorted so edge order does not depend on set iteration order.
            for dep in sorted(deps):
                lines.append(f"    {source_id} --> {node_map[dep]}")

        lines.append("```")

        return "\n".join(lines)

    def generate_flowchart(
        self,
        steps: List[str],
        title: str = "Process Flow"
    ) -> str:
        """
        Generate flowchart from list of steps.

        The chart runs Start -> step 1 -> ... -> step n -> End. A step is
        drawn as a decision when it contains "decision", the word "if" or a
        "?", as a terminal when it contains the word "end" or "return", and
        as a plain box otherwise.

        Args:
            steps: List of process steps
            title: Diagram title

        Returns:
            Mermaid diagram as string
        """
        lines = [
            "```mermaid",
            "flowchart TD",
            f'    A["Start: {self._escape_label(title)}"]',
        ]

        previous = "A"
        for idx, step in enumerate(steps):
            # B..Y for the first 24 steps. Beyond that chr() would collide
            # with the "Z" end node and then yield non-identifier characters.
            node = chr(66 + idx) if idx < self._MAX_LETTER_STEPS else f"N{idx}"

            label = self._escape_label(step)
            lowered = step.lower()
            # Whole-word tests, so "Verify" or "Send" do not trigger a shape.
            words = {word.strip(".,:;!?()[]{}\"'") for word in lowered.split()}

            if "decision" in lowered or "if" in words or "?" in step:
                lines.append(f'    {node}{{"{label}"}}')
            elif words & self._TERMINAL_WORDS:
                lines.append(f'    {node}(["{label}"])')
            else:
                lines.append(f'    {node}["{label}"]')

            lines.append(f"    {previous} --> {node}")
            previous = node

        lines.append(f"    {previous} --> Z[End]")
        lines.append("```")

        return "\n".join(lines)

    def generate_sequence_diagram(
        self,
        interactions: List[tuple[str, str, str]],
        title: str = "Sequence Diagram"
    ) -> str:
        """
        Generate sequence diagram showing interactions.

        Participants are declared in alphabetical order.

        Args:
            interactions: List of (actor1, actor2, message) tuples
            title: Diagram title

        Returns:
            Mermaid diagram as string
        """
        lines = ["```mermaid", "sequenceDiagram", f"    title {title}", ""]

        actors = set()
        for actor1, actor2, _ in interactions:
            actors.add(actor1)
            actors.add(actor2)

        lines.extend(f"    participant {actor}" for actor in sorted(actors))
        lines.append("")

        for actor1, actor2, message in interactions:
            lines.append(f"    {actor1}->>+{actor2}: {message}")

        lines.append("```")

        return "\n".join(lines)

    def generate_with_llm(
        self,
        diagram_type: DiagramType,
        context: Dict,
        prompt: Optional[str] = None
    ) -> str:
        """
        Generate diagram using LLM for intelligent interpretation.

        Falls back to a locally generated diagram when the LLM call fails,
        reports an error, or returns no usable content.

        Args:
            diagram_type: Type of diagram to generate
            context: Context information for diagram generation
            prompt: Optional custom prompt

        Returns:
            Mermaid diagram as string (a single fenced mermaid block)

        Raises:
            ValueError: If no LLM client was configured
        """
        if not self.llm_client:
            raise ValueError("LLM client required for intelligent diagram generation")

        if prompt is None:
            prompt = self._build_diagram_prompt(diagram_type, context)

        system_prompt = """You are an expert in creating Mermaid diagrams for software documentation.
Generate clear, well-structured Mermaid diagrams based on the provided information.

Guidelines:
- Use proper Mermaid syntax
- Keep diagrams clear and readable
- Limit complexity (max 20 nodes for graphs)
- Use appropriate node shapes and styles
- Add clear labels and titles
- Ensure diagram is properly formatted with ```mermaid markers"""

        try:
            response = self.llm_client.query_single(
                model=self.llm_client.models[0],
                prompt=prompt,
                system_prompt=system_prompt,
                temperature=0.5,
                max_tokens=2000
            )

            if response.get('error'):
                logger.error(f"LLM error: {response['error']}")
                return self._generate_fallback_diagram(diagram_type, context)

            diagram = self._extract_mermaid_block(response.get('content') or '')

        except Exception as e:
            logger.error(f"Error generating diagram with LLM: {e}")
            return self._generate_fallback_diagram(diagram_type, context)

        if not diagram:
            # An empty reply would otherwise be wrapped into an empty diagram.
            logger.warning("LLM returned no diagram content; using fallback diagram")
            return self._generate_fallback_diagram(diagram_type, context)

        return diagram

    def _build_diagram_prompt(
        self,
        diagram_type: DiagramType,
        context: Dict
    ) -> str:
        """Build prompt for LLM diagram generation.

        Reads the optional 'modules', 'classes' and 'summary' keys of
        *context*; at most 10 modules and 10 classes are listed.
        """
        lines = [
            f"Generate a {diagram_type.value} Mermaid diagram based on the following information:",
            "",
        ]

        if 'modules' in context:
            modules = context['modules']
            lines.append(f"Number of modules: {len(modules)}")
            lines.append("Module names:")
            for module in modules[:10]:
                name = module.file_path.split('/')[-1]
                lines.append(f"- {name} ({len(module.functions)}F, {len(module.classes)}C)")
            lines.append("")

        if 'classes' in context:
            classes = context['classes']
            lines.append(f"Number of classes: {len(classes)}")
            lines.append("Class names:")
            for cls in classes[:10]:
                lines.append(f"- {cls.name} (methods: {', '.join([m.name for m in cls.methods[:3]])})")
            lines.append("")

        if 'summary' in context:
            lines.append("Project summary:")
            for key, value in context['summary'].items():
                lines.append(f"- {key}: {value}")
            lines.append("")

        lines.append("Please generate a clear and well-structured Mermaid diagram.")
        lines.append("Include the ```mermaid code block markers in your response.")

        return "\n".join(lines)

    def _generate_fallback_diagram(
        self,
        diagram_type: DiagramType,
        context: Dict
    ) -> str:
        """Generate basic diagram as fallback, without using the LLM."""
        if diagram_type == DiagramType.CLASS_DIAGRAM and 'classes' in context:
            return self.generate_class_diagram(context['classes'])
        if diagram_type == DiagramType.GRAPH and 'modules' in context:
            return self.generate_architecture_diagram(context['modules'])

        # Generic flowchart
        return self.generate_flowchart(
            steps=["Start", "Process", "Decision?", "End"],
            title="Process Flow"
        )
+- Commit-level and branch-level documentation +- Integration with code analysis +""" + +import logging +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum +from pathlib import Path +from typing import Any, Dict, List, Optional + +from memov.debugging.llm_client import LLMClient +from memov.docgen.code_analyzer import CodeAnalyzer, ModuleInfo + +logger = logging.getLogger(__name__) + + +class DocType(Enum): + """Types of documentation that can be generated.""" + README = "readme" + API_REFERENCE = "api_reference" + ARCHITECTURE = "architecture" + TUTORIAL = "tutorial" + CHANGELOG = "changelog" + FEATURE = "feature" + + +@dataclass +class DocumentStructure: + """Template structure for generated documentation.""" + doc_type: DocType + sections: List[str] + templates: Dict[str, str] = field(default_factory=dict) + + @staticmethod + def get_readme_structure() -> 'DocumentStructure': + """Get structure for README documentation.""" + return DocumentStructure( + doc_type=DocType.README, + sections=[ + "title", + "overview", + "features", + "installation", + "quick_start", + "usage", + "api_overview", + "architecture", + "contributing", + "license" + ], + templates={ + "title": "# {project_name}\n\n{description}", + "overview": "## Overview\n\n{overview_text}", + "features": "## Features\n\n{feature_list}", + "installation": "## Installation\n\n```bash\n{install_commands}\n```", + "quick_start": "## Quick Start\n\n{quick_start_example}", + "usage": "## Usage\n\n{usage_examples}", + "api_overview": "## API Overview\n\n{api_summary}", + "architecture": "## Architecture\n\n{architecture_diagram}", + } + ) + + @staticmethod + def get_api_structure() -> 'DocumentStructure': + """Get structure for API reference documentation.""" + return DocumentStructure( + doc_type=DocType.API_REFERENCE, + sections=[ + "title", + "overview", + "modules", + "classes", + "functions", + "examples" + ], + templates={ + "title": "# API Reference\n\n", + 
"modules": "## Modules\n\n{module_list}", + "classes": "## Classes\n\n{class_descriptions}", + "functions": "## Functions\n\n{function_descriptions}", + } + ) + + @staticmethod + def get_feature_structure() -> 'DocumentStructure': + """Get structure for feature documentation.""" + return DocumentStructure( + doc_type=DocType.FEATURE, + sections=[ + "title", + "overview", + "motivation", + "design", + "implementation", + "usage", + "testing", + "related_changes" + ], + templates={ + "title": "# Feature: {feature_name}\n\n", + "overview": "## Overview\n\n{overview_text}", + "motivation": "## Motivation\n\n{motivation_text}", + "design": "## Design\n\n{design_details}", + "implementation": "## Implementation\n\n{implementation_details}", + "usage": "## Usage\n\n{usage_examples}", + } + ) + + @staticmethod + def get_architecture_structure() -> 'DocumentStructure': + """Get structure for architecture documentation.""" + return DocumentStructure( + doc_type=DocType.ARCHITECTURE, + sections=[ + "title", + "overview", + "components", + "data_flow", + "diagrams", + "design_patterns", + "dependencies" + ], + templates={ + "title": "# Architecture Documentation\n\n", + "overview": "## Overview\n\n{overview_text}", + "components": "## Components\n\n{component_list}", + "data_flow": "## Data Flow\n\n{data_flow_diagram}", + "diagrams": "## Architecture Diagrams\n\n{diagrams}", + } + ) + + +@dataclass +class GeneratedDocument: + """A generated documentation document.""" + doc_type: DocType + title: str + content: str + metadata: Dict[str, Any] + timestamp: datetime = field(default_factory=datetime.now) + + +class DocumentGenerator: + """Generates documentation using LLM and code analysis.""" + + def __init__( + self, + code_analyzer: CodeAnalyzer, + llm_client: Optional[LLMClient] = None, + model: str = "gpt-4o-mini" + ): + """ + Initialize document generator. 
def generate_for_commit(
    self,
    commit_hash: str,
    changed_files: List[str],
    commit_message: str,
    doc_type: DocType = DocType.FEATURE
) -> GeneratedDocument:
    """
    Generate documentation for a specific commit.

    Args:
        commit_hash: Commit hash
        changed_files: List of files changed in the commit
        commit_message: Commit message (may be empty)
        doc_type: Type of documentation to generate

    Returns:
        GeneratedDocument titled ``Commit <first 8 hash chars>: <subject>``,
        where the subject is the first line of the message cut to 50
        characters (empty when the commit has no message).
    """
    short_hash = commit_hash[:8]
    logger.info("Generating %s documentation for commit %s", doc_type.value, short_hash)

    # ``"".splitlines()`` is ``[]``, so indexing [0] would raise IndexError for
    # a commit without a message. Build the title up front so a bad message can
    # never waste a completed LLM call.
    subject = next(iter(commit_message.splitlines()), "")
    title = f"Commit {short_hash}: {subject[:50]}"

    # Analyze changed files
    modules = self.code_analyzer.analyze_files(changed_files)

    # Get appropriate structure
    structure = self._get_structure(doc_type)

    # Build context for LLM
    context = self._build_commit_context(
        commit_hash=commit_hash,
        commit_message=commit_message,
        modules=modules,
        changed_files=changed_files
    )

    # Generate documentation content
    content = self._generate_content(structure, context)

    return GeneratedDocument(
        doc_type=doc_type,
        title=title,
        content=content,
        metadata={
            'commit_hash': commit_hash,
            'commit_message': commit_message,
            'changed_files': changed_files,
            'num_files': len(changed_files),
        }
    )
def generate_for_repository(
    self,
    directory: str,
    doc_types: Optional[List[DocType]] = None
) -> List[GeneratedDocument]:
    """
    Generate comprehensive documentation for entire repository.

    Each requested type is produced by ``generate_for_branch`` with the
    branch name fixed to "main".

    Args:
        directory: Repository directory
        doc_types: List of documentation types to generate. ``None`` selects
            README, API reference and architecture.

    Returns:
        List of GeneratedDocument objects, in the order of ``doc_types``
    """
    if doc_types is None:
        doc_types = [DocType.README, DocType.API_REFERENCE, DocType.ARCHITECTURE]

    logger.info("Generating repository documentation: %s", doc_types)

    # No directory analysis here: generate_for_branch analyzes the directory
    # itself, so a separate pass would only repeat that work and be discarded.
    return [
        self.generate_for_branch(
            branch_name="main",
            directory=directory,
            doc_type=doc_type
        )
        for doc_type in doc_types
    ]
functions[:5] # First 5 functions + ] + + if classes: + context['main_classes'] = [ + { + 'name': c.name, + 'methods': [m.name for m in c.methods], + 'docstring': c.docstring + } + for c in classes[:5] # First 5 classes + ] + + return context + + def _build_branch_context( + self, + branch_name: str, + modules: List[ModuleInfo], + commit_range: Optional[tuple[str, str]] = None + ) -> Dict[str, Any]: + """Build context for branch-level documentation.""" + # Generate summary + summary = self.code_analyzer.generate_summary(modules) + + # Build dependency graph + dependencies = self.code_analyzer.get_dependencies(modules) + + # Extract all functions and classes + all_functions = [] + all_classes = [] + for module in modules: + all_functions.extend(module.functions) + all_classes.extend(module.classes) + + context = { + 'branch_name': branch_name, + 'summary': summary, + 'dependencies': dependencies, + 'modules': modules, + 'all_functions': all_functions, + 'all_classes': all_classes, + 'commit_range': commit_range, + } + + return context + + def _generate_content( + self, + structure: DocumentStructure, + context: Dict[str, Any] + ) -> str: + """Generate documentation content using LLM.""" + if not self.llm_client: + # Fallback: generate basic structure without LLM + return self._generate_fallback_content(structure, context) + + # Build prompt for LLM + prompt = self._build_prompt(structure, context) + + system_prompt = """You are an expert technical writer and software documentation specialist. +Your task is to generate clear, comprehensive, and well-structured documentation based on code analysis. 
+ +Guidelines: +- Write in clear, concise language +- Include code examples where appropriate +- Use proper markdown formatting +- Structure information logically +- Focus on practical usage +- Explain complex concepts simply""" + + try: + # Query LLM + response = self.llm_client.query_single( + model=self.model, + prompt=prompt, + system_prompt=system_prompt, + temperature=0.7, + max_tokens=4000 + ) + + if response.get('error'): + logger.error(f"LLM error: {response['error']}") + return self._generate_fallback_content(structure, context) + + return response.get('content', '') + + except Exception as e: + logger.error(f"Error generating content: {e}") + return self._generate_fallback_content(structure, context) + + def _build_prompt( + self, + structure: DocumentStructure, + context: Dict[str, Any] + ) -> str: + """Build prompt for LLM based on structure and context.""" + lines = [] + + lines.append(f"Generate {structure.doc_type.value} documentation with the following structure:") + lines.append("") + lines.append("Required sections:") + for section in structure.sections: + lines.append(f"- {section}") + lines.append("") + + lines.append("Code Analysis Context:") + lines.append("") + + # Add relevant context based on what's available + if 'commit_message' in context: + lines.append(f"Commit Message: {context['commit_message']}") + lines.append(f"Changed Files: {', '.join(context.get('changed_files', []))}") + lines.append("") + + if 'branch_name' in context: + lines.append(f"Branch: {context['branch_name']}") + lines.append("") + + if 'summary' in context: + summary = context['summary'] + lines.append("Project Summary:") + lines.append(f"- Total Files: {summary.get('total_files', 0)}") + lines.append(f"- Total LOC: {summary.get('total_loc', 0)}") + lines.append(f"- Functions: {summary.get('total_functions', 0)}") + lines.append(f"- Classes: {summary.get('total_classes', 0)}") + lines.append("") + + # Add function details + if 'main_functions' in context: + 
lines.append("Key Functions:") + for func in context['main_functions'][:3]: + lines.append(f"- {func['name']}({', '.join(func['params'])})") + if func.get('docstring'): + lines.append(f" {func['docstring'][:100]}") + lines.append("") + + # Add class details + if 'main_classes' in context: + lines.append("Key Classes:") + for cls in context['main_classes'][:3]: + lines.append(f"- {cls['name']}") + if cls.get('methods'): + lines.append(f" Methods: {', '.join(cls['methods'][:5])}") + lines.append("") + + lines.append("Please generate comprehensive documentation following the structure above.") + lines.append("Use proper markdown formatting including headers, code blocks, and lists.") + + return "\n".join(lines) + + def _generate_fallback_content( + self, + structure: DocumentStructure, + context: Dict[str, Any] + ) -> str: + """Generate basic documentation without LLM.""" + lines = [] + + # Title + if 'commit_message' in context: + title = f"# Commit Documentation\n\n**Commit**: {context['commit_hash'][:8]}\n" + title += f"**Message**: {context['commit_message']}\n" + lines.append(title) + elif 'branch_name' in context: + lines.append(f"# {structure.doc_type.value.replace('_', ' ').title()}\n") + lines.append(f"**Branch**: {context['branch_name']}\n") + + # Summary + if 'summary' in context: + summary = context['summary'] + lines.append("## Summary\n") + lines.append(f"- Files: {summary.get('total_files', 0)}") + lines.append(f"- Lines of Code: {summary.get('total_loc', 0)}") + lines.append(f"- Functions: {summary.get('total_functions', 0)}") + lines.append(f"- Classes: {summary.get('total_classes', 0)}") + lines.append("") + + # Functions + if 'main_functions' in context: + lines.append("## Functions\n") + for func in context['main_functions'][:10]: + lines.append(f"### `{func['name']}`\n") + if func.get('params'): + lines.append(f"**Parameters**: `{', '.join(func['params'])}`\n") + if func.get('return_type'): + lines.append(f"**Returns**: `{func['return_type']}`\n") 
@dataclass
class CommitInfo:
    """Metadata record describing a single Git commit."""
    # Identity and authorship
    hash: str
    author: str
    date: datetime
    # Commit message text
    message: str
    # Paths touched by the commit; every instance gets its own fresh list
    changed_files: List[str] = field(default_factory=list)
    # Line counts, defaulting to zero
    additions: int = 0
    deletions: int = 0
def get_changed_files(
    self,
    commit_hash: str,
    file_extensions: Optional[List[str]] = None
) -> List[str]:
    """
    Get list of files changed in a commit.

    Paths that no longer exist in the working tree are left out.

    Args:
        commit_hash: Commit hash or reference
        file_extensions: Optional list of file extensions to filter (e.g., ['.py', '.js'])

    Returns:
        List of absolute file paths; empty if the git command fails
    """
    try:
        result = subprocess.run(
            [
                "git", "diff-tree",
                # Without --root the initial commit has no parent to diff
                # against and git prints nothing, so its files were lost.
                "--root",
                "--no-commit-id", "--name-only", "-r",
                commit_hash,
            ],
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to get changed files for {commit_hash}: {e}")
        return []

    # str.endswith accepts a tuple, so one call covers every extension.
    suffixes = tuple(file_extensions) if file_extensions else ()

    files = []
    # Split on "\n" only, and do not strip the output: stripping would eat
    # leading whitespace that belongs to the first path.
    for rel_path in result.stdout.split('\n'):
        if not rel_path:
            continue

        # Convert to absolute path
        abs_path = (self.repo_path / rel_path).resolve()

        # Filter by extension if specified
        if suffixes and not str(abs_path).endswith(suffixes):
            continue

        # Only include existing files
        if abs_path.exists():
            files.append(str(abs_path))

    return files
def get_commits_in_range(
    self,
    start_ref: str,
    end_ref: str = "HEAD"
) -> List[CommitInfo]:
    """
    Collect commit details for ``start_ref..end_ref``.

    The hashes come from ``git rev-list``; each one is resolved through
    ``get_commit_info`` and hashes that cannot be resolved are dropped.

    Args:
        start_ref: Starting commit reference
        end_ref: Ending commit reference (default: HEAD)

    Returns:
        List of CommitInfo objects, or an empty list if rev-list fails
    """
    revision_range = f"{start_ref}..{end_ref}"
    try:
        listing = subprocess.run(
            ["git", "rev-list", revision_range],
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            check=True
        )

        hashes = [h for h in listing.stdout.strip().split('\n') if h]
        resolved = (self.get_commit_info(h) for h in hashes)
        return [info for info in resolved if info]

    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to get commits in range {start_ref}..{end_ref}: {e}")
        return []
def get_current_branch(self) -> str:
    """
    Report which branch is checked out.

    Returns:
        Branch name, or an empty string when HEAD is detached or the git
        command fails
    """
    try:
        completed = subprocess.run(
            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError:
        return ""

    name = completed.stdout.strip()
    # A literal "HEAD" is what rev-parse prints when no branch is checked out.
    if name == "HEAD":
        return ""
    return name
def get_files_in_branch(
    self,
    branch_name: str,
    file_extensions: Optional[List[str]] = None
) -> List[str]:
    """
    List every file tracked in a branch.

    Unlike a working-tree scan, the paths come from ``git ls-tree`` and are
    not checked for existence on disk.

    Args:
        branch_name: Branch name
        file_extensions: Optional list of file extensions to filter

    Returns:
        List of absolute file paths, or an empty list if ls-tree fails
    """
    try:
        listing = subprocess.run(
            ["git", "ls-tree", "-r", "--name-only", branch_name],
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            check=True
        )

        # An empty or missing filter means "keep everything".
        suffixes = tuple(file_extensions) if file_extensions else None

        absolute_paths = (
            str((self.repo_path / entry).resolve())
            for entry in listing.stdout.strip().split('\n')
            if entry
        )
        return [
            path for path in absolute_paths
            if suffixes is None or path.endswith(suffixes)
        ]

    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to get files in branch {branch_name}: {e}")
        return []
+ +Features: +- Markdown rendering +- Mermaid diagram rendering +- Syntax highlighting +- File browser navigation +- Responsive design +""" + +import logging +from pathlib import Path +from typing import Optional +from urllib.parse import unquote + +from starlette.applications import Starlette +from starlette.responses import HTMLResponse, Response +from starlette.routing import Route +from starlette.staticfiles import StaticFiles +import uvicorn + +logger = logging.getLogger(__name__) + + +HTML_TEMPLATE = """ + +
+ + +Generate some documentation first using the mem-docgen tool.
+Failed to read document: {e}
def start_preview_server(
    docs_dir: str = ".mem/docs",
    host: str = "127.0.0.1",
    port: int = 8000
):
    """
    Build a PreviewServer from the given settings and start it.

    Args:
        docs_dir: Directory containing documentation
        host: Host to bind to
        port: Port to listen on
    """
    PreviewServer(docs_dir=docs_dir, host=host, port=port).start()
def init_components(
    project_path: str = ".",
    model: str = "qwen2:0.5b"
) -> tuple:
    """
    Construct the objects every CLI command works with.

    The LLM client is optional: if it cannot be imported, a warning is
    printed and the generators are built with ``None`` in its place. Any
    other failure is reported and terminates the process with exit code 1.

    Returns:
        Tuple of (analyzer, generator, diagram_gen, git_utils)
    """
    try:
        # Git access first, then static analysis of the project tree.
        repo = GitUtils(project_path)
        code_analyzer = CodeAnalyzer(project_path)

        # Best-effort LLM client; stays None when unavailable.
        client = None
        try:
            from memov.debugging.llm_client import LLMClient
            client = LLMClient(models=[model])
        except ImportError:
            console.print("[yellow]Warning: LLM client not available. Using fallback mode.[/yellow]")

        doc_generator = DocumentGenerator(code_analyzer, client, model)
        diagram_generator = DiagramGenerator(client)

        return code_analyzer, doc_generator, diagram_generator, repo

    except Exception as e:
        console.print(f"[red]Error initializing components: {e}[/red]")
        logger.error(f"Initialization error: {e}", exc_info=True)
        sys.exit(1)
+ + Examples: + mem-docgen generate-commit HEAD + mem-docgen generate-commit abc123 --type feature + mem-docgen generate-commit HEAD~1 --diagram + """ + console.print(f"\n[cyan]📝 Generating documentation for commit {commit_hash}[/cyan]\n") + + # Initialize components + analyzer, generator, diagram_gen, git_utils = init_components(project_path, model) + + # Parse file extensions + extensions = [ext.strip() for ext in file_extensions.split(",")] if file_extensions else None + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + console=console + ) as progress: + # Get commit info + task = progress.add_task("Fetching commit information...", total=None) + commit_info = git_utils.get_commit_info(commit_hash) + + if not commit_info: + console.print(f"[red]✗ Commit not found: {commit_hash}[/red]") + sys.exit(1) + + # Get changed files + progress.update(task, description="Analyzing changed files...") + changed_files = git_utils.get_changed_files(commit_hash, extensions) + + if not changed_files: + console.print(f"[yellow]⚠ No files to analyze (filter: {extensions})[/yellow]") + sys.exit(0) + + console.print(f" Commit: [green]{commit_info.hash[:8]}[/green]") + console.print(f" Author: {commit_info.author}") + console.print(f" Files changed: {len(changed_files)}") + console.print(f" +{commit_info.additions} -{commit_info.deletions}") + console.print() + + # Generate documentation + progress.update(task, description="Generating documentation...") + try: + doc_type_enum = DocType[doc_type.upper()] + except KeyError: + console.print(f"[red]Invalid doc type: {doc_type}[/red]") + console.print(f"Available types: {', '.join([dt.value for dt in DocType])}") + sys.exit(1) + + doc = generator.generate_for_commit( + commit_hash=commit_info.hash, + changed_files=changed_files, + commit_message=commit_info.message, + doc_type=doc_type_enum + ) + + # Create output directory + output_path = Path(output_dir) / commit_info.hash[:8] + 
@app.command()
def generate_branch(
    branch_name: Annotated[Optional[str], typer.Argument(help="Branch name (default: current branch)")] = None,
    output_dir: Annotated[str, typer.Option("--output", "-o", help="Output directory")] = ".mem/docs/branches",
    doc_types: Annotated[str, typer.Option("--types", "-t", help="Document types (comma-separated)")] = "readme,api_reference,architecture",
    # NOTE(review): other defaults visible in this file use "qwen2:0.5b";
    # confirm whether this one is meant to differ.
    model: Annotated[str, typer.Option("--model", "-m", help="LLM model to use")] = "gpt-4o-mini",
    # Declared as an on/off pair: with only "--diagram" and a default of True
    # there was no way to switch diagram generation off from the command line.
    with_diagram: Annotated[bool, typer.Option("--diagram/--no-diagram", "-d", help="Generate mermaid diagrams")] = True,
    base_branch: Annotated[Optional[str], typer.Option("--base", "-b", help="Base branch for comparison")] = None,
    project_path: Annotated[str, typer.Option("--path", "-p", help="Project path")] = ".",
):
    """
    Generate documentation for a branch.

    Examples:
        mem-docgen generate-branch
        mem-docgen generate-branch feat/new-feature
        mem-docgen generate-branch --types "readme,api_reference"
        mem-docgen generate-branch --base main
        mem-docgen generate-branch --no-diagram
    """
    # Initialize components
    analyzer, generator, diagram_gen, git_utils = init_components(project_path, model)

    # Get branch name
    if not branch_name:
        branch_name = git_utils.get_current_branch()
        if not branch_name:
            console.print("[red]✗ Not on a branch. Please specify branch name.[/red]")
            sys.exit(1)

    console.print(f"\n[cyan]📚 Generating documentation for branch '{branch_name}'[/cyan]\n")

    # Parse document types (looked up by enum member name, case-insensitive)
    try:
        doc_type_list = [DocType[dt.strip().upper()] for dt in doc_types.split(",")]
    except KeyError as e:
        console.print(f"[red]Invalid doc type: {e}[/red]")
        console.print(f"Available types: {', '.join([dt.value for dt in DocType])}")
        sys.exit(1)

    # Create output directory; "/" in branch names would otherwise nest folders
    output_path = Path(output_dir) / branch_name.replace('/', '-')
    output_path.mkdir(parents=True, exist_ok=True)

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console
    ) as progress:
        # Get commit range if base branch specified
        commit_range = None
        if base_branch:
            progress.add_task("Analyzing commits...", total=None)
            commits = git_utils.get_branch_commits(branch_name, base_branch)
            console.print(f" Commits: {len(commits)}")
            console.print(f" Base branch: {base_branch}")
            console.print()
            commit_range = (base_branch, branch_name)

        # Generate each document type
        for doc_type in doc_type_list:
            progress.add_task(f"Generating {doc_type.value}...", total=None)

            doc = generator.generate_for_branch(
                branch_name=branch_name,
                directory=project_path,
                doc_type=doc_type,
                commit_range=commit_range
            )

            # Save document
            doc_file = output_path / f"{doc_type.value}.md"
            with open(doc_file, 'w', encoding='utf-8') as f:
                f.write(doc.content)

            console.print(f"[green]✓[/green] {doc_type.value.title()} → {doc_file}")

        # Generate diagrams if requested
        if with_diagram:
            progress.add_task("Generating diagrams...", total=None)

            # Analyze project
            modules = analyzer.analyze_directory(project_path)

            # Architecture diagram
            arch_diagram = diagram_gen.generate_architecture_diagram(
                modules,
                title=f"Branch '{branch_name}' Architecture"
            )
            arch_file = output_path / "architecture_diagram.md"
            with open(arch_file, 'w', encoding='utf-8') as f:
                f.write("# Architecture Diagram\n\n")
                f.write(arch_diagram)
            console.print(f"[green]✓[/green] Architecture diagram → {arch_file}")

            # Dependency graph (skipped when there are no dependencies)
            dependencies = analyzer.get_dependencies(modules)
            if dependencies:
                dep_diagram = diagram_gen.generate_dependency_graph(dependencies)
                dep_file = output_path / "dependencies_diagram.md"
                with open(dep_file, 'w', encoding='utf-8') as f:
                    f.write("# Dependency Graph\n\n")
                    f.write(dep_diagram)
                console.print(f"[green]✓[/green] Dependency graph → {dep_file}")

            # Class diagram
            all_classes = [cls for module in modules for cls in module.classes]

            if all_classes:
                class_diagram = diagram_gen.generate_class_diagram(all_classes[:20])  # Limit to 20
                class_file = output_path / "classes_diagram.md"
                with open(class_file, 'w', encoding='utf-8') as f:
                    f.write("# Class Diagram\n\n")
                    f.write(class_diagram)
                console.print(f"[green]✓[/green] Class diagram → {class_file}")

    console.print(f"\n[bold green]✨ Done![/bold green] View at: {output_path}\n")
typer.Option("--output", "-o", help="Output directory")] = ".mem/docs/diagrams", + types: Annotated[str, typer.Option("--types", "-t", help="Diagram types (comma-separated)")] = "architecture,class,dependency", + project_path: Annotated[str, typer.Option("--path", "-p", help="Project path")] = ".", +): + """ + Generate Mermaid diagrams for the project. + + Examples: + mem-docgen generate-diagrams + mem-docgen generate-diagrams --types "architecture,class" + mem-docgen generate-diagrams --output ./docs/diagrams + """ + console.print("\n[cyan]📊 Generating diagrams...[/cyan]\n") + + # Initialize components + analyzer, _, diagram_gen, _ = init_components(project_path) + + # Create output directory + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + console=console + ) as progress: + # Analyze project + task = progress.add_task("Analyzing code...", total=None) + modules = analyzer.analyze_directory(project_path) + + if not modules: + console.print("[yellow]⚠ No modules found to analyze[/yellow]") + sys.exit(0) + + console.print(f" Analyzed {len(modules)} modules") + console.print() + + # Parse diagram types + diagram_types = [t.strip() for t in types.split(",")] + + # Generate architecture diagram + if "architecture" in diagram_types: + progress.update(task, description="Generating architecture diagram...") + arch_diagram = diagram_gen.generate_architecture_diagram(modules) + arch_file = output_path / "architecture.md" + with open(arch_file, 'w', encoding='utf-8') as f: + f.write("# Architecture Diagram\n\n") + f.write(arch_diagram) + console.print(f"[green]✓[/green] Architecture → {arch_file}") + + # Generate class diagram + if "class" in diagram_types: + progress.update(task, description="Generating class diagram...") + all_classes = [] + for module in modules: + all_classes.extend(module.classes) + + if all_classes: + class_diagram = 
@app.command()
def list_commits(
    branch: Annotated[
        Optional[str], typer.Argument(help="Branch name (default: current)")
    ] = None,
    limit: Annotated[
        int, typer.Option("--limit", "-n", help="Number of commits to show")
    ] = 10,
    base_branch: Annotated[
        Optional[str], typer.Option("--base", "-b", help="Base branch for comparison")
    ] = None,
    project_path: Annotated[
        str, typer.Option("--path", "-p", help="Project path")
    ] = ".",
):
    """
    List commits in a branch.

    Examples:
        mem-docgen list-commits
        mem-docgen list-commits feat/new-feature
        mem-docgen list-commits --base main --limit 20
    """
    # Only the git helper is needed for this command.
    _, _, _, git_utils = init_components(project_path)

    # Fall back to the checked-out branch when none was given.
    if not branch:
        branch = git_utils.get_current_branch()
    if not branch:
        console.print("[red]✗ Not on a branch[/red]")
        sys.exit(1)

    if limit < 1:
        # A non-positive limit means there is nothing to show; this also keeps
        # a negative value from silently dropping trailing commits via slicing.
        commits = []
    elif base_branch:
        commits = git_utils.get_branch_commits(branch, base_branch)
    else:
        # Walk back `limit` commits from the requested branch. The previous
        # hard-coded "HEAD~10".."HEAD" range ignored `branch` and capped the
        # output at 10 commits regardless of --limit.
        # NOTE(review): "<branch>~<limit>" may not resolve when the branch has
        # fewer than `limit` ancestors - confirm how get_commits_in_range
        # handles an unresolvable start revision.
        commits = git_utils.get_commits_in_range(f"{branch}~{limit}", branch)

    if not commits:
        console.print("[yellow]⚠ No commits found[/yellow]")
        return

    table = Table(title=f"Commits in '{branch}'")
    table.add_column("Hash", style="cyan", no_wrap=True)
    table.add_column("Author", style="green")
    table.add_column("Date", style="magenta")
    table.add_column("Message", style="white")

    for commit in commits[:limit]:
        table.add_row(
            commit.hash[:8],  # abbreviated hash
            commit.author,
            commit.date.strftime("%Y-%m-%d %H:%M"),
            commit.message.split('\n')[0][:60],  # subject line, truncated to 60 chars
        )

    console.print()
    console.print(table)
    console.print()
@app.command()
def preview(
    docs_dir: Annotated[
        str, typer.Option("--dir", "-d", help="Documentation directory")
    ] = ".mem/docs",
    port: Annotated[
        int, typer.Option("--port", "-p", help="Server port")
    ] = 8000,
    host: Annotated[
        str, typer.Option("--host", "-h", help="Server host")
    ] = "127.0.0.1",
):
    """
    Start web preview server for documentation.

    Examples:
        mem-docgen preview
        mem-docgen preview --port 8080
        mem-docgen preview --dir ./docs
    """
    console.print("\n[cyan]🚀 Starting documentation preview server...[/cyan]\n")

    # Make sure there is a directory to serve; create it when it is absent.
    docs_root = Path(docs_dir)
    if not docs_root.exists():
        console.print(f"[yellow]⚠ Documentation directory not found: {docs_root}[/yellow]")
        console.print("[yellow] Creating directory...[/yellow]")
        docs_root.mkdir(parents=True, exist_ok=True)

    # Hand control to the server. Ctrl-C is reported as a normal stop, while
    # any other failure is printed and turned into exit status 1.
    try:
        start_preview_server(docs_dir=docs_dir, host=host, port=port)
    except KeyboardInterrupt:
        console.print("\n[yellow]Server stopped[/yellow]")
    except Exception as exc:
        console.print(f"\n[red]Server error: {exc}[/red]")
        sys.exit(1)
@app.command()
def info(
    project_path: Annotated[
        str, typer.Option("--path", "-p", help="Project path")
    ] = ".",
):
    """
    Show project information and statistics.

    Examples:
        mem-docgen info
        mem-docgen info --path /path/to/project
    """
    console.print("\n[cyan]📊 Project Information[/cyan]\n")

    # Only the code analyzer and the git helper are used by this command.
    analyzer, _, _, git_utils = init_components(project_path)

    spinner = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    )
    with spinner:
        spinner.add_task("Analyzing project...", total=None)
        modules = analyzer.analyze_directory(project_path)

        # Without any analyzed modules there is nothing to report.
        if not modules:
            console.print("[yellow]⚠ No Python modules found[/yellow]")
            return

        summary = analyzer.generate_summary(modules)
        current_branch = git_utils.get_current_branch()
        all_branches = git_utils.get_all_branches()

    # Each row is (label, summary key, format spec); values are looked up
    # lazily, one row at a time, in display order.
    stat_rows = (
        ("Files", "total_files", ""),
        ("Lines of Code", "total_loc", ","),
        ("Functions", "total_functions", ""),
        ("Classes", "total_classes", ""),
        ("Methods", "total_methods", ""),
        ("Avg LOC/File", "avg_loc_per_file", ""),
    )
    console.print("[bold]Project Statistics:[/bold]")
    for label, key, spec in stat_rows:
        console.print(f" {label}: {format(summary[key], spec)}")
    console.print()

    console.print("[bold]Git Information:[/bold]")
    console.print(f" Current Branch: {current_branch or 'detached HEAD'}")
    console.print(f" Total Branches: {len(all_branches)}")
    console.print()

    # The dependency section is optional; show at most ten entries and then
    # summarise how many were left out.
    if summary.get('external_dependencies'):
        external = summary['external_dependencies']
        console.print("[bold]External Dependencies:[/bold]")
        for name in external[:10]:
            console.print(f" • {name}")
        if len(external) > 10:
            console.print(f" ... and {len(external) - 10} more")
        console.print()
def main():
    """Main entry point."""
    # Logging is set up first, with "." passed as its argument (presumably the
    # working/project directory - confirm against setup_logging), and then the
    # CLI object that the @app.command() functions are registered on is invoked.
    setup_logging(".")
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()