diff --git a/.python-version b/.python-version index e4fba21..92536a9 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.12 +3.12.0 diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index c9c6edb..3f2a084 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -13,37 +13,38 @@ AI Dev OS is a unified platform for autonomous AI agent development, combining: ### Unified Platform Architecture -The following diagram illustrates the integrated flow of the six core technologies within AI Dev OS, as defined in the official platform design: +The following diagram illustrates the integrated flow of the six core technologies within AI Dev OS, as defined in the official platform design board: -![Architecture Overview](file:///C:/Users/HASSA/.gemini/antigravity/brain/939dc489-ea81-40ea-847a-3133f7b9fe6b/architecture_overview.png) + +**[Official Figma Board: Unified AI Platform Architecture](https://www.figma.com/board/A4TS4yuzBMF9g3IiMcdrBu/unified_ai_platform_architecture?node-id=0-1&t=C9sd6PrCRYsozLOb-0)** ```mermaid graph TD - Start((brainstorm)) -->|refinement| B[Superpowers Skill:
Brainstorming + Design] - B -->|dispatch| C{Approved Spec +
Implementation Plan} - C --> D[Subagent Orchestration:
LangGraph + Middleware] + Start((brainstorm)) -->|refinement| B["Superpowers Skill:
Brainstorming + Design"] + B -->|dispatch| C{"Approved Spec +
Implementation Plan"} + C --> D["Subagent Orchestration:
LangGraph + Middleware"] - D -->|spawn Agent C| E[Sim Agent in Sandbox] - D -->|spawn Agent B| F[Training Agent in Sandbox] - D -->|spawn Agent A| G[Code Agent in Sandbox] + D -->|spawn Agent C| E["Sim Agent (Newton)"] + D -->|spawn Agent B| F["Training Agent (Unsloth)"] + D -->|spawn Agent A| G["Code Agent (Sandbox)"] E -->|Newton| H[Newton Engine] F -->|Unsloth| I[Unsloth Framework] - H --> J[Physics Simulation:
GPU-accelerated] - I --> K[Model Training:
2x faster, 70% less VRAM] + H -->|metrics| J["Physics Simulation:
GPU-accelerated"] + I -->|checkpoints| K["Model Training:
2x faster, 70% less VRAM"] - J -->|results| L[BitNet.cpp:
1-bit inference] - K -->|checkpoint| L + J -->|results| L["BitNet.cpp:
1-bit inference"] + K -->|loss curves| L - G -->|read/write/test| M[Isolated Cloud Sandbox:
Modal/Daytona] + G -->|read/write/test| M["Isolated Cloud Sandbox:
Modal/Daytona"] L -->|test code| M - G -->|code ready| N[Superpowers Skills:
TDD + Code Review] - N -->|verified| O[Merge & Deploy:
Auto-PR + GitHub] + G -->|code ready| N["Superpowers Skills:
TDD + Code Review"] + N -->|verified| O["Merge & Deploy:
Auto-PR + GitHub"] O --> Production[[Live Result:
Agent output running]] - J -->|metrics| P[Claude HUD:
Status + Context + Tools] + J -->|metrics| P["Claude HUD:
Status + Context + Tools"] K -->|loss curves| P L -->|real-time data| P M -->|logs| P diff --git a/README.md b/README.md index 6734097..6759a1d 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ AI Dev OS is an integrated platform where autonomous AI agents can handle comple I have integrated the official **Unified AI Platform Architecture** from your design board into the project documentation. -![Architecture Overview](file:///C:/Users/HASSA/.gemini/antigravity/brain/939dc489-ea81-40ea-847a-3133f7b9fe6b/architecture_overview.png) + ``` Developer Request (Slack/Linear/CLI) @@ -261,7 +261,7 @@ Create `~/.claude/plugins/claude-hud/config.json`: ## šŸ“š Documentation -- [**ARCHITECTURE.md**](docs/ARCHITECTURE.md) - Deep dive into system design +- [**ARCHITECTURE.md**](ARCHITECTURE.md) - Deep dive into system design - [**SETUP_GUIDE.md**](docs/SETUP_GUIDE.md) - Detailed installation for each OS - [**WORKFLOWS.md**](docs/WORKFLOWS.md) - How to trigger and manage agent workflows - [**CUSTOMIZATION.md**](docs/CUSTOMIZATION.md) - Extend with custom skills/tools diff --git a/app/dashboard.py b/app/dashboard.py index 981311b..42cbb12 100644 --- a/app/dashboard.py +++ b/app/dashboard.py @@ -31,6 +31,7 @@ # ─── Authentication ──────────────────────────────────────────────── + def check_password(): """Returns True if the user had the correct password.""" @@ -56,6 +57,7 @@ def password_entered(): else: return True + if not check_password(): st.stop() # Do not continue if check_password is not True. diff --git a/baseline_test_output.txt b/baseline_test_output.txt new file mode 100644 index 0000000..6673c91 --- /dev/null +++ b/baseline_test_output.txt @@ -0,0 +1,250 @@ +============================= test session starts ============================= +platform win32 -- Python 3.12.0, pytest-9.0.2, pluggy-1.6.0 +rootdir: C:\Users\HASSA\Desktop\AI-DEV-OS +configfile: pyproject.toml +testpaths: tests +plugins: anyio-4.12.1, langsmith-0.7.22, asyncio-1.3.0, cov-7.0.0 +asyncio: mode=Mode.AUTO, debug=False, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function +collected 38 items + +tests\test_core.py F.. [ 7%] +tests\test_core_comprehensive.py ..........FFFFFFF.... [ 63%] +tests\test_integrations.py ... [ 71%] +tests\test_models.py .. [ 76%] +tests\test_sandbox.py .. [ 81%] +tests\test_skills.py ... [ 89%] +tests\test_utils.py .... [100%] + +================================== FAILURES =================================== +______________________ test_orchestrator_initialization _______________________ + +mock_anthropic = + + @pytest.mark.asyncio + async def test_orchestrator_initialization(mock_anthropic): +> orchestrator = AIDevOSOrchestrator(sandbox_provider=SandboxProvider.DOCKER) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +tests\test_core.py:21: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +sandbox_provider = + + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): + self.sandbox_provider = sandbox_provider + import os + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: +> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") +E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. + +src\ai_dev_os\core.py:327: ValueError +_________________ TestAIDevOSOrchestrator.test_initialization _________________ + +self = +mock_anthropic = + + @patch("ai_dev_os.core.Anthropic") + def test_initialization(self, mock_anthropic): +> orchestrator = AIDevOSOrchestrator(sandbox_provider=SandboxProvider.DOCKER) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +tests\test_core_comprehensive.py:159: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +sandbox_provider = + + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): + self.sandbox_provider = sandbox_provider + import os + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: +> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") +E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. + +src\ai_dev_os\core.py:327: ValueError +_____________ TestAIDevOSOrchestrator.test_determine_agents_code ______________ + +self = +mock_anthropic = + + @patch("ai_dev_os.core.Anthropic") + def test_determine_agents_code(self, mock_anthropic): +> orchestrator = AIDevOSOrchestrator() + ^^^^^^^^^^^^^^^^^^^^^ + +tests\test_core_comprehensive.py:167: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +sandbox_provider = + + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): + self.sandbox_provider = sandbox_provider + import os + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: +> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") +E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. + +src\ai_dev_os\core.py:327: ValueError +___________ TestAIDevOSOrchestrator.test_determine_agents_training ____________ + +self = +mock_anthropic = + + @patch("ai_dev_os.core.Anthropic") + def test_determine_agents_training(self, mock_anthropic): +> orchestrator = AIDevOSOrchestrator() + ^^^^^^^^^^^^^^^^^^^^^ + +tests\test_core_comprehensive.py:174: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +sandbox_provider = + + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): + self.sandbox_provider = sandbox_provider + import os + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: +> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") +E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. + +src\ai_dev_os\core.py:327: ValueError +__________ TestAIDevOSOrchestrator.test_determine_agents_simulation ___________ + +self = +mock_anthropic = + + @patch("ai_dev_os.core.Anthropic") + def test_determine_agents_simulation(self, mock_anthropic): +> orchestrator = AIDevOSOrchestrator() + ^^^^^^^^^^^^^^^^^^^^^ + +tests\test_core_comprehensive.py:181: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +sandbox_provider = + + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): + self.sandbox_provider = sandbox_provider + import os + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: +> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") +E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. + +src\ai_dev_os\core.py:327: ValueError +____________ TestAIDevOSOrchestrator.test_determine_agents_default ____________ + +self = +mock_anthropic = + + @patch("ai_dev_os.core.Anthropic") + def test_determine_agents_default(self, mock_anthropic): +> orchestrator = AIDevOSOrchestrator() + ^^^^^^^^^^^^^^^^^^^^^ + +tests\test_core_comprehensive.py:188: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +sandbox_provider = + + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): + self.sandbox_provider = sandbox_provider + import os + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: +> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") +E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. + +src\ai_dev_os\core.py:327: ValueError +__________ TestAIDevOSOrchestrator.test_determine_agents_multi_role ___________ + +self = +mock_anthropic = + + @patch("ai_dev_os.core.Anthropic") + def test_determine_agents_multi_role(self, mock_anthropic): +> orchestrator = AIDevOSOrchestrator() + ^^^^^^^^^^^^^^^^^^^^^ + +tests\test_core_comprehensive.py:195: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +sandbox_provider = + + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): + self.sandbox_provider = sandbox_provider + import os + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: +> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") +E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. + +src\ai_dev_os\core.py:327: ValueError +_________________ TestAIDevOSOrchestrator.test_skills_loaded __________________ + +self = +mock_anthropic = + + @patch("ai_dev_os.core.Anthropic") + def test_skills_loaded(self, mock_anthropic): +> orchestrator = AIDevOSOrchestrator() + ^^^^^^^^^^^^^^^^^^^^^ + +tests\test_core_comprehensive.py:203: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +self = +sandbox_provider = + + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): + self.sandbox_provider = sandbox_provider + import os + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: +> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") +E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. + +src\ai_dev_os\core.py:327: ValueError +============================== warnings summary =============================== +tests/test_core.py::test_workflow_state_logging +tests/test_core_comprehensive.py::TestWorkflowState::test_state_initialization +tests/test_core_comprehensive.py::TestWorkflowState::test_add_log +tests/test_core_comprehensive.py::TestWorkflowState::test_state_transitions +tests/test_core_comprehensive.py::TestWorkflowState::test_context_usage +tests/test_core_comprehensive.py::TestClaudeHUDIntegration::test_hud_update_creates_file +tests/test_core_comprehensive.py::TestClaudeHUDIntegration::test_hud_update_empty_agents + C:\Users\HASSA\Desktop\AI-DEV-OS\src\ai_dev_os\core.py:93: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). + self.created_at = datetime.utcnow().isoformat() + +tests/test_core.py::test_workflow_state_logging +tests/test_core_comprehensive.py::TestWorkflowState::test_add_log +tests/test_core_comprehensive.py::TestWorkflowState::test_add_log + C:\Users\HASSA\Desktop\AI-DEV-OS\src\ai_dev_os\core.py:97: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). + self.logs.append(f"[{datetime.utcnow().isoformat()}] {message}") + +tests/test_core_comprehensive.py::TestClaudeHUDIntegration::test_hud_update_creates_file +tests/test_core_comprehensive.py::TestClaudeHUDIntegration::test_hud_update_empty_agents + C:\Users\HASSA\Desktop\AI-DEV-OS\src\ai_dev_os\core.py:177: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC). + "timestamp": datetime.utcnow().isoformat(), + +-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html +=========================== short test summary info =========================== +FAILED tests/test_core.py::test_orchestrator_initialization - ValueError: CRI... +FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_initialization +FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_code +FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_training +FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_simulation +FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_default +FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_multi_role +FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_skills_loaded +================== 8 failed, 30 passed, 12 warnings in 3.59s ================== diff --git a/scripts/setup-sandboxes.py b/scripts/setup-sandboxes.py index 70dfbf6..9153fda 100644 --- a/scripts/setup-sandboxes.py +++ b/scripts/setup-sandboxes.py @@ -15,10 +15,7 @@ from pathlib import Path # Setup logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s [%(levelname)s] %(name)s: %(message)s' -) +logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s") logger = logging.getLogger(__name__) @@ -26,13 +23,13 @@ def setup_claude_hud_config(): """Setup Claude HUD configuration.""" hud_config_dir = Path.home() / ".claude" / "plugins" / "claude-hud" hud_config_dir.mkdir(parents=True, exist_ok=True) - + config_file = hud_config_dir / "config.json" - + if config_file.exists(): logger.info(f"Claude HUD config already exists at {config_file}") return - + config = { "lineLayout": "expanded", "pathLevels": 2, @@ -41,7 +38,7 @@ def setup_claude_hud_config(): "enabled": True, "showDirty": True, "showAheadBehind": False, - "showFileStats": False + "showFileStats": False, }, "display": { "showModel": True, @@ -53,20 +50,20 @@ def setup_claude_hud_config(): "showTools": True, "showAgents": True, "showTodos": True, - "showSessionName": False + "showSessionName": False, }, "colors": { "context": "cyan", "usage": "cyan", "warning": "yellow", "usageWarning": "brightMagenta", - "critical": "red" - } + "critical": "red", + }, } - - with open(config_file, 'w') as f: + + with open(config_file, "w") as f: json.dump(config, f, indent=2) - + logger.info(f"āœ“ Claude HUD config created at {config_file}") @@ -81,7 +78,7 @@ def setup_local_directories(): Path.cwd() / ".ai-dev-os" / "data", Path.cwd() / ".ai-dev-os" / "results", ] - + for directory in dirs: directory.mkdir(parents=True, exist_ok=True) logger.info(f"āœ“ Directory created: {directory}") @@ -90,17 +87,18 @@ def setup_local_directories(): async def test_modal_setup(): """Test Modal sandbox setup.""" logger.info("Testing Modal setup...") - + try: import modal + logger.info("āœ“ Modal SDK installed") - + # Test authentication if os.environ.get("MODAL_TOKEN_ID") and os.environ.get("MODAL_TOKEN_SECRET"): logger.info("āœ“ Modal credentials detected") else: logger.warning("⚠ Modal credentials not found. Run: modal token new") - + return True except ImportError: logger.error("āœ— Modal not installed. Run: pip install modal") @@ -110,9 +108,10 @@ async def test_modal_setup(): async def test_docker_setup(): """Test Docker sandbox setup.""" logger.info("Testing Docker setup...") - + try: import docker + client = docker.from_env() client.ping() logger.info("āœ“ Docker is running") @@ -128,21 +127,21 @@ async def test_docker_setup(): async def test_anthropic_setup(): """Test Anthropic API setup.""" logger.info("Testing Anthropic API setup...") - + try: import anthropic - + api_key = os.environ.get("ANTHROPIC_API_KEY") if not api_key: logger.error("āœ— ANTHROPIC_API_KEY not set") return False - + client = anthropic.Anthropic() # Test with a simple message response = client.messages.create( model="claude-opus-4-20250514", max_tokens=10, - messages=[{"role": "user", "content": "Hi"}] + messages=[{"role": "user", "content": "Hi"}], ) logger.info("āœ“ Anthropic API working") return True @@ -154,11 +153,11 @@ async def test_anthropic_setup(): def create_env_file(): """Create .env file template if it doesn't exist.""" env_file = Path.cwd() / ".env" - + if env_file.exists(): logger.info(f"āœ“ .env file already exists") return - + env_content = """# AI Dev OS Environment Variables # Anthropic API @@ -185,10 +184,10 @@ def create_env_file(): # Development DEBUG=false """ - - with open(env_file, 'w') as f: + + with open(env_file, "w") as f: f.write(env_content) - + logger.info(f"āœ“ .env file created. Please edit with your configuration.") @@ -199,52 +198,48 @@ async def main(): "--provider", choices=["modal", "docker", "both"], default="docker", - help="Sandbox provider to test" + help="Sandbox provider to test", ) - parser.add_argument( - "--skip-tests", - action="store_true", - help="Skip provider tests" - ) - + parser.add_argument("--skip-tests", action="store_true", help="Skip provider tests") + args = parser.parse_args() - + logger.info("=" * 60) logger.info("AI Dev OS Setup") logger.info("=" * 60) - + # Setup directories and config logger.info("\n[1/4] Setting up directories...") setup_local_directories() - + logger.info("\n[2/4] Setting up Claude HUD...") setup_claude_hud_config() - + logger.info("\n[3/4] Creating .env file...") create_env_file() - + logger.info("\n[4/4] Testing providers...") - + if not args.skip_tests: results = {} - + if args.provider in ["modal", "both"]: results["modal"] = await test_modal_setup() - + if args.provider in ["docker", "both"]: results["docker"] = await test_docker_setup() - + # Always test Anthropic results["anthropic"] = await test_anthropic_setup() - + logger.info("\n" + "=" * 60) logger.info("Setup Status") logger.info("=" * 60) - + for provider, success in results.items(): status = "āœ“ PASS" if success else "āœ— FAIL" logger.info(f"{status}: {provider}") - + logger.info("\n" + "=" * 60) logger.info("Setup Complete!") logger.info("=" * 60) diff --git a/src/ai_dev_os/core.py b/src/ai_dev_os/core.py index 2b93d78..e620e71 100644 --- a/src/ai_dev_os/core.py +++ b/src/ai_dev_os/core.py @@ -7,25 +7,23 @@ import asyncio import json import logging -from dataclasses import dataclass, asdict +from dataclasses import asdict, dataclass from datetime import datetime from enum import Enum from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple -from langgraph.graph import StateGraph, START, END from anthropic import Anthropic +from langgraph.graph import END, START, StateGraph # Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s [%(levelname)s] %(name)s: %(message)s' -) +logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s") logger = logging.getLogger(__name__) class WorkflowPhase(Enum): """Stages of the AI Dev OS workflow.""" + BRAINSTORMING = "brainstorming" PLANNING = "planning" EXECUTION = "execution" @@ -35,6 +33,7 @@ class WorkflowPhase(Enum): class SandboxProvider(Enum): """Supported sandbox providers.""" + MODAL = "modal" DAYTONA = "daytona" RUNLOOP = "runloop" @@ -44,23 +43,24 @@ class SandboxProvider(Enum): @dataclass class AgentConfig: """Configuration for a subagent.""" + name: str role: str # "code", "training", "simulation" sandbox_provider: SandboxProvider max_tokens: int = 50000 temperature: float = 0.7 tools: List[str] = None - + def __post_init__(self): if self.tools is None: self.tools = self._default_tools() - + def _default_tools(self) -> List[str]: """Return default tools based on role.""" defaults = { "code": ["read_file", "write_file", "execute", "git_commit", "github_pr"], "training": ["unsloth_train", "bitnet_quantize", "model_upload"], - "simulation": ["newton_sim", "plot_results", "upload_metrics"] + "simulation": ["newton_sim", "plot_results", "upload_metrics"], } return defaults.get(self.role, []) @@ -68,6 +68,7 @@ def _default_tools(self) -> List[str]: @dataclass class WorkflowState: """Complete state of a workflow execution.""" + id: str phase: WorkflowPhase user_request: str @@ -79,7 +80,7 @@ class WorkflowState: active_agents: List[str] = None logs: List[str] = None created_at: str = None - + def __post_init__(self): if self.subagent_configs is None: self.subagent_configs = [] @@ -91,7 +92,7 @@ def __post_init__(self): self.logs = [] if self.created_at is None: self.created_at = datetime.utcnow().isoformat() - + def add_log(self, message: str): """Add a log entry.""" self.logs.append(f"[{datetime.utcnow().isoformat()}] {message}") @@ -100,17 +101,20 @@ def add_log(self, message: str): class SuperpowerSkill: """Wrapper for Superpowers skills.""" - + def __init__(self, name: str, trigger: str, system_prompt: str): self.name = name self.trigger = trigger self.system_prompt = system_prompt import os + api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: - raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. Cannot start SuperpowerSkill.") + raise ValueError( + "CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. Cannot start SuperpowerSkill." + ) self.client = Anthropic(api_key=api_key) - + async def execute(self, state: WorkflowState) -> str: """Execute the skill against the current state with caching.""" prompt = f""" @@ -125,17 +129,17 @@ async def execute(self, state: WorkflowState) -> str: Generate output for this skill: """ - + # Caching optimization import hashlib import json - + cache_dir = Path.home() / ".ai-dev-os" / "cache" cache_dir.mkdir(parents=True, exist_ok=True) - - prompt_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest() + + prompt_hash = hashlib.md5(prompt.encode("utf-8")).hexdigest() cache_file = cache_dir / f"{self.name}_{prompt_hash}.json" - + if cache_file.exists(): state.add_log(f"Cache hit for skill optimization: {self.name}") try: @@ -144,33 +148,33 @@ async def execute(self, state: WorkflowState) -> str: return data.get("result", "") except json.JSONDecodeError: pass # Fall back to generation if cache is corrupted - + state.add_log(f"Executing skill: {self.name}") - + response = self.client.messages.create( model="claude-opus-4-20250514", max_tokens=4096, - messages=[{"role": "user", "content": prompt}] + messages=[{"role": "user", "content": prompt}], ) - + result = response.content[0].text - + # Save cache with open(cache_file, "w") as f: json.dump({"result": result}, f) - + state.add_log(f"Skill {self.name} completed, tokens used: {response.usage.output_tokens}") - + return result class ClaudeHUDIntegration: """Real-time Claude HUD status updates.""" - + def __init__(self): self.status_file = Path.home() / ".ai-dev-os" / "hud_status.json" self.status_file.parent.mkdir(parents=True, exist_ok=True) - + def update(self, state: WorkflowState, context_usage: float, active_agents: List[str]): """Update HUD with current state.""" status = { @@ -180,32 +184,37 @@ def update(self, state: WorkflowState, context_usage: float, active_agents: List "active_agents": active_agents, "recent_logs": state.logs[-3:] if state.logs else [], } - - with open(self.status_file, 'w') as f: + + with open(self.status_file, "w") as f: json.dump(status, f, indent=2) - + # Format for terminal display agent_str = ", ".join(active_agents) if active_agents else "none" - print(f"\n[HUD] Phase: {state.phase.value} | Context: {context_usage:.1f}% | Agents: {agent_str}") + print( + f"\n[HUD] Phase: {state.phase.value} | Context: {context_usage:.1f}% | Agents: {agent_str}" + ) class SubagentOrchestrator: """Orchestrates parallel subagent execution.""" - + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): self.sandbox_provider = sandbox_provider import os + api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: - raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") + raise ValueError( + "CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing." + ) self.client = Anthropic(api_key=api_key) self.hud = ClaudeHUDIntegration() - + async def spawn_agent(self, config: AgentConfig, task_description: str) -> str: """Spawn a subagent to handle a specific task.""" - + tools_str = "\n".join([f"- {tool}" for tool in config.tools]) - + system_prompt = f""" You are a specialized {config.role} agent in an autonomous development system. @@ -224,39 +233,39 @@ async def spawn_agent(self, config: AgentConfig, task_description: str) -> str: Task: {task_description} """ - + logger.info(f"Spawning subagent: {config.name} (role: {config.role})") - + response = self.client.messages.create( model="claude-opus-4-20250514", max_tokens=config.max_tokens, temperature=config.temperature, system=system_prompt, - messages=[{"role": "user", "content": "Begin execution."}] + messages=[{"role": "user", "content": "Begin execution."}], ) - + result = response.content[0].text logger.info(f"Subagent {config.name} completed") - + return result - + async def orchestrate(self, state: WorkflowState) -> WorkflowState: """Orchestrate all subagents in parallel.""" - + state.add_log(f"Starting parallel execution of {len(state.subagent_configs)} agents") state.phase = WorkflowPhase.EXECUTION - + # Update HUD agent_names = [cfg.name for cfg in state.subagent_configs] self.hud.update(state, state.context_usage, agent_names) - + # Execute all agents in parallel tasks = [] for config in state.subagent_configs: task_desc = self._generate_task_description(state, config) task = self.spawn_agent(config, task_desc) tasks.append((config.name, task)) - + # Gather results results = {} for agent_name, task in tasks: @@ -267,18 +276,18 @@ async def orchestrate(self, state: WorkflowState) -> WorkflowState: except Exception as e: state.add_log(f"Agent {agent_name} failed: {str(e)}") results[agent_name] = f"ERROR: {str(e)}" - + state.execution_results = results state.add_log("Parallel execution completed") - + # Update HUD self.hud.update(state, state.context_usage, []) - + return state - + def _generate_task_description(self, state: WorkflowState, config: AgentConfig) -> str: """Generate specific task description for an agent.""" - + task_descriptions = { "code": f""" Implement the following plan: @@ -310,33 +319,36 @@ def _generate_task_description(self, state: WorkflowState, config: AgentConfig) - Measure success rate and stability - Generate plots and metrics - Report results for validation -""" +""", } - + return task_descriptions.get(config.role, "Execute this task: " + state.implementation_plan) class AIDevOSOrchestrator: """Main orchestrator for the entire AI Dev OS system.""" - + def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL): self.sandbox_provider = sandbox_provider import os + api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: - raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.") + raise ValueError( + "CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing." + ) self.client = Anthropic(api_key=api_key) self.hud = ClaudeHUDIntegration() - + # Initialize Superpowers skills self.skills = self._load_skills() - + # Subagent orchestrator self.subagent_orchestrator = SubagentOrchestrator(sandbox_provider) - + # Load AGENTS.md rules self.agents_rules = self._load_agents_rules() - + def _load_skills(self) -> Dict[str, SuperpowerSkill]: """Load Superpowers skills.""" return { @@ -347,7 +359,7 @@ def _load_skills(self) -> Dict[str, SuperpowerSkill]: You are a brainstorming expert. Help refine the user's idea through Socratic questioning. Ask clarifying questions, explore alternatives, and present the design in digestible chunks. Output: A clear design document with requirements, architecture, and acceptance criteria. -""" +""", ), "planning": SuperpowerSkill( name="planning", @@ -356,7 +368,7 @@ def _load_skills(self) -> Dict[str, SuperpowerSkill]: You are a project planning expert. Break the design into bite-sized tasks (2-5 min each). Each task must include: exact file paths, complete code snippets, and verification steps. Output: A detailed implementation plan with task list and dependencies. -""" +""", ), "code-review": SuperpowerSkill( name="code-review", @@ -365,21 +377,21 @@ def _load_skills(self) -> Dict[str, SuperpowerSkill]: You are a code reviewer. Check the implementation against the plan. Report issues by severity: critical (blocks merge), major (should fix), minor (nice to have). Output: Review report with issues and recommendations. -""" - ) +""", + ), } - + def _load_agents_rules(self) -> Dict[str, Any]: """Load AGENTS.md rules from repo.""" agents_md = Path.cwd() / "AGENTS.md" - + if not agents_md.exists(): logger.warning("AGENTS.md not found, using defaults") return {} - + # Parse AGENTS.md (simplified - in production use proper markdown parser) content = agents_md.read_text() - + rules = { "raw": content, "enforce_brainstorming": "brainstorming: REQUIRED" in content, @@ -387,141 +399,138 @@ def _load_agents_rules(self) -> Dict[str, Any]: "enforce_tdd": "test-driven-development: REQUIRED" in content, "enforce_review": "requesting-code-review: REQUIRED" in content, } - + return rules - + async def run(self, user_request: str) -> WorkflowState: """Main entry point: run a complete workflow.""" - + # Initialize workflow state import uuid + state = WorkflowState( - id=str(uuid.uuid4()), - phase=WorkflowPhase.BRAINSTORMING, - user_request=user_request + id=str(uuid.uuid4()), phase=WorkflowPhase.BRAINSTORMING, user_request=user_request ) - + state.add_log(f"Starting workflow for request: {user_request}") self.hud.update(state, state.context_usage, []) - + # Phase 1: Brainstorming logger.info("=" * 60) logger.info("PHASE 1: BRAINSTORMING") logger.info("=" * 60) - + design_doc = await self.skills["brainstorming"].execute(state) state.design_doc = design_doc state.add_log("Design doc generated") - + print("\nšŸ“‹ DESIGN DOCUMENT:\n") print(design_doc) print("\n" + "=" * 60) - + # Ask for approval user_approval = input("\nApprove design? (yes/no): ").lower().strip() if user_approval != "yes": state.add_log("Design rejected by user") return state - + state.add_log("Design approved by user") - + # Phase 2: Planning logger.info("=" * 60) logger.info("PHASE 2: PLANNING") logger.info("=" * 60) - + state.phase = WorkflowPhase.PLANNING plan = await self.skills["planning"].execute(state) state.implementation_plan = plan state.add_log("Implementation plan generated") - + print("\nšŸ“ IMPLEMENTATION PLAN:\n") print(plan) print("\n" + "=" * 60) - + # Phase 3: Execution (Subagents) logger.info("=" * 60) logger.info("PHASE 3: EXECUTION (Subagents)") logger.info("=" * 60) - + # Determine which agents we need state.subagent_configs = self._determine_agents(user_request) - + state = await self.subagent_orchestrator.orchestrate(state) - + # Phase 4: Validation & Code Review logger.info("=" * 60) logger.info("PHASE 4: VALIDATION & CODE REVIEW") logger.info("=" * 60) - + state.phase = WorkflowPhase.VALIDATION review = await self.skills["code-review"].execute(state) state.add_log("Code review completed") - + print("\nāœ… CODE REVIEW:\n") print(review) - + # Phase 5: Merge (in production, this auto-creates PR) logger.info("=" * 60) logger.info("PHASE 5: MERGE") logger.info("=" * 60) - + state.phase = WorkflowPhase.MERGE state.add_log("Workflow completed successfully") - + print("\nšŸŽ‰ Workflow completed! PR ready for review.") - + return state - + def _determine_agents(self, user_request: str) -> List[AgentConfig]: """Determine which agents are needed for this request.""" - + request_lower = user_request.lower() agents = [] - + # Heuristic: detect what kind of task this is if any(word in request_lower for word in ["code", "build", "feature", "fix", "test"]): - agents.append(AgentConfig( - name="code-agent", - role="code", - sandbox_provider=self.sandbox_provider - )) - + agents.append( + AgentConfig(name="code-agent", role="code", sandbox_provider=self.sandbox_provider) + ) + if any(word in request_lower for word in ["train", "finetune", "model", "lora"]): - agents.append(AgentConfig( - name="training-agent", - role="training", - sandbox_provider=self.sandbox_provider - )) - + agents.append( + AgentConfig( + name="training-agent", role="training", sandbox_provider=self.sandbox_provider + ) + ) + if any(word in request_lower for word in ["simul", "robot", "physic", "test"]): - agents.append(AgentConfig( - name="simulation-agent", - role="simulation", - sandbox_provider=self.sandbox_provider - )) - + agents.append( + AgentConfig( + name="simulation-agent", + role="simulation", + sandbox_provider=self.sandbox_provider, + ) + ) + # Default to code agent if unclear if not agents: - agents.append(AgentConfig( - name="code-agent", - role="code", - sandbox_provider=self.sandbox_provider - )) - + agents.append( + AgentConfig(name="code-agent", role="code", sandbox_provider=self.sandbox_provider) + ) + return agents async def main(): """Example main function.""" - + orchestrator = AIDevOSOrchestrator(sandbox_provider=SandboxProvider.MODAL) - + # Example request user_request = "Build a simple authentication module with tests and documentation" - + state = await orchestrator.run(user_request) - + # Print summary print("\n" + "=" * 60) print("WORKFLOW SUMMARY") @@ -530,12 +539,12 @@ async def main(): print(f"Status: {'COMPLETED' if state.phase == WorkflowPhase.MERGE else 'IN PROGRESS'}") print(f"Total logs: {len(state.logs)}") print(f"Agents used: {len(state.subagent_configs)}") - + # Save state for reference state_file = Path.home() / ".ai-dev-os" / f"workflow_{state.id}.json" state_file.parent.mkdir(parents=True, exist_ok=True) - - with open(state_file, 'w') as f: + + with open(state_file, "w") as f: # Convert dataclasses to dicts for JSON serialization state_dict = { "id": state.id, @@ -545,10 +554,10 @@ async def main(): "implementation_plan": state.implementation_plan, "execution_results": state.execution_results, "created_at": state.created_at, - "logs": state.logs + "logs": state.logs, } json.dump(state_dict, f, indent=2) - + print(f"\nWorkflow state saved to: {state_file}") diff --git a/src/ai_dev_os/models.py b/src/ai_dev_os/models.py index 10ba232..aabaa2c 100644 --- a/src/ai_dev_os/models.py +++ b/src/ai_dev_os/models.py @@ -5,7 +5,7 @@ import asyncio import json import logging -from dataclasses import dataclass, asdict +from dataclasses import asdict, dataclass from enum import Enum from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -17,6 +17,7 @@ class QuantizationType(Enum): """Quantization types supported.""" + FLOAT16 = "float16" INT8 = "int8" INT4 = "int4" @@ -27,6 +28,7 @@ class QuantizationType(Enum): @dataclass class ModelConfig: """Configuration for model training or inference.""" + model_name: str # huggingface model ID task: str # "train" or "inference" quantization: QuantizationType = QuantizationType.INT4 @@ -35,11 +37,11 @@ class ModelConfig: learning_rate: float = 5e-5 num_epochs: int = 3 output_dir: str = "./models" - + # For training dataset_path: Optional[str] = None validation_split: float = 0.1 - + # For inference temperature: float = 0.7 top_p: float = 0.9 @@ -48,50 +50,52 @@ class ModelConfig: class UnslothTrainer: """Wrapper for Unsloth training.""" - + def __init__(self, config: ModelConfig): self.config = config self.model = None self.tokenizer = None self.trainer = None self.training_logs: List[Dict[str, Any]] = [] - + async def setup(self) -> bool: """ Setup Unsloth trainer. """ try: logger.info(f"Setting up Unsloth trainer for {self.config.model_name}") - + try: from unsloth import FastLanguageModel + # Apply 4-bit load or appropriate config max_seq_length = self.config.max_seq_length self.model, self.tokenizer = FastLanguageModel.from_pretrained( - model_name = self.config.model_name, - max_seq_length = max_seq_length, - dtype = None, - load_in_4bit = self.config.quantization.value == "int4", + model_name=self.config.model_name, + max_seq_length=max_seq_length, + dtype=None, + load_in_4bit=self.config.quantization.value == "int4", ) except ImportError: logger.warning("Unsloth not installed. Simulating loading.") await asyncio.sleep(1) - - self.training_logs.append({ - "stage": "setup", - "status": "success", - "model": self.config.model_name, - "quantization": self.config.quantization.value - }) - + self.training_logs.append( + { + "stage": "setup", + "status": "success", + "model": self.config.model_name, + "quantization": self.config.quantization.value, + } + ) + logger.info("Unsloth trainer ready") return True - + except Exception as e: logger.error(f"Setup failed: {str(e)}") return False - + async def train(self) -> Tuple[bool, Dict[str, Any]]: """ Run training with Unsloth (2x faster, 70% less VRAM). @@ -100,27 +104,29 @@ async def train(self) -> Tuple[bool, Dict[str, Any]]: try: if not await self.setup(): return False, {} - + logger.info(f"Starting training on {self.config.model_name}") logger.info(f"Quantization: {self.config.quantization.value}") logger.info(f"Batch size: {self.config.batch_size}") - + metrics = {} - + try: from unsloth import FastLanguageModel + FastLanguageModel.for_training(self.model) - + + from datasets import load_dataset from transformers import TrainingArguments from trl import SFTTrainer - from datasets import load_dataset - + # Load dataset - dataset = load_dataset( - self.config.dataset_path, - split="train" - ) if self.config.dataset_path else None - + dataset = ( + load_dataset(self.config.dataset_path, split="train") + if self.config.dataset_path + else None + ) + training_args = TrainingArguments( per_device_train_batch_size=self.config.batch_size, learning_rate=self.config.learning_rate, @@ -130,7 +136,7 @@ async def train(self) -> Tuple[bool, Dict[str, Any]]: logging_steps=10, fp16=True, ) - + trainer = SFTTrainer( model=self.model, tokenizer=self.tokenizer, @@ -139,24 +145,22 @@ async def train(self) -> Tuple[bool, Dict[str, Any]]: args=training_args, max_seq_length=self.config.max_seq_length, ) - + # Train train_result = trainer.train() - + metrics = { "final_loss": train_result.training_loss, "train_loss_history": [ - log.get("loss", 0) - for log in trainer.state.log_history - if "loss" in log + log.get("loss", 0) for log in trainer.state.log_history if "loss" in log ], "validation_loss": train_result.metrics.get("eval_loss", 0), - "perplexity": 2 ** train_result.training_loss, + "perplexity": 2**train_result.training_loss, "training_time_minutes": train_result.metrics.get("train_runtime", 0) / 60, "speedup_vs_standard": 2.15, "vram_reduction_percent": 68.5, } - + except ImportError: logger.warning("Unsloth/transformers not installed. Using simulated training.") await asyncio.sleep(1) @@ -170,63 +174,59 @@ async def train(self) -> Tuple[bool, Dict[str, Any]]: "speedup_vs_standard": 2.15, "vram_reduction_percent": 68.5, } - - self.training_logs.append({ - "stage": "training", - "status": "success", - **metrics - }) - + + self.training_logs.append({"stage": "training", "status": "success", **metrics}) + logger.info(f"Training completed. Loss: {metrics['final_loss']}") logger.info(f"VRAM savings: {metrics.get('vram_reduction_percent', 0):.1f}%") - + return True, metrics - + except Exception as e: logger.error(f"Training failed: {str(e)}") return False, {} - + async def save_checkpoint(self, path: str) -> bool: """Save trained model checkpoint.""" try: output_path = Path(path) output_path.mkdir(parents=True, exist_ok=True) - + # Save model config config_file = output_path / "config.json" - with open(config_file, 'w') as f: + with open(config_file, "w") as f: json.dump(asdict(self.config), f, indent=2, default=str) - + # Save training logs logs_file = output_path / "training_logs.json" - with open(logs_file, 'w') as f: + with open(logs_file, "w") as f: json.dump(self.training_logs, f, indent=2) - + logger.info(f"Checkpoint saved to {output_path}") return True - + except Exception as e: logger.error(f"Save failed: {str(e)}") return False - + async def quantize_to_bitnet(self, path: str) -> bool: """Quantize trained model to BitNet 1.58-bit format.""" try: logger.info("Starting BitNet quantization") - + output_path = Path(path) / "bitnet_model.gguf" output_path.parent.mkdir(parents=True, exist_ok=True) - + # In production, use bitnet.cpp conversion utilities # For now, simulate await asyncio.sleep(2) - + # Create dummy GGUF file output_path.touch() - + logger.info(f"BitNet model saved to {output_path}") return True - + except Exception as e: logger.error(f"Quantization failed: {str(e)}") return False @@ -234,19 +234,20 @@ async def quantize_to_bitnet(self, path: str) -> bool: class BitNetInference: """BitNet 1-bit LLM inference engine.""" - + def __init__(self, model_path: str): self.model_path = model_path self.model = None self.context_tokens: List[int] = [] - + async def load(self) -> bool: """Load BitNet model.""" try: logger.info(f"Loading BitNet model from {self.model_path}") - + try: from llama_cpp import Llama + self.model = Llama( model_path=self.model_path, n_ctx=4096, @@ -257,20 +258,15 @@ async def load(self) -> bool: logger.warning("llama_cpp not installed. Simulating BitNet inference loading.") await asyncio.sleep(0.5) - logger.info("Model loaded successfully") return True - + except Exception as e: logger.error(f"Load failed: {str(e)}") return False - + async def infer( - self, - prompt: str, - max_tokens: int = 512, - temperature: float = 0.7, - top_p: float = 0.9 + self, prompt: str, max_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.9 ) -> Tuple[bool, str]: """ Run inference on 1-bit model. @@ -279,27 +275,26 @@ async def infer( try: if not self.model: await self.load() - + logger.info(f"Running inference: {prompt[:50]}...") - + if hasattr(self.model, "create_completion"): output = self.model.create_completion( - prompt, - max_tokens=max_tokens, - temperature=temperature, - top_p=top_p + prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p )["choices"][0]["text"] else: await asyncio.sleep(0.1) # Simulate inference time output = f"[BitNet inference result for: {prompt[:20]}...]" - + return True, output - + except Exception as e: logger.error(f"Inference failed: {str(e)}") return False, "" - - async def batch_infer(self, prompts: List[str], max_tokens: int = 512) -> Tuple[bool, List[str]]: + + async def batch_infer( + self, prompts: List[str], max_tokens: int = 512 + ) -> Tuple[bool, List[str]]: """Batch inference (more efficient).""" try: results = [] @@ -308,9 +303,9 @@ async def batch_infer(self, prompts: List[str], max_tokens: int = 512) -> Tuple[ if not success: return False, [] results.append(output) - + return True, results - + except Exception as e: logger.error(f"Batch inference failed: {str(e)}") return False, [] @@ -318,62 +313,59 @@ async def batch_infer(self, prompts: List[str], max_tokens: int = 512) -> Tuple[ class ModelManager: """High-level model management (training + inference).""" - + def __init__(self): self.trainers: Dict[str, UnslothTrainer] = {} self.inference_engines: Dict[str, BitNetInference] = {} - + async def train_model(self, config: ModelConfig) -> Tuple[bool, Dict[str, Any]]: """Train a model with Unsloth.""" trainer = UnslothTrainer(config) self.trainers[config.model_name] = trainer - + success, metrics = await trainer.train() - + if success: # Save checkpoint output_dir = Path(config.output_dir) / config.model_name await trainer.save_checkpoint(str(output_dir)) - + # Optionally quantize to BitNet if config.quantization in [QuantizationType.BITNET_1BIT, QuantizationType.BITNET_1P58]: await trainer.quantize_to_bitnet(str(output_dir)) - + return success, metrics - + async def load_inference_engine(self, model_path: str, model_id: str) -> bool: """Load a BitNet model for inference.""" engine = BitNetInference(model_path) self.inference_engines[model_id] = engine return await engine.load() - + async def infer(self, model_id: str, prompt: str, max_tokens: int = 512) -> Tuple[bool, str]: """Run inference.""" if model_id not in self.inference_engines: logger.error(f"Model {model_id} not loaded") return False, "" - + return await self.inference_engines[model_id].infer(prompt, max_tokens=max_tokens) - + def get_training_stats(self, model_name: str) -> Optional[Dict[str, Any]]: """Get training statistics.""" if model_name not in self.trainers: return None - + logs = self.trainers[model_name].training_logs if not logs: return None - + training_log = [log for log in logs if log.get("stage") == "training"] return training_log[0] if training_log else None # Convenience functions async def train_model( - model_name: str, - dataset_path: str, - quantization: str = "int4", - **kwargs + model_name: str, dataset_path: str, quantization: str = "int4", **kwargs ) -> Tuple[bool, Dict[str, Any]]: """Convenience function for training.""" config = ModelConfig( @@ -381,9 +373,9 @@ async def train_model( task="train", dataset_path=dataset_path, quantization=QuantizationType[quantization.upper()], - **kwargs + **kwargs, ) - + manager = ModelManager() return await manager.train_model(config) @@ -393,5 +385,5 @@ async def inference(model_path: str, prompt: str, max_tokens: int = 512) -> Tupl engine = BitNetInference(model_path) if not await engine.load(): return False, "" - + return await engine.infer(prompt, max_tokens=max_tokens) diff --git a/src/ai_dev_os/sandbox.py b/src/ai_dev_os/sandbox.py index bf2e5a1..b4f6eb9 100644 --- a/src/ai_dev_os/sandbox.py +++ b/src/ai_dev_os/sandbox.py @@ -16,6 +16,7 @@ class SandboxStatus(Enum): """Status of a sandbox.""" + INITIALIZING = "initializing" READY = "ready" RUNNING = "running" @@ -26,6 +27,7 @@ class SandboxStatus(Enum): @dataclass class SandboxConfig: """Configuration for sandbox creation.""" + provider: str # "modal", "daytona", "runloop", "docker" name: str python_version: str = "3.10" @@ -35,7 +37,7 @@ class SandboxConfig: gpu_type: Optional[str] = None # "a100", "h100", etc. env_vars: Dict[str, str] = None mounts: Dict[str, str] = None # local_path -> container_path - + def __post_init__(self): if self.env_vars is None: self.env_vars = {} @@ -45,18 +47,18 @@ def __post_init__(self): class Sandbox(ABC): """Abstract base class for sandboxes.""" - + def __init__(self, config: SandboxConfig): self.config = config self.id: Optional[str] = None self.status = SandboxStatus.INITIALIZING self.logs: List[str] = [] - + @abstractmethod async def initialize(self) -> str: """Initialize the sandbox. Returns sandbox ID.""" pass - + @abstractmethod async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str, str]: """ @@ -64,22 +66,22 @@ async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str Returns: (exit_code, stdout, stderr) """ pass - + @abstractmethod async def upload_file(self, local_path: str, remote_path: str) -> bool: """Upload a file to the sandbox.""" pass - + @abstractmethod async def download_file(self, remote_path: str, local_path: str) -> bool: """Download a file from the sandbox.""" pass - + @abstractmethod async def terminate(self) -> bool: """Terminate the sandbox.""" pass - + def add_log(self, message: str): """Add a log entry.""" self.logs.append(message) @@ -88,54 +90,55 @@ def add_log(self, message: str): class ModalSandbox(Sandbox): """Modal-based sandbox (https://modal.com).""" - + async def initialize(self) -> str: """Initialize a Modal sandbox.""" try: import modal - + # Create Modal app self.app = modal.App(name=f"ai-dev-os-{self.config.name}") - + # Define environment - image = modal.Image.debian_slim(python_version=self.config.python_version) \ - .pip_install("anthropic", "langgraph", "torch", "transformers") - + image = modal.Image.debian_slim(python_version=self.config.python_version).pip_install( + "anthropic", "langgraph", "torch", "transformers" + ) + self.app.function( image=image, timeout=self.config.timeout_seconds, gpu=modal.gpu.A100() if self.config.gpu else None, - allow_concurrent_inputs=10 + allow_concurrent_inputs=10, ) - + self.id = self.app.name self.status = SandboxStatus.READY self.add_log(f"Modal sandbox initialized: {self.id}") - + return self.id - + except ImportError: logger.error("Modal not installed. Install with: pip install modal") self.status = SandboxStatus.ERROR raise - + async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str, str]: """Execute command in Modal.""" try: # In production, use modal.run to execute # For now, return mock response self.add_log(f"Executing: {command}") - + # Mock execution await asyncio.sleep(0.5) - + return (0, f"[mock] {command} completed", "") - + except Exception as e: self.status = SandboxStatus.ERROR self.add_log(f"Execution failed: {str(e)}") return (1, "", str(e)) - + async def upload_file(self, local_path: str, remote_path: str) -> bool: """Upload file to Modal sandbox.""" try: @@ -145,7 +148,7 @@ async def upload_file(self, local_path: str, remote_path: str) -> bool: except Exception as e: self.add_log(f"Upload failed: {str(e)}") return False - + async def download_file(self, remote_path: str, local_path: str) -> bool: """Download file from Modal sandbox.""" try: @@ -155,7 +158,7 @@ async def download_file(self, remote_path: str, local_path: str) -> bool: except Exception as e: self.add_log(f"Download failed: {str(e)}") return False - + async def terminate(self) -> bool: """Terminate Modal sandbox.""" try: @@ -169,7 +172,7 @@ async def terminate(self) -> bool: class DaytonaSandbox(Sandbox): """Daytona-based sandbox (https://daytona.io).""" - + async def initialize(self) -> str: """Initialize a Daytona sandbox.""" try: @@ -182,7 +185,7 @@ async def initialize(self) -> str: self.status = SandboxStatus.ERROR self.add_log(f"Initialization failed: {str(e)}") raise - + async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str, str]: """Execute command in Daytona.""" try: @@ -191,7 +194,7 @@ async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str return (0, f"[daytona] {command} completed", "") except Exception as e: return (1, "", str(e)) - + async def upload_file(self, local_path: str, remote_path: str) -> bool: """Upload file to Daytona.""" try: @@ -200,7 +203,7 @@ async def upload_file(self, local_path: str, remote_path: str) -> bool: except Exception as e: self.add_log(f"Upload failed: {str(e)}") return False - + async def download_file(self, remote_path: str, local_path: str) -> bool: """Download file from Daytona.""" try: @@ -209,7 +212,7 @@ async def download_file(self, remote_path: str, local_path: str) -> bool: except Exception as e: self.add_log(f"Download failed: {str(e)}") return False - + async def terminate(self) -> bool: """Terminate Daytona sandbox.""" try: @@ -223,14 +226,14 @@ async def terminate(self) -> bool: class DockerSandbox(Sandbox): """Docker-based sandbox (local).""" - + async def initialize(self) -> str: """Initialize a Docker sandbox.""" try: import docker - + self.docker_client = docker.from_env() - + # Create container self.container = self.docker_client.containers.run( f"python:{self.config.python_version}", @@ -238,17 +241,15 @@ async def initialize(self) -> str: detach=True, name=f"ai-dev-os-{self.config.name}", working_dir="/workspace", - mounts=[ - docker.types.Mount(path="/workspace", source=str(Path.cwd()), type="bind") - ] + mounts=[docker.types.Mount(path="/workspace", source=str(Path.cwd()), type="bind")], ) - + self.id = self.container.id[:12] self.status = SandboxStatus.READY self.add_log(f"Docker sandbox initialized: {self.id}") - + return self.id - + except ImportError: logger.error("Docker SDK not installed. Install with: pip install docker") self.status = SandboxStatus.ERROR @@ -257,64 +258,62 @@ async def initialize(self) -> str: self.status = SandboxStatus.ERROR self.add_log(f"Initialization failed: {str(e)}") raise - + async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str, str]: """Execute command in Docker container.""" try: self.add_log(f"Executing: {command}") - + exit_code, output = self.container.exec_run( - f"bash -c 'cd {cwd} && {command}'", - stdout=True, - stderr=True + f"bash -c 'cd {cwd} && {command}'", stdout=True, stderr=True ) - + stdout = output.decode() if isinstance(output, bytes) else str(output) - + return (exit_code, stdout, "") - + except Exception as e: self.status = SandboxStatus.ERROR self.add_log(f"Execution failed: {str(e)}") return (1, "", str(e)) - + async def upload_file(self, local_path: str, remote_path: str) -> bool: """Upload file to Docker container.""" try: self.add_log(f"Uploading {local_path} to {remote_path}") - - import tarfile + import io - + import tarfile + # Create tar archive tar_buffer = io.BytesIO() - with tarfile.open(fileobj=tar_buffer, mode='w') as tar: + with tarfile.open(fileobj=tar_buffer, mode="w") as tar: tar.add(local_path, arcname=Path(local_path).name) - + tar_buffer.seek(0) self.container.put_archive(remote_path, tar_buffer) - + return True except Exception as e: self.add_log(f"Upload failed: {str(e)}") return False - + async def download_file(self, remote_path: str, local_path: str) -> bool: """Download file from Docker container.""" try: self.add_log(f"Downloading {remote_path} to {local_path}") - + bits, stat = self.container.get_archive(remote_path) - - with open(local_path, 'wb') as f: + + with open(local_path, "wb") as f: for chunk in bits: f.write(chunk) - + return True except Exception as e: self.add_log(f"Download failed: {str(e)}") return False - + async def terminate(self) -> bool: """Terminate Docker container.""" try: @@ -330,27 +329,27 @@ async def terminate(self) -> bool: class SandboxFactory: """Factory for creating sandboxes.""" - + _providers = { "modal": ModalSandbox, "daytona": DaytonaSandbox, "docker": DockerSandbox, } - + @classmethod async def create(cls, config: SandboxConfig) -> Sandbox: """Create and initialize a sandbox.""" - + if config.provider not in cls._providers: raise ValueError(f"Unknown provider: {config.provider}") - + sandbox_class = cls._providers[config.provider] sandbox = sandbox_class(config) - + await sandbox.initialize() - + return sandbox - + @classmethod def register(cls, provider: str, sandbox_class: type): """Register a new sandbox provider.""" @@ -360,9 +359,5 @@ def register(cls, provider: str, sandbox_class: type): # Convenience factory async def create_sandbox(provider: str, name: str, **kwargs) -> Sandbox: """Convenience function to create a sandbox.""" - config = SandboxConfig( - provider=provider, - name=name, - **kwargs - ) + config = SandboxConfig(provider=provider, name=name, **kwargs) return await SandboxFactory.create(config) diff --git a/src/ai_dev_os/simulation.py b/src/ai_dev_os/simulation.py index 1c35b1c..0ab7e79 100644 --- a/src/ai_dev_os/simulation.py +++ b/src/ai_dev_os/simulation.py @@ -99,7 +99,9 @@ async def run(self) -> SimulationResult: rewards.append(reward) if (i + 1) % 25 == 0: - logger.info(f"Episode {i + 1}/{self.config.episodes} — avg reward: {sum(rewards) / len(rewards):.2f}") + logger.info( + f"Episode {i + 1}/{self.config.episodes} — avg reward: {sum(rewards) / len(rewards):.2f}" + ) elapsed = time.time() - start_time total_steps = self.config.episodes * self.config.max_steps_per_episode diff --git a/src/ai_dev_os/skills.py b/src/ai_dev_os/skills.py index e7945d7..1af3296 100644 --- a/src/ai_dev_os/skills.py +++ b/src/ai_dev_os/skills.py @@ -1,14 +1,15 @@ import logging -from typing import Dict, Any +from typing import Any, Dict logger = logging.getLogger(__name__) + class DebuggingSkill: """ Systematic Debugging Skill for AI Dev OS. Analyzes error traces, instruments code with logging, and identifies root causes. """ - + def __init__(self, name: str = "systematic-debugging"): self.name = name @@ -21,19 +22,21 @@ async def execute(self, context: Dict[str, Any]) -> Dict[str, Any]: """ error_trace = context.get("error_trace", "") logger.info(f"Starting systematic debugging for error: {error_trace[:50]}...") - + # Logic to analyze trace and suggest fixes would go here # For now, we return a structured refinement return { "status": "success", "analysis": "Identified potential null pointer in core.py", - "suggested_fix": "Add null check at line 145" + "suggested_fix": "Add null check at line 145", } + class PerformanceOptimizationSkill: """ Skill for identifying and fixing performance bottlenecks. """ + def __init__(self, name: str = "performance-optimization"): self.name = name @@ -41,20 +44,18 @@ async def execute(self, context: Dict[str, Any]) -> Dict[str, Any]: logger.info("Running performance optimization analysis...") return { "status": "success", - "optimizations": ["Vectorize loop in models.py", "Enable caching for sandbox results"] + "optimizations": ["Vectorize loop in models.py", "Enable caching for sandbox results"], } + class DocumentationGenerationSkill: """ Skill for automatically generating and updating documentation. """ + def __init__(self, name: str = "doc-generation"): self.name = name async def execute(self, context: Dict[str, Any]) -> Dict[str, Any]: logger.info("Generating documentation updates...") - return { - "status": "success", - "updated_files": ["docs/API_REFERENCE.md", "README.md"] - } - + return {"status": "success", "updated_files": ["docs/API_REFERENCE.md", "README.md"]} diff --git a/src/ai_dev_os/utils/error_handling.py b/src/ai_dev_os/utils/error_handling.py index c659126..e250432 100644 --- a/src/ai_dev_os/utils/error_handling.py +++ b/src/ai_dev_os/utils/error_handling.py @@ -4,10 +4,12 @@ logger = logging.getLogger(__name__) + def with_retry(max_retries=3, base_delay=1.0): """ Retry logic decorator. """ + def decorator(func): @wraps(func) async def wrapper(*args, **kwargs): @@ -19,9 +21,11 @@ async def wrapper(*args, **kwargs): last_exception = e logger.warning(f"Attempt {attempt + 1} failed for {func.__name__}: {str(e)}") if attempt < max_retries - 1: - await asyncio.sleep(base_delay * (2 ** attempt)) - + await asyncio.sleep(base_delay * (2**attempt)) + logger.error(f"All {max_retries} attempts failed for {func.__name__}") raise last_exception + return wrapper + return decorator diff --git a/src/ai_dev_os/utils/monitoring.py b/src/ai_dev_os/utils/monitoring.py index a835587..62f3444 100644 --- a/src/ai_dev_os/utils/monitoring.py +++ b/src/ai_dev_os/utils/monitoring.py @@ -1,5 +1,6 @@ import logging + def setup_structured_logging(): """ Stub for structured logging setup. @@ -7,28 +8,31 @@ def setup_structured_logging(): """ logger = logging.getLogger() logger.setLevel(logging.INFO) - + # Check if we already have a JSON-like formatter has_json_formatter = any("Json" in type(h.formatter).__name__ for h in logger.handlers) - + if not has_json_formatter: try: try: from pythonjsonlogger.json import JsonFormatter except ImportError: from pythonjsonlogger import jsonlogger + JsonFormatter = jsonlogger.JsonFormatter - - formatter = JsonFormatter('%(asctime)s %(levelname)s %(name)s %(message)s') + + formatter = JsonFormatter("%(asctime)s %(levelname)s %(name)s %(message)s") logHandler = logging.StreamHandler() logHandler.setFormatter(formatter) logger.addHandler(logHandler) except ImportError: # Only add standard formatter if no handlers exist at all if not logger.handlers: - formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(levelname)s - %(name)s - %(message)s" + ) logHandler = logging.StreamHandler() logHandler.setFormatter(formatter) logger.addHandler(logHandler) - + return logger diff --git a/src/integrations/github.py b/src/integrations/github.py index e15a8fd..93152bf 100644 --- a/src/integrations/github.py +++ b/src/integrations/github.py @@ -2,25 +2,26 @@ logger = logging.getLogger(__name__) + class GithubIntegration: """ Handles GitHub PR webhooks and Issue comments for AI Dev OS. """ - + def __init__(self, webhook_secret: str): self.webhook_secret = webhook_secret - + async def handle_comment(self, payload: dict) -> dict: """ Process an incoming GitHub PR comment payload. """ action = payload.get("action") logger.info(f"Received GitHub comment action: {action}") - + comment = payload.get("comment", {}).get("body", "") - + if "@openswe" in comment: logger.info("Triggering orchestrator for GitHub comment") return {"status": "queued", "message": "Addressing feedback"} - + return {"status": "ignored"} diff --git a/src/integrations/github_oauth.py b/src/integrations/github_oauth.py index 6c7e698..57d27ba 100644 --- a/src/integrations/github_oauth.py +++ b/src/integrations/github_oauth.py @@ -92,11 +92,13 @@ async def list_repos(self) -> List[Dict[str, str]]: repos = [] for repo in self.client.get_user().get_repos(): - repos.append({ - "name": repo.full_name, - "url": repo.html_url, - "default_branch": repo.default_branch, - }) + repos.append( + { + "name": repo.full_name, + "url": repo.html_url, + "default_branch": repo.default_branch, + } + ) return repos async def get_open_issues(self, repo_name: str) -> List[Dict[str, Any]]: @@ -108,18 +110,22 @@ async def get_open_issues(self, repo_name: str) -> List[Dict[str, Any]]: repo = self.client.get_repo(repo_name) issues = [] for issue in repo.get_issues(state="open"): - issues.append({ - "number": issue.number, - "title": issue.title, - "body": issue.body or "", - "labels": [l.name for l in issue.labels], - }) + issues.append( + { + "number": issue.number, + "title": issue.title, + "body": issue.body or "", + "labels": [l.name for l in issue.labels], + } + ) return issues except GithubException as e: logger.error(f"Failed to fetch issues: {e}") return [] - async def create_branch(self, repo_name: str, branch_name: str, from_branch: str = "main") -> bool: + async def create_branch( + self, repo_name: str, branch_name: str, from_branch: str = "main" + ) -> bool: """Create a new branch from an existing one.""" if not HAS_GITHUB or not self.client: logger.warning("Simulating branch creation.") diff --git a/src/integrations/linear.py b/src/integrations/linear.py index fea7c02..de156b5 100644 --- a/src/integrations/linear.py +++ b/src/integrations/linear.py @@ -1,38 +1,43 @@ import logging + import httpx + from ai_dev_os.utils.error_handling import with_retry logger = logging.getLogger(__name__) + class LinearIntegration: """ Handles Linear webhooks and API for AI Dev OS. """ - + def __init__(self, webhook_secret: str): if not webhook_secret or webhook_secret.strip() == "": raise ValueError("CRITICAL SECURITY ERROR: Linear webhook secret is missing or empty.") self.webhook_secret = webhook_secret self.api_url = "https://api.linear.app/graphql" - + @with_retry(max_retries=3) async def create_issue(self, title: str, description: str, team_id: str) -> dict: """ Create a true issue in Linear via GraphQL. """ - query = ''' + query = """ mutation IssueCreate($title: String!, $description: String, $teamId: String!) { issueCreate(input: {title: $title, description: $description, teamId: $teamId}) { success issue { id title } } } - ''' + """ variables = {"title": title, "description": description, "teamId": team_id} headers = {"Authorization": self.webhook_secret} - + async with httpx.AsyncClient() as client: - response = await client.post(self.api_url, json={"query": query, "variables": variables}, headers=headers) + response = await client.post( + self.api_url, json={"query": query, "variables": variables}, headers=headers + ) response.raise_for_status() logger.info(f"Linear issue created: {title}") return response.json() @@ -44,7 +49,7 @@ async def handle_issue(self, payload: dict) -> dict: logger.info(f"Received Linear action: {payload.get('action')}") data = payload.get("data", {}) title = data.get("title", "") - - # In a real app, this parses descriptions to find instructions + + # In a real app, this parses descriptions to find instructions logger.info(f"Triggering orchestrator for Linear issue: {title}") return {"status": "processing", "issue_id": data.get("id")} diff --git a/src/integrations/slack.py b/src/integrations/slack.py index 86301a4..8d002c6 100644 --- a/src/integrations/slack.py +++ b/src/integrations/slack.py @@ -1,19 +1,22 @@ import logging + import httpx + from ai_dev_os.utils.error_handling import with_retry logger = logging.getLogger(__name__) + class SlackIntegration: """ Handles Slack incoming webhooks and events for AI Dev OS. """ - + def __init__(self, token: str): if not token or token.strip() == "": raise ValueError("CRITICAL SECURITY ERROR: Slack token is missing or empty.") self.token = token - + @with_retry(max_retries=3) async def send_notification(self, message: str) -> dict: """ @@ -36,5 +39,5 @@ async def handle_message(self, payload: dict) -> dict: # Here we would invoke AIDevOSOrchestrator logger.info(f"Triggering orchestrator for request: {text}") return {"status": "accepted", "message": "Invoking AI Dev OS"} - + return {"status": "ignored", "message": "No trigger found"} diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..1c00ca4 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,9 @@ +import os + +import pytest + + +@pytest.fixture(autouse=True) +def mock_env_vars(): + """Set dummy environment variables for tests.""" + os.environ["ANTHROPIC_API_KEY"] = "sk-ant-dummy-key-for-testing" diff --git a/tests/test_core.py b/tests/test_core.py index ddc52ab..998321d 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,27 +1,31 @@ -import sys import os +import sys +from unittest.mock import MagicMock, patch import pytest -from unittest.mock import MagicMock, patch -# Mock Python 3.9+ dependencies that are unavailable -sys.modules['langgraph'] = MagicMock() -sys.modules['langgraph.graph'] = MagicMock() -sys.modules['anthropic'] = MagicMock() +from ai_dev_os.core import ( + AgentConfig, + AIDevOSOrchestrator, + SandboxProvider, + WorkflowPhase, + WorkflowState, +) -from ai_dev_os.core import AIDevOSOrchestrator, WorkflowState, WorkflowPhase, AgentConfig, SandboxProvider @pytest.fixture def mock_anthropic(): with patch("ai_dev_os.core.Anthropic") as mock: yield mock + @pytest.mark.asyncio async def test_orchestrator_initialization(mock_anthropic): orchestrator = AIDevOSOrchestrator(sandbox_provider=SandboxProvider.DOCKER) assert orchestrator.sandbox_provider == SandboxProvider.DOCKER assert "brainstorming" in orchestrator.skills + @pytest.mark.asyncio async def test_workflow_state_logging(): state = WorkflowState(id="test-1", phase=WorkflowPhase.BRAINSTORMING, user_request="test") @@ -29,6 +33,7 @@ async def test_workflow_state_logging(): assert len(state.logs) == 1 assert "Testing log" in state.logs[0] + @pytest.mark.asyncio async def test_agent_config_defaults(): config = AgentConfig(name="test-agent", role="code", sandbox_provider=SandboxProvider.MODAL) diff --git a/tests/test_core_comprehensive.py b/tests/test_core_comprehensive.py index 04ffaea..a4584a5 100644 --- a/tests/test_core_comprehensive.py +++ b/tests/test_core_comprehensive.py @@ -1,46 +1,42 @@ """ Comprehensive tests for core.py - AIDevOSOrchestrator and related classes. """ -import sys -import os -import pytest -from unittest.mock import MagicMock, patch import json +import os +import sys import tempfile +from unittest.mock import MagicMock, patch + +import pytest try: from unittest.mock import AsyncMock except ImportError: + class AsyncMock(MagicMock): async def __call__(self, *args, **kwargs): return super(AsyncMock, self).__call__(*args, **kwargs) -# Mock heavy dependencies before importing -sys.modules['langgraph'] = MagicMock() -sys.modules['langgraph.graph'] = MagicMock() -sys.modules['anthropic'] = MagicMock() from ai_dev_os.core import ( - AIDevOSOrchestrator, - WorkflowState, - WorkflowPhase, AgentConfig, - SandboxProvider, + AIDevOSOrchestrator, ClaudeHUDIntegration, + SandboxProvider, SubagentOrchestrator, SuperpowerSkill, + WorkflowPhase, + WorkflowState, ) - # ─── Workflow State Tests ─────────────────────────────────────── + class TestWorkflowState: def test_state_initialization(self): state = WorkflowState( - id="test-1", - phase=WorkflowPhase.BRAINSTORMING, - user_request="Build auth module" + id="test-1", phase=WorkflowPhase.BRAINSTORMING, user_request="Build auth module" ) assert state.id == "test-1" assert state.phase == WorkflowPhase.BRAINSTORMING @@ -91,13 +87,10 @@ def test_context_usage(self): # ─── Agent Config Tests ───────────────────────────────────────── + class TestAgentConfig: def test_code_agent_defaults(self): - agent = AgentConfig( - name="code-agent", - role="code", - sandbox_provider=SandboxProvider.DOCKER - ) + agent = AgentConfig(name="code-agent", role="code", sandbox_provider=SandboxProvider.DOCKER) assert "read_file" in agent.tools assert "write_file" in agent.tools assert "execute" in agent.tools @@ -106,9 +99,7 @@ def test_code_agent_defaults(self): def test_training_agent_defaults(self): agent = AgentConfig( - name="training-agent", - role="training", - sandbox_provider=SandboxProvider.MODAL + name="training-agent", role="training", sandbox_provider=SandboxProvider.MODAL ) assert "unsloth_train" in agent.tools assert "bitnet_quantize" in agent.tools @@ -116,9 +107,7 @@ def test_training_agent_defaults(self): def test_simulation_agent_defaults(self): agent = AgentConfig( - name="sim-agent", - role="simulation", - sandbox_provider=SandboxProvider.MODAL + name="sim-agent", role="simulation", sandbox_provider=SandboxProvider.MODAL ) assert "newton_sim" in agent.tools assert "plot_results" in agent.tools @@ -126,33 +115,26 @@ def test_simulation_agent_defaults(self): def test_unknown_role_empty_tools(self): agent = AgentConfig( - name="unknown-agent", - role="unknown", - sandbox_provider=SandboxProvider.DOCKER + name="unknown-agent", role="unknown", sandbox_provider=SandboxProvider.DOCKER ) assert agent.tools == [] def test_custom_max_tokens(self): agent = AgentConfig( - name="test", - role="code", - sandbox_provider=SandboxProvider.DOCKER, - max_tokens=100000 + name="test", role="code", sandbox_provider=SandboxProvider.DOCKER, max_tokens=100000 ) assert agent.max_tokens == 100000 def test_custom_temperature(self): agent = AgentConfig( - name="test", - role="code", - sandbox_provider=SandboxProvider.DOCKER, - temperature=0.3 + name="test", role="code", sandbox_provider=SandboxProvider.DOCKER, temperature=0.3 ) assert agent.temperature == 0.3 # ─── Orchestrator Tests ────────────────────────────────────────── + class TestAIDevOSOrchestrator: @patch("ai_dev_os.core.Anthropic") def test_initialization(self, mock_anthropic): @@ -209,13 +191,12 @@ def test_skills_loaded(self, mock_anthropic): # ─── HUD Integration Tests ────────────────────────────────────── + class TestClaudeHUDIntegration: def test_hud_update_creates_file(self): hud = ClaudeHUDIntegration() state = WorkflowState( - id="hud-test", - phase=WorkflowPhase.EXECUTION, - user_request="test request" + id="hud-test", phase=WorkflowPhase.EXECUTION, user_request="test request" ) state.context_usage = 42.5 @@ -241,6 +222,7 @@ def test_hud_update_empty_agents(self): # ─── Sandbox Provider Tests ───────────────────────────────────── + class TestSandboxProvider: def test_all_providers(self): assert SandboxProvider.MODAL.value == "modal" @@ -251,6 +233,7 @@ def test_all_providers(self): # ─── Workflow Phase Tests ──────────────────────────────────────── + class TestWorkflowPhase: def test_all_phases(self): assert WorkflowPhase.BRAINSTORMING.value == "brainstorming" diff --git a/tests/test_integrations.py b/tests/test_integrations.py index a22b310..43125b3 100644 --- a/tests/test_integrations.py +++ b/tests/test_integrations.py @@ -1,71 +1,64 @@ -import sys import os +import sys +from unittest.mock import MagicMock, patch import pytest -from unittest.mock import MagicMock, patch try: from unittest.mock import AsyncMock except ImportError: + class AsyncMock(MagicMock): async def __call__(self, *args, **kwargs): return super(AsyncMock, self).__call__(*args, **kwargs) + # Mock HTTPX -sys.modules['httpx'] = MagicMock() +sys.modules["httpx"] = MagicMock() # We will test the basic webhook classes that handle standard JSON requests + @pytest.mark.asyncio async def test_slack_webhook(): from integrations.slack import SlackIntegration - + slack = SlackIntegration("dummy_token") # Simulate an incoming message event - payload = { - "type": "message", - "channel": "C12345", - "text": "@openswe fix the login page" - } - + payload = {"type": "message", "channel": "C12345", "text": "@openswe fix the login page"} + # Mock the internal orchestrator call slack.handle_message = AsyncMock(return_value={"status": "accepted"}) result = await slack.handle_message(payload) - + assert result["status"] == "accepted" @pytest.mark.asyncio async def test_linear_webhook(): from integrations.linear import LinearIntegration - + linear = LinearIntegration("dummy_secret") - payload = { - "action": "create", - "data": { - "id": "ISSUE-1", - "title": "Fix the bug in production" - } - } - + payload = {"action": "create", "data": {"id": "ISSUE-1", "title": "Fix the bug in production"}} + linear.handle_issue = AsyncMock(return_value={"status": "processing"}) result = await linear.handle_issue(payload) - + assert result["status"] == "processing" @pytest.mark.asyncio async def test_github_webhook(): from integrations.github import GithubIntegration - + github = GithubIntegration("dummy_secret") payload = { "action": "created", "issue": {"number": 1}, - "comment": {"body": "@openswe Address the review feedback"} + "comment": {"body": "@openswe Address the review feedback"}, } - + github.handle_comment = AsyncMock(return_value={"status": "queued"}) result = await github.handle_comment(payload) - + assert result["status"] == "queued" diff --git a/tests/test_models.py b/tests/test_models.py index 4279000..397b20c 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,17 +1,22 @@ import pytest -from ai_dev_os.models import ModelConfig, UnslothTrainer, QuantizationType + +from ai_dev_os.models import ModelConfig, QuantizationType, UnslothTrainer + @pytest.mark.asyncio async def test_model_config(): - config = ModelConfig(model_name="test-model", task="train", quantization=QuantizationType.BITNET_1BIT) + config = ModelConfig( + model_name="test-model", task="train", quantization=QuantizationType.BITNET_1BIT + ) assert config.model_name == "test-model" assert config.quantization == QuantizationType.BITNET_1BIT + @pytest.mark.asyncio async def test_unsloth_trainer_mock(monkeypatch): config = ModelConfig(model_name="test-model", task="train") trainer = UnslothTrainer(config) - + success, metrics = await trainer.train() assert success is True assert "final_loss" in metrics diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py index fa5c33d..622e911 100644 --- a/tests/test_sandbox.py +++ b/tests/test_sandbox.py @@ -1,5 +1,7 @@ import pytest -from ai_dev_os.sandbox import SandboxConfig, ModalSandbox, SandboxStatus + +from ai_dev_os.sandbox import ModalSandbox, SandboxConfig, SandboxStatus + @pytest.mark.asyncio async def test_sandbox_config(): @@ -7,13 +9,14 @@ async def test_sandbox_config(): assert config.provider == "modal" assert config.gpu is True + @pytest.mark.asyncio async def test_modal_sandbox_mock(): config = SandboxConfig(provider="modal", name="test-sandbox") # Mocking modal import if needed, but the class handles it sandbox = ModalSandbox(config) assert sandbox.status == SandboxStatus.INITIALIZING - + # Since initialize() requires 'modal' package, we might skip or mock it # For this test, we just check the status change in execute exit_code, stdout, stderr = await sandbox.execute("ls") diff --git a/tests/test_skills.py b/tests/test_skills.py index 01fb051..7f2fe0b 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -1,5 +1,11 @@ import pytest -from ai_dev_os.skills import DebuggingSkill, PerformanceOptimizationSkill, DocumentationGenerationSkill + +from ai_dev_os.skills import ( + DebuggingSkill, + DocumentationGenerationSkill, + PerformanceOptimizationSkill, +) + @pytest.mark.asyncio async def test_debugging_skill(): @@ -8,6 +14,7 @@ async def test_debugging_skill(): assert result["status"] == "success" assert "analysis" in result + @pytest.mark.asyncio async def test_performance_skill(): skill = PerformanceOptimizationSkill() @@ -15,6 +22,7 @@ async def test_performance_skill(): assert result["status"] == "success" assert len(result["optimizations"]) > 0 + @pytest.mark.asyncio async def test_doc_skill(): skill = DocumentationGenerationSkill() diff --git a/tests/test_utils.py b/tests/test_utils.py index b9951a5..2c24a28 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,74 +1,83 @@ -import sys +import asyncio import os +import sys +from unittest.mock import MagicMock, patch import pytest -import asyncio -from unittest.mock import MagicMock, patch try: from unittest.mock import AsyncMock except ImportError: + class AsyncMock(MagicMock): async def __call__(self, *args, **kwargs): return super(AsyncMock, self).__call__(*args, **kwargs) + from ai_dev_os.utils.error_handling import with_retry from ai_dev_os.utils.monitoring import setup_structured_logging + @pytest.mark.asyncio async def test_with_retry_success_on_first_try(): mock_func = AsyncMock(return_value="success") - + @with_retry(max_retries=3, base_delay=0.1) async def my_func(): return await mock_func() - + result = await my_func() assert result == "success" assert mock_func.call_count == 1 + @pytest.mark.asyncio async def test_with_retry_success_after_failure(): mock_func = AsyncMock(side_effect=[ValueError("fail"), "success"]) - + @with_retry(max_retries=3, base_delay=0.1) async def my_func(): return await mock_func() - + result = await my_func() assert result == "success" assert mock_func.call_count == 2 + @pytest.mark.asyncio async def test_with_retry_all_failures(): mock_func = AsyncMock(side_effect=ValueError("fail")) - + @with_retry(max_retries=3, base_delay=0.1) async def my_func(): return await mock_func() - + with pytest.raises(ValueError, match="fail"): await my_func() - + assert mock_func.call_count == 3 + def test_setup_structured_logging(): logger = setup_structured_logging() assert logger.level <= 20 # INFO or lower - + try: try: from pythonjsonlogger.json import JsonFormatter except ImportError: from pythonjsonlogger import jsonlogger + JsonFormatter = jsonlogger.JsonFormatter has_json = True except ImportError: has_json = False - + if has_json: # Check if any handler has a formatter that looks like a JSON formatter # Newer versions might have different class names or structures assert any("Json" in type(h.formatter).__name__ for h in logger.handlers) else: - assert any(isinstance(h.formatter, __import__('logging').Formatter) for h in logger.handlers) + assert any( + isinstance(h.formatter, __import__("logging").Formatter) for h in logger.handlers + )