diff --git a/.python-version b/.python-version
index e4fba21..92536a9 100644
--- a/.python-version
+++ b/.python-version
@@ -1 +1 @@
-3.12
+3.12.0
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index c9c6edb..3f2a084 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -13,37 +13,38 @@ AI Dev OS is a unified platform for autonomous AI agent development, combining:
### Unified Platform Architecture
-The following diagram illustrates the integrated flow of the six core technologies within AI Dev OS, as defined in the official platform design:
+The following diagram illustrates the integrated flow of the six core technologies within AI Dev OS, as defined in the official platform design board:
-
+
+**[Official Figma Board: Unified AI Platform Architecture](https://www.figma.com/board/A4TS4yuzBMF9g3IiMcdrBu/unified_ai_platform_architecture?node-id=0-1&t=C9sd6PrCRYsozLOb-0)**
```mermaid
graph TD
- Start((brainstorm)) -->|refinement| B[Superpowers Skill:
Brainstorming + Design]
- B -->|dispatch| C{Approved Spec +
Implementation Plan}
- C --> D[Subagent Orchestration:
LangGraph + Middleware]
+ Start((brainstorm)) -->|refinement| B["Superpowers Skill:
Brainstorming + Design"]
+ B -->|dispatch| C{"Approved Spec +
Implementation Plan"}
+ C --> D["Subagent Orchestration:
LangGraph + Middleware"]
- D -->|spawn Agent C| E[Sim Agent in Sandbox]
- D -->|spawn Agent B| F[Training Agent in Sandbox]
- D -->|spawn Agent A| G[Code Agent in Sandbox]
+ D -->|spawn Agent C| E["Sim Agent (Newton)"]
+ D -->|spawn Agent B| F["Training Agent (Unsloth)"]
+ D -->|spawn Agent A| G["Code Agent (Sandbox)"]
E -->|Newton| H[Newton Engine]
F -->|Unsloth| I[Unsloth Framework]
- H --> J[Physics Simulation:
GPU-accelerated]
- I --> K[Model Training:
2x faster, 70% less VRAM]
+ H -->|metrics| J["Physics Simulation:
GPU-accelerated"]
+ I -->|checkpoints| K["Model Training:
2x faster, 70% less VRAM"]
- J -->|results| L[BitNet.cpp:
1-bit inference]
- K -->|checkpoint| L
+ J -->|results| L["BitNet.cpp:
1-bit inference"]
+ K -->|loss curves| L
- G -->|read/write/test| M[Isolated Cloud Sandbox:
Modal/Daytona]
+ G -->|read/write/test| M["Isolated Cloud Sandbox:
Modal/Daytona"]
L -->|test code| M
- G -->|code ready| N[Superpowers Skills:
TDD + Code Review]
- N -->|verified| O[Merge & Deploy:
Auto-PR + GitHub]
+ G -->|code ready| N["Superpowers Skills:
TDD + Code Review"]
+ N -->|verified| O["Merge & Deploy:
Auto-PR + GitHub"]
O --> Production[[Live Result:
Agent output running]]
- J -->|metrics| P[Claude HUD:
Status + Context + Tools]
+ J -->|metrics| P["Claude HUD:
Status + Context + Tools"]
K -->|loss curves| P
L -->|real-time data| P
M -->|logs| P
diff --git a/README.md b/README.md
index 6734097..6759a1d 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ AI Dev OS is an integrated platform where autonomous AI agents can handle comple
I have integrated the official **Unified AI Platform Architecture** from your design board into the project documentation.
-
+
```
Developer Request (Slack/Linear/CLI)
@@ -261,7 +261,7 @@ Create `~/.claude/plugins/claude-hud/config.json`:
## š Documentation
-- [**ARCHITECTURE.md**](docs/ARCHITECTURE.md) - Deep dive into system design
+- [**ARCHITECTURE.md**](ARCHITECTURE.md) - Deep dive into system design
- [**SETUP_GUIDE.md**](docs/SETUP_GUIDE.md) - Detailed installation for each OS
- [**WORKFLOWS.md**](docs/WORKFLOWS.md) - How to trigger and manage agent workflows
- [**CUSTOMIZATION.md**](docs/CUSTOMIZATION.md) - Extend with custom skills/tools
diff --git a/app/dashboard.py b/app/dashboard.py
index 981311b..42cbb12 100644
--- a/app/dashboard.py
+++ b/app/dashboard.py
@@ -31,6 +31,7 @@
# āāā Authentication āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
+
def check_password():
"""Returns True if the user had the correct password."""
@@ -56,6 +57,7 @@ def password_entered():
else:
return True
+
if not check_password():
st.stop() # Do not continue if check_password is not True.
diff --git a/baseline_test_output.txt b/baseline_test_output.txt
new file mode 100644
index 0000000..6673c91
--- /dev/null
+++ b/baseline_test_output.txt
@@ -0,0 +1,250 @@
+============================= test session starts =============================
+platform win32 -- Python 3.12.0, pytest-9.0.2, pluggy-1.6.0
+rootdir: C:\Users\HASSA\Desktop\AI-DEV-OS
+configfile: pyproject.toml
+testpaths: tests
+plugins: anyio-4.12.1, langsmith-0.7.22, asyncio-1.3.0, cov-7.0.0
+asyncio: mode=Mode.AUTO, debug=False, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function
+collected 38 items
+
+tests\test_core.py F.. [ 7%]
+tests\test_core_comprehensive.py ..........FFFFFFF.... [ 63%]
+tests\test_integrations.py ... [ 71%]
+tests\test_models.py .. [ 76%]
+tests\test_sandbox.py .. [ 81%]
+tests\test_skills.py ... [ 89%]
+tests\test_utils.py .... [100%]
+
+================================== FAILURES ===================================
+______________________ test_orchestrator_initialization _______________________
+
+mock_anthropic =
+
+ @pytest.mark.asyncio
+ async def test_orchestrator_initialization(mock_anthropic):
+> orchestrator = AIDevOSOrchestrator(sandbox_provider=SandboxProvider.DOCKER)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+tests\test_core.py:21:
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+self =
+sandbox_provider =
+
+ def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
+ self.sandbox_provider = sandbox_provider
+ import os
+ api_key = os.getenv("ANTHROPIC_API_KEY")
+ if not api_key:
+> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.
+
+src\ai_dev_os\core.py:327: ValueError
+_________________ TestAIDevOSOrchestrator.test_initialization _________________
+
+self =
+mock_anthropic =
+
+ @patch("ai_dev_os.core.Anthropic")
+ def test_initialization(self, mock_anthropic):
+> orchestrator = AIDevOSOrchestrator(sandbox_provider=SandboxProvider.DOCKER)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+tests\test_core_comprehensive.py:159:
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+self =
+sandbox_provider =
+
+ def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
+ self.sandbox_provider = sandbox_provider
+ import os
+ api_key = os.getenv("ANTHROPIC_API_KEY")
+ if not api_key:
+> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.
+
+src\ai_dev_os\core.py:327: ValueError
+_____________ TestAIDevOSOrchestrator.test_determine_agents_code ______________
+
+self =
+mock_anthropic =
+
+ @patch("ai_dev_os.core.Anthropic")
+ def test_determine_agents_code(self, mock_anthropic):
+> orchestrator = AIDevOSOrchestrator()
+ ^^^^^^^^^^^^^^^^^^^^^
+
+tests\test_core_comprehensive.py:167:
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+self =
+sandbox_provider =
+
+ def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
+ self.sandbox_provider = sandbox_provider
+ import os
+ api_key = os.getenv("ANTHROPIC_API_KEY")
+ if not api_key:
+> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.
+
+src\ai_dev_os\core.py:327: ValueError
+___________ TestAIDevOSOrchestrator.test_determine_agents_training ____________
+
+self =
+mock_anthropic =
+
+ @patch("ai_dev_os.core.Anthropic")
+ def test_determine_agents_training(self, mock_anthropic):
+> orchestrator = AIDevOSOrchestrator()
+ ^^^^^^^^^^^^^^^^^^^^^
+
+tests\test_core_comprehensive.py:174:
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+self =
+sandbox_provider =
+
+ def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
+ self.sandbox_provider = sandbox_provider
+ import os
+ api_key = os.getenv("ANTHROPIC_API_KEY")
+ if not api_key:
+> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.
+
+src\ai_dev_os\core.py:327: ValueError
+__________ TestAIDevOSOrchestrator.test_determine_agents_simulation ___________
+
+self =
+mock_anthropic =
+
+ @patch("ai_dev_os.core.Anthropic")
+ def test_determine_agents_simulation(self, mock_anthropic):
+> orchestrator = AIDevOSOrchestrator()
+ ^^^^^^^^^^^^^^^^^^^^^
+
+tests\test_core_comprehensive.py:181:
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+self =
+sandbox_provider =
+
+ def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
+ self.sandbox_provider = sandbox_provider
+ import os
+ api_key = os.getenv("ANTHROPIC_API_KEY")
+ if not api_key:
+> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.
+
+src\ai_dev_os\core.py:327: ValueError
+____________ TestAIDevOSOrchestrator.test_determine_agents_default ____________
+
+self =
+mock_anthropic =
+
+ @patch("ai_dev_os.core.Anthropic")
+ def test_determine_agents_default(self, mock_anthropic):
+> orchestrator = AIDevOSOrchestrator()
+ ^^^^^^^^^^^^^^^^^^^^^
+
+tests\test_core_comprehensive.py:188:
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+self =
+sandbox_provider =
+
+ def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
+ self.sandbox_provider = sandbox_provider
+ import os
+ api_key = os.getenv("ANTHROPIC_API_KEY")
+ if not api_key:
+> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.
+
+src\ai_dev_os\core.py:327: ValueError
+__________ TestAIDevOSOrchestrator.test_determine_agents_multi_role ___________
+
+self =
+mock_anthropic =
+
+ @patch("ai_dev_os.core.Anthropic")
+ def test_determine_agents_multi_role(self, mock_anthropic):
+> orchestrator = AIDevOSOrchestrator()
+ ^^^^^^^^^^^^^^^^^^^^^
+
+tests\test_core_comprehensive.py:195:
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+self =
+sandbox_provider =
+
+ def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
+ self.sandbox_provider = sandbox_provider
+ import os
+ api_key = os.getenv("ANTHROPIC_API_KEY")
+ if not api_key:
+> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.
+
+src\ai_dev_os\core.py:327: ValueError
+_________________ TestAIDevOSOrchestrator.test_skills_loaded __________________
+
+self =
+mock_anthropic =
+
+ @patch("ai_dev_os.core.Anthropic")
+ def test_skills_loaded(self, mock_anthropic):
+> orchestrator = AIDevOSOrchestrator()
+ ^^^^^^^^^^^^^^^^^^^^^
+
+tests\test_core_comprehensive.py:203:
+_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+
+self =
+sandbox_provider =
+
+ def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
+ self.sandbox_provider = sandbox_provider
+ import os
+ api_key = os.getenv("ANTHROPIC_API_KEY")
+ if not api_key:
+> raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+E ValueError: CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.
+
+src\ai_dev_os\core.py:327: ValueError
+============================== warnings summary ===============================
+tests/test_core.py::test_workflow_state_logging
+tests/test_core_comprehensive.py::TestWorkflowState::test_state_initialization
+tests/test_core_comprehensive.py::TestWorkflowState::test_add_log
+tests/test_core_comprehensive.py::TestWorkflowState::test_state_transitions
+tests/test_core_comprehensive.py::TestWorkflowState::test_context_usage
+tests/test_core_comprehensive.py::TestClaudeHUDIntegration::test_hud_update_creates_file
+tests/test_core_comprehensive.py::TestClaudeHUDIntegration::test_hud_update_empty_agents
+ C:\Users\HASSA\Desktop\AI-DEV-OS\src\ai_dev_os\core.py:93: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).
+ self.created_at = datetime.utcnow().isoformat()
+
+tests/test_core.py::test_workflow_state_logging
+tests/test_core_comprehensive.py::TestWorkflowState::test_add_log
+tests/test_core_comprehensive.py::TestWorkflowState::test_add_log
+ C:\Users\HASSA\Desktop\AI-DEV-OS\src\ai_dev_os\core.py:97: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).
+ self.logs.append(f"[{datetime.utcnow().isoformat()}] {message}")
+
+tests/test_core_comprehensive.py::TestClaudeHUDIntegration::test_hud_update_creates_file
+tests/test_core_comprehensive.py::TestClaudeHUDIntegration::test_hud_update_empty_agents
+ C:\Users\HASSA\Desktop\AI-DEV-OS\src\ai_dev_os\core.py:177: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).
+ "timestamp": datetime.utcnow().isoformat(),
+
+-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
+=========================== short test summary info ===========================
+FAILED tests/test_core.py::test_orchestrator_initialization - ValueError: CRI...
+FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_initialization
+FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_code
+FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_training
+FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_simulation
+FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_default
+FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_determine_agents_multi_role
+FAILED tests/test_core_comprehensive.py::TestAIDevOSOrchestrator::test_skills_loaded
+================== 8 failed, 30 passed, 12 warnings in 3.59s ==================
diff --git a/scripts/setup-sandboxes.py b/scripts/setup-sandboxes.py
index 70dfbf6..9153fda 100644
--- a/scripts/setup-sandboxes.py
+++ b/scripts/setup-sandboxes.py
@@ -15,10 +15,7 @@
from pathlib import Path
# Setup logging
-logging.basicConfig(
- level=logging.INFO,
- format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
-)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger(__name__)
@@ -26,13 +23,13 @@ def setup_claude_hud_config():
"""Setup Claude HUD configuration."""
hud_config_dir = Path.home() / ".claude" / "plugins" / "claude-hud"
hud_config_dir.mkdir(parents=True, exist_ok=True)
-
+
config_file = hud_config_dir / "config.json"
-
+
if config_file.exists():
logger.info(f"Claude HUD config already exists at {config_file}")
return
-
+
config = {
"lineLayout": "expanded",
"pathLevels": 2,
@@ -41,7 +38,7 @@ def setup_claude_hud_config():
"enabled": True,
"showDirty": True,
"showAheadBehind": False,
- "showFileStats": False
+ "showFileStats": False,
},
"display": {
"showModel": True,
@@ -53,20 +50,20 @@ def setup_claude_hud_config():
"showTools": True,
"showAgents": True,
"showTodos": True,
- "showSessionName": False
+ "showSessionName": False,
},
"colors": {
"context": "cyan",
"usage": "cyan",
"warning": "yellow",
"usageWarning": "brightMagenta",
- "critical": "red"
- }
+ "critical": "red",
+ },
}
-
- with open(config_file, 'w') as f:
+
+ with open(config_file, "w") as f:
json.dump(config, f, indent=2)
-
+
logger.info(f"ā Claude HUD config created at {config_file}")
@@ -81,7 +78,7 @@ def setup_local_directories():
Path.cwd() / ".ai-dev-os" / "data",
Path.cwd() / ".ai-dev-os" / "results",
]
-
+
for directory in dirs:
directory.mkdir(parents=True, exist_ok=True)
logger.info(f"ā Directory created: {directory}")
@@ -90,17 +87,18 @@ def setup_local_directories():
async def test_modal_setup():
"""Test Modal sandbox setup."""
logger.info("Testing Modal setup...")
-
+
try:
import modal
+
logger.info("ā Modal SDK installed")
-
+
# Test authentication
if os.environ.get("MODAL_TOKEN_ID") and os.environ.get("MODAL_TOKEN_SECRET"):
logger.info("ā Modal credentials detected")
else:
logger.warning("ā Modal credentials not found. Run: modal token new")
-
+
return True
except ImportError:
logger.error("ā Modal not installed. Run: pip install modal")
@@ -110,9 +108,10 @@ async def test_modal_setup():
async def test_docker_setup():
"""Test Docker sandbox setup."""
logger.info("Testing Docker setup...")
-
+
try:
import docker
+
client = docker.from_env()
client.ping()
logger.info("ā Docker is running")
@@ -128,21 +127,21 @@ async def test_docker_setup():
async def test_anthropic_setup():
"""Test Anthropic API setup."""
logger.info("Testing Anthropic API setup...")
-
+
try:
import anthropic
-
+
api_key = os.environ.get("ANTHROPIC_API_KEY")
if not api_key:
logger.error("ā ANTHROPIC_API_KEY not set")
return False
-
+
client = anthropic.Anthropic()
# Test with a simple message
response = client.messages.create(
model="claude-opus-4-20250514",
max_tokens=10,
- messages=[{"role": "user", "content": "Hi"}]
+ messages=[{"role": "user", "content": "Hi"}],
)
logger.info("ā Anthropic API working")
return True
@@ -154,11 +153,11 @@ async def test_anthropic_setup():
def create_env_file():
"""Create .env file template if it doesn't exist."""
env_file = Path.cwd() / ".env"
-
+
if env_file.exists():
logger.info(f"ā .env file already exists")
return
-
+
env_content = """# AI Dev OS Environment Variables
# Anthropic API
@@ -185,10 +184,10 @@ def create_env_file():
# Development
DEBUG=false
"""
-
- with open(env_file, 'w') as f:
+
+ with open(env_file, "w") as f:
f.write(env_content)
-
+
logger.info(f"ā .env file created. Please edit with your configuration.")
@@ -199,52 +198,48 @@ async def main():
"--provider",
choices=["modal", "docker", "both"],
default="docker",
- help="Sandbox provider to test"
+ help="Sandbox provider to test",
)
- parser.add_argument(
- "--skip-tests",
- action="store_true",
- help="Skip provider tests"
- )
-
+ parser.add_argument("--skip-tests", action="store_true", help="Skip provider tests")
+
args = parser.parse_args()
-
+
logger.info("=" * 60)
logger.info("AI Dev OS Setup")
logger.info("=" * 60)
-
+
# Setup directories and config
logger.info("\n[1/4] Setting up directories...")
setup_local_directories()
-
+
logger.info("\n[2/4] Setting up Claude HUD...")
setup_claude_hud_config()
-
+
logger.info("\n[3/4] Creating .env file...")
create_env_file()
-
+
logger.info("\n[4/4] Testing providers...")
-
+
if not args.skip_tests:
results = {}
-
+
if args.provider in ["modal", "both"]:
results["modal"] = await test_modal_setup()
-
+
if args.provider in ["docker", "both"]:
results["docker"] = await test_docker_setup()
-
+
# Always test Anthropic
results["anthropic"] = await test_anthropic_setup()
-
+
logger.info("\n" + "=" * 60)
logger.info("Setup Status")
logger.info("=" * 60)
-
+
for provider, success in results.items():
status = "ā PASS" if success else "ā FAIL"
logger.info(f"{status}: {provider}")
-
+
logger.info("\n" + "=" * 60)
logger.info("Setup Complete!")
logger.info("=" * 60)
diff --git a/src/ai_dev_os/core.py b/src/ai_dev_os/core.py
index 2b93d78..e620e71 100644
--- a/src/ai_dev_os/core.py
+++ b/src/ai_dev_os/core.py
@@ -7,25 +7,23 @@
import asyncio
import json
import logging
-from dataclasses import dataclass, asdict
+from dataclasses import asdict, dataclass
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
-from langgraph.graph import StateGraph, START, END
from anthropic import Anthropic
+from langgraph.graph import END, START, StateGraph
# Configure logging
-logging.basicConfig(
- level=logging.INFO,
- format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
-)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
logger = logging.getLogger(__name__)
class WorkflowPhase(Enum):
"""Stages of the AI Dev OS workflow."""
+
BRAINSTORMING = "brainstorming"
PLANNING = "planning"
EXECUTION = "execution"
@@ -35,6 +33,7 @@ class WorkflowPhase(Enum):
class SandboxProvider(Enum):
"""Supported sandbox providers."""
+
MODAL = "modal"
DAYTONA = "daytona"
RUNLOOP = "runloop"
@@ -44,23 +43,24 @@ class SandboxProvider(Enum):
@dataclass
class AgentConfig:
"""Configuration for a subagent."""
+
name: str
role: str # "code", "training", "simulation"
sandbox_provider: SandboxProvider
max_tokens: int = 50000
temperature: float = 0.7
tools: List[str] = None
-
+
def __post_init__(self):
if self.tools is None:
self.tools = self._default_tools()
-
+
def _default_tools(self) -> List[str]:
"""Return default tools based on role."""
defaults = {
"code": ["read_file", "write_file", "execute", "git_commit", "github_pr"],
"training": ["unsloth_train", "bitnet_quantize", "model_upload"],
- "simulation": ["newton_sim", "plot_results", "upload_metrics"]
+ "simulation": ["newton_sim", "plot_results", "upload_metrics"],
}
return defaults.get(self.role, [])
@@ -68,6 +68,7 @@ def _default_tools(self) -> List[str]:
@dataclass
class WorkflowState:
"""Complete state of a workflow execution."""
+
id: str
phase: WorkflowPhase
user_request: str
@@ -79,7 +80,7 @@ class WorkflowState:
active_agents: List[str] = None
logs: List[str] = None
created_at: str = None
-
+
def __post_init__(self):
if self.subagent_configs is None:
self.subagent_configs = []
@@ -91,7 +92,7 @@ def __post_init__(self):
self.logs = []
if self.created_at is None:
self.created_at = datetime.utcnow().isoformat()
-
+
def add_log(self, message: str):
"""Add a log entry."""
self.logs.append(f"[{datetime.utcnow().isoformat()}] {message}")
@@ -100,17 +101,20 @@ def add_log(self, message: str):
class SuperpowerSkill:
"""Wrapper for Superpowers skills."""
-
+
def __init__(self, name: str, trigger: str, system_prompt: str):
self.name = name
self.trigger = trigger
self.system_prompt = system_prompt
import os
+
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
- raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. Cannot start SuperpowerSkill.")
+ raise ValueError(
+ "CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing. Cannot start SuperpowerSkill."
+ )
self.client = Anthropic(api_key=api_key)
-
+
async def execute(self, state: WorkflowState) -> str:
"""Execute the skill against the current state with caching."""
prompt = f"""
@@ -125,17 +129,17 @@ async def execute(self, state: WorkflowState) -> str:
Generate output for this skill:
"""
-
+
# Caching optimization
import hashlib
import json
-
+
cache_dir = Path.home() / ".ai-dev-os" / "cache"
cache_dir.mkdir(parents=True, exist_ok=True)
-
- prompt_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
+
+ prompt_hash = hashlib.md5(prompt.encode("utf-8")).hexdigest()
cache_file = cache_dir / f"{self.name}_{prompt_hash}.json"
-
+
if cache_file.exists():
state.add_log(f"Cache hit for skill optimization: {self.name}")
try:
@@ -144,33 +148,33 @@ async def execute(self, state: WorkflowState) -> str:
return data.get("result", "")
except json.JSONDecodeError:
pass # Fall back to generation if cache is corrupted
-
+
state.add_log(f"Executing skill: {self.name}")
-
+
response = self.client.messages.create(
model="claude-opus-4-20250514",
max_tokens=4096,
- messages=[{"role": "user", "content": prompt}]
+ messages=[{"role": "user", "content": prompt}],
)
-
+
result = response.content[0].text
-
+
# Save cache
with open(cache_file, "w") as f:
json.dump({"result": result}, f)
-
+
state.add_log(f"Skill {self.name} completed, tokens used: {response.usage.output_tokens}")
-
+
return result
class ClaudeHUDIntegration:
"""Real-time Claude HUD status updates."""
-
+
def __init__(self):
self.status_file = Path.home() / ".ai-dev-os" / "hud_status.json"
self.status_file.parent.mkdir(parents=True, exist_ok=True)
-
+
def update(self, state: WorkflowState, context_usage: float, active_agents: List[str]):
"""Update HUD with current state."""
status = {
@@ -180,32 +184,37 @@ def update(self, state: WorkflowState, context_usage: float, active_agents: List
"active_agents": active_agents,
"recent_logs": state.logs[-3:] if state.logs else [],
}
-
- with open(self.status_file, 'w') as f:
+
+ with open(self.status_file, "w") as f:
json.dump(status, f, indent=2)
-
+
# Format for terminal display
agent_str = ", ".join(active_agents) if active_agents else "none"
- print(f"\n[HUD] Phase: {state.phase.value} | Context: {context_usage:.1f}% | Agents: {agent_str}")
+ print(
+ f"\n[HUD] Phase: {state.phase.value} | Context: {context_usage:.1f}% | Agents: {agent_str}"
+ )
class SubagentOrchestrator:
"""Orchestrates parallel subagent execution."""
-
+
def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
self.sandbox_provider = sandbox_provider
import os
+
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
- raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+ raise ValueError(
+ "CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing."
+ )
self.client = Anthropic(api_key=api_key)
self.hud = ClaudeHUDIntegration()
-
+
async def spawn_agent(self, config: AgentConfig, task_description: str) -> str:
"""Spawn a subagent to handle a specific task."""
-
+
tools_str = "\n".join([f"- {tool}" for tool in config.tools])
-
+
system_prompt = f"""
You are a specialized {config.role} agent in an autonomous development system.
@@ -224,39 +233,39 @@ async def spawn_agent(self, config: AgentConfig, task_description: str) -> str:
Task:
{task_description}
"""
-
+
logger.info(f"Spawning subagent: {config.name} (role: {config.role})")
-
+
response = self.client.messages.create(
model="claude-opus-4-20250514",
max_tokens=config.max_tokens,
temperature=config.temperature,
system=system_prompt,
- messages=[{"role": "user", "content": "Begin execution."}]
+ messages=[{"role": "user", "content": "Begin execution."}],
)
-
+
result = response.content[0].text
logger.info(f"Subagent {config.name} completed")
-
+
return result
-
+
async def orchestrate(self, state: WorkflowState) -> WorkflowState:
"""Orchestrate all subagents in parallel."""
-
+
state.add_log(f"Starting parallel execution of {len(state.subagent_configs)} agents")
state.phase = WorkflowPhase.EXECUTION
-
+
# Update HUD
agent_names = [cfg.name for cfg in state.subagent_configs]
self.hud.update(state, state.context_usage, agent_names)
-
+
# Execute all agents in parallel
tasks = []
for config in state.subagent_configs:
task_desc = self._generate_task_description(state, config)
task = self.spawn_agent(config, task_desc)
tasks.append((config.name, task))
-
+
# Gather results
results = {}
for agent_name, task in tasks:
@@ -267,18 +276,18 @@ async def orchestrate(self, state: WorkflowState) -> WorkflowState:
except Exception as e:
state.add_log(f"Agent {agent_name} failed: {str(e)}")
results[agent_name] = f"ERROR: {str(e)}"
-
+
state.execution_results = results
state.add_log("Parallel execution completed")
-
+
# Update HUD
self.hud.update(state, state.context_usage, [])
-
+
return state
-
+
def _generate_task_description(self, state: WorkflowState, config: AgentConfig) -> str:
"""Generate specific task description for an agent."""
-
+
task_descriptions = {
"code": f"""
Implement the following plan:
@@ -310,33 +319,36 @@ def _generate_task_description(self, state: WorkflowState, config: AgentConfig)
- Measure success rate and stability
- Generate plots and metrics
- Report results for validation
-"""
+""",
}
-
+
return task_descriptions.get(config.role, "Execute this task: " + state.implementation_plan)
class AIDevOSOrchestrator:
"""Main orchestrator for the entire AI Dev OS system."""
-
+
def __init__(self, sandbox_provider: SandboxProvider = SandboxProvider.MODAL):
self.sandbox_provider = sandbox_provider
import os
+
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
- raise ValueError("CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing.")
+ raise ValueError(
+ "CRITICAL SECURITY ERROR: ANTHROPIC_API_KEY environment variable is missing."
+ )
self.client = Anthropic(api_key=api_key)
self.hud = ClaudeHUDIntegration()
-
+
# Initialize Superpowers skills
self.skills = self._load_skills()
-
+
# Subagent orchestrator
self.subagent_orchestrator = SubagentOrchestrator(sandbox_provider)
-
+
# Load AGENTS.md rules
self.agents_rules = self._load_agents_rules()
-
+
def _load_skills(self) -> Dict[str, SuperpowerSkill]:
"""Load Superpowers skills."""
return {
@@ -347,7 +359,7 @@ def _load_skills(self) -> Dict[str, SuperpowerSkill]:
You are a brainstorming expert. Help refine the user's idea through Socratic questioning.
Ask clarifying questions, explore alternatives, and present the design in digestible chunks.
Output: A clear design document with requirements, architecture, and acceptance criteria.
-"""
+""",
),
"planning": SuperpowerSkill(
name="planning",
@@ -356,7 +368,7 @@ def _load_skills(self) -> Dict[str, SuperpowerSkill]:
You are a project planning expert. Break the design into bite-sized tasks (2-5 min each).
Each task must include: exact file paths, complete code snippets, and verification steps.
Output: A detailed implementation plan with task list and dependencies.
-"""
+""",
),
"code-review": SuperpowerSkill(
name="code-review",
@@ -365,21 +377,21 @@ def _load_skills(self) -> Dict[str, SuperpowerSkill]:
You are a code reviewer. Check the implementation against the plan.
Report issues by severity: critical (blocks merge), major (should fix), minor (nice to have).
Output: Review report with issues and recommendations.
-"""
- )
+""",
+ ),
}
-
+
def _load_agents_rules(self) -> Dict[str, Any]:
"""Load AGENTS.md rules from repo."""
agents_md = Path.cwd() / "AGENTS.md"
-
+
if not agents_md.exists():
logger.warning("AGENTS.md not found, using defaults")
return {}
-
+
# Parse AGENTS.md (simplified - in production use proper markdown parser)
content = agents_md.read_text()
-
+
rules = {
"raw": content,
"enforce_brainstorming": "brainstorming: REQUIRED" in content,
@@ -387,141 +399,138 @@ def _load_agents_rules(self) -> Dict[str, Any]:
"enforce_tdd": "test-driven-development: REQUIRED" in content,
"enforce_review": "requesting-code-review: REQUIRED" in content,
}
-
+
return rules
-
+
async def run(self, user_request: str) -> WorkflowState:
"""Main entry point: run a complete workflow."""
-
+
# Initialize workflow state
import uuid
+
state = WorkflowState(
- id=str(uuid.uuid4()),
- phase=WorkflowPhase.BRAINSTORMING,
- user_request=user_request
+ id=str(uuid.uuid4()), phase=WorkflowPhase.BRAINSTORMING, user_request=user_request
)
-
+
state.add_log(f"Starting workflow for request: {user_request}")
self.hud.update(state, state.context_usage, [])
-
+
# Phase 1: Brainstorming
logger.info("=" * 60)
logger.info("PHASE 1: BRAINSTORMING")
logger.info("=" * 60)
-
+
design_doc = await self.skills["brainstorming"].execute(state)
state.design_doc = design_doc
state.add_log("Design doc generated")
-
+
        print("\n📋 DESIGN DOCUMENT:\n")
print(design_doc)
print("\n" + "=" * 60)
-
+
# Ask for approval
user_approval = input("\nApprove design? (yes/no): ").lower().strip()
if user_approval != "yes":
state.add_log("Design rejected by user")
return state
-
+
state.add_log("Design approved by user")
-
+
# Phase 2: Planning
logger.info("=" * 60)
logger.info("PHASE 2: PLANNING")
logger.info("=" * 60)
-
+
state.phase = WorkflowPhase.PLANNING
plan = await self.skills["planning"].execute(state)
state.implementation_plan = plan
state.add_log("Implementation plan generated")
-
+
        print("\n📋 IMPLEMENTATION PLAN:\n")
print(plan)
print("\n" + "=" * 60)
-
+
# Phase 3: Execution (Subagents)
logger.info("=" * 60)
logger.info("PHASE 3: EXECUTION (Subagents)")
logger.info("=" * 60)
-
+
# Determine which agents we need
state.subagent_configs = self._determine_agents(user_request)
-
+
state = await self.subagent_orchestrator.orchestrate(state)
-
+
# Phase 4: Validation & Code Review
logger.info("=" * 60)
logger.info("PHASE 4: VALIDATION & CODE REVIEW")
logger.info("=" * 60)
-
+
state.phase = WorkflowPhase.VALIDATION
review = await self.skills["code-review"].execute(state)
state.add_log("Code review completed")
-
+
        print("\n✅ CODE REVIEW:\n")
print(review)
-
+
# Phase 5: Merge (in production, this auto-creates PR)
logger.info("=" * 60)
logger.info("PHASE 5: MERGE")
logger.info("=" * 60)
-
+
state.phase = WorkflowPhase.MERGE
state.add_log("Workflow completed successfully")
-
+
        print("\n🎉 Workflow completed! PR ready for review.")
-
+
return state
-
+
def _determine_agents(self, user_request: str) -> List[AgentConfig]:
"""Determine which agents are needed for this request."""
-
+
request_lower = user_request.lower()
agents = []
-
+
# Heuristic: detect what kind of task this is
if any(word in request_lower for word in ["code", "build", "feature", "fix", "test"]):
- agents.append(AgentConfig(
- name="code-agent",
- role="code",
- sandbox_provider=self.sandbox_provider
- ))
-
+ agents.append(
+ AgentConfig(name="code-agent", role="code", sandbox_provider=self.sandbox_provider)
+ )
+
if any(word in request_lower for word in ["train", "finetune", "model", "lora"]):
- agents.append(AgentConfig(
- name="training-agent",
- role="training",
- sandbox_provider=self.sandbox_provider
- ))
-
+ agents.append(
+ AgentConfig(
+ name="training-agent", role="training", sandbox_provider=self.sandbox_provider
+ )
+ )
+
if any(word in request_lower for word in ["simul", "robot", "physic", "test"]):
- agents.append(AgentConfig(
- name="simulation-agent",
- role="simulation",
- sandbox_provider=self.sandbox_provider
- ))
-
+ agents.append(
+ AgentConfig(
+ name="simulation-agent",
+ role="simulation",
+ sandbox_provider=self.sandbox_provider,
+ )
+ )
+
# Default to code agent if unclear
if not agents:
- agents.append(AgentConfig(
- name="code-agent",
- role="code",
- sandbox_provider=self.sandbox_provider
- ))
-
+ agents.append(
+ AgentConfig(name="code-agent", role="code", sandbox_provider=self.sandbox_provider)
+ )
+
return agents
async def main():
"""Example main function."""
-
+
orchestrator = AIDevOSOrchestrator(sandbox_provider=SandboxProvider.MODAL)
-
+
# Example request
user_request = "Build a simple authentication module with tests and documentation"
-
+
state = await orchestrator.run(user_request)
-
+
# Print summary
print("\n" + "=" * 60)
print("WORKFLOW SUMMARY")
@@ -530,12 +539,12 @@ async def main():
print(f"Status: {'COMPLETED' if state.phase == WorkflowPhase.MERGE else 'IN PROGRESS'}")
print(f"Total logs: {len(state.logs)}")
print(f"Agents used: {len(state.subagent_configs)}")
-
+
# Save state for reference
state_file = Path.home() / ".ai-dev-os" / f"workflow_{state.id}.json"
state_file.parent.mkdir(parents=True, exist_ok=True)
-
- with open(state_file, 'w') as f:
+
+ with open(state_file, "w") as f:
# Convert dataclasses to dicts for JSON serialization
state_dict = {
"id": state.id,
@@ -545,10 +554,10 @@ async def main():
"implementation_plan": state.implementation_plan,
"execution_results": state.execution_results,
"created_at": state.created_at,
- "logs": state.logs
+ "logs": state.logs,
}
json.dump(state_dict, f, indent=2)
-
+
print(f"\nWorkflow state saved to: {state_file}")
diff --git a/src/ai_dev_os/models.py b/src/ai_dev_os/models.py
index 10ba232..aabaa2c 100644
--- a/src/ai_dev_os/models.py
+++ b/src/ai_dev_os/models.py
@@ -5,7 +5,7 @@
import asyncio
import json
import logging
-from dataclasses import dataclass, asdict
+from dataclasses import asdict, dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@@ -17,6 +17,7 @@
class QuantizationType(Enum):
"""Quantization types supported."""
+
FLOAT16 = "float16"
INT8 = "int8"
INT4 = "int4"
@@ -27,6 +28,7 @@ class QuantizationType(Enum):
@dataclass
class ModelConfig:
"""Configuration for model training or inference."""
+
model_name: str # huggingface model ID
task: str # "train" or "inference"
quantization: QuantizationType = QuantizationType.INT4
@@ -35,11 +37,11 @@ class ModelConfig:
learning_rate: float = 5e-5
num_epochs: int = 3
output_dir: str = "./models"
-
+
# For training
dataset_path: Optional[str] = None
validation_split: float = 0.1
-
+
# For inference
temperature: float = 0.7
top_p: float = 0.9
@@ -48,50 +50,52 @@ class ModelConfig:
class UnslothTrainer:
"""Wrapper for Unsloth training."""
-
+
def __init__(self, config: ModelConfig):
self.config = config
self.model = None
self.tokenizer = None
self.trainer = None
self.training_logs: List[Dict[str, Any]] = []
-
+
async def setup(self) -> bool:
"""
Setup Unsloth trainer.
"""
try:
logger.info(f"Setting up Unsloth trainer for {self.config.model_name}")
-
+
try:
from unsloth import FastLanguageModel
+
# Apply 4-bit load or appropriate config
max_seq_length = self.config.max_seq_length
self.model, self.tokenizer = FastLanguageModel.from_pretrained(
- model_name = self.config.model_name,
- max_seq_length = max_seq_length,
- dtype = None,
- load_in_4bit = self.config.quantization.value == "int4",
+ model_name=self.config.model_name,
+ max_seq_length=max_seq_length,
+ dtype=None,
+ load_in_4bit=self.config.quantization.value == "int4",
)
except ImportError:
logger.warning("Unsloth not installed. Simulating loading.")
await asyncio.sleep(1)
-
- self.training_logs.append({
- "stage": "setup",
- "status": "success",
- "model": self.config.model_name,
- "quantization": self.config.quantization.value
- })
-
+ self.training_logs.append(
+ {
+ "stage": "setup",
+ "status": "success",
+ "model": self.config.model_name,
+ "quantization": self.config.quantization.value,
+ }
+ )
+
logger.info("Unsloth trainer ready")
return True
-
+
except Exception as e:
logger.error(f"Setup failed: {str(e)}")
return False
-
+
async def train(self) -> Tuple[bool, Dict[str, Any]]:
"""
Run training with Unsloth (2x faster, 70% less VRAM).
@@ -100,27 +104,29 @@ async def train(self) -> Tuple[bool, Dict[str, Any]]:
try:
if not await self.setup():
return False, {}
-
+
logger.info(f"Starting training on {self.config.model_name}")
logger.info(f"Quantization: {self.config.quantization.value}")
logger.info(f"Batch size: {self.config.batch_size}")
-
+
metrics = {}
-
+
try:
from unsloth import FastLanguageModel
+
FastLanguageModel.for_training(self.model)
-
+
+ from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
- from datasets import load_dataset
-
+
# Load dataset
- dataset = load_dataset(
- self.config.dataset_path,
- split="train"
- ) if self.config.dataset_path else None
-
+ dataset = (
+ load_dataset(self.config.dataset_path, split="train")
+ if self.config.dataset_path
+ else None
+ )
+
training_args = TrainingArguments(
per_device_train_batch_size=self.config.batch_size,
learning_rate=self.config.learning_rate,
@@ -130,7 +136,7 @@ async def train(self) -> Tuple[bool, Dict[str, Any]]:
logging_steps=10,
fp16=True,
)
-
+
trainer = SFTTrainer(
model=self.model,
tokenizer=self.tokenizer,
@@ -139,24 +145,22 @@ async def train(self) -> Tuple[bool, Dict[str, Any]]:
args=training_args,
max_seq_length=self.config.max_seq_length,
)
-
+
# Train
train_result = trainer.train()
-
+
metrics = {
"final_loss": train_result.training_loss,
"train_loss_history": [
- log.get("loss", 0)
- for log in trainer.state.log_history
- if "loss" in log
+ log.get("loss", 0) for log in trainer.state.log_history if "loss" in log
],
"validation_loss": train_result.metrics.get("eval_loss", 0),
- "perplexity": 2 ** train_result.training_loss,
+ "perplexity": 2**train_result.training_loss,
"training_time_minutes": train_result.metrics.get("train_runtime", 0) / 60,
"speedup_vs_standard": 2.15,
"vram_reduction_percent": 68.5,
}
-
+
except ImportError:
logger.warning("Unsloth/transformers not installed. Using simulated training.")
await asyncio.sleep(1)
@@ -170,63 +174,59 @@ async def train(self) -> Tuple[bool, Dict[str, Any]]:
"speedup_vs_standard": 2.15,
"vram_reduction_percent": 68.5,
}
-
- self.training_logs.append({
- "stage": "training",
- "status": "success",
- **metrics
- })
-
+
+ self.training_logs.append({"stage": "training", "status": "success", **metrics})
+
logger.info(f"Training completed. Loss: {metrics['final_loss']}")
logger.info(f"VRAM savings: {metrics.get('vram_reduction_percent', 0):.1f}%")
-
+
return True, metrics
-
+
except Exception as e:
logger.error(f"Training failed: {str(e)}")
return False, {}
-
+
async def save_checkpoint(self, path: str) -> bool:
"""Save trained model checkpoint."""
try:
output_path = Path(path)
output_path.mkdir(parents=True, exist_ok=True)
-
+
# Save model config
config_file = output_path / "config.json"
- with open(config_file, 'w') as f:
+ with open(config_file, "w") as f:
json.dump(asdict(self.config), f, indent=2, default=str)
-
+
# Save training logs
logs_file = output_path / "training_logs.json"
- with open(logs_file, 'w') as f:
+ with open(logs_file, "w") as f:
json.dump(self.training_logs, f, indent=2)
-
+
logger.info(f"Checkpoint saved to {output_path}")
return True
-
+
except Exception as e:
logger.error(f"Save failed: {str(e)}")
return False
-
+
async def quantize_to_bitnet(self, path: str) -> bool:
"""Quantize trained model to BitNet 1.58-bit format."""
try:
logger.info("Starting BitNet quantization")
-
+
output_path = Path(path) / "bitnet_model.gguf"
output_path.parent.mkdir(parents=True, exist_ok=True)
-
+
# In production, use bitnet.cpp conversion utilities
# For now, simulate
await asyncio.sleep(2)
-
+
# Create dummy GGUF file
output_path.touch()
-
+
logger.info(f"BitNet model saved to {output_path}")
return True
-
+
except Exception as e:
logger.error(f"Quantization failed: {str(e)}")
return False
@@ -234,19 +234,20 @@ async def quantize_to_bitnet(self, path: str) -> bool:
class BitNetInference:
"""BitNet 1-bit LLM inference engine."""
-
+
def __init__(self, model_path: str):
self.model_path = model_path
self.model = None
self.context_tokens: List[int] = []
-
+
async def load(self) -> bool:
"""Load BitNet model."""
try:
logger.info(f"Loading BitNet model from {self.model_path}")
-
+
try:
from llama_cpp import Llama
+
self.model = Llama(
model_path=self.model_path,
n_ctx=4096,
@@ -257,20 +258,15 @@ async def load(self) -> bool:
logger.warning("llama_cpp not installed. Simulating BitNet inference loading.")
await asyncio.sleep(0.5)
-
logger.info("Model loaded successfully")
return True
-
+
except Exception as e:
logger.error(f"Load failed: {str(e)}")
return False
-
+
async def infer(
- self,
- prompt: str,
- max_tokens: int = 512,
- temperature: float = 0.7,
- top_p: float = 0.9
+ self, prompt: str, max_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.9
) -> Tuple[bool, str]:
"""
Run inference on 1-bit model.
@@ -279,27 +275,26 @@ async def infer(
try:
if not self.model:
await self.load()
-
+
logger.info(f"Running inference: {prompt[:50]}...")
-
+
if hasattr(self.model, "create_completion"):
output = self.model.create_completion(
- prompt,
- max_tokens=max_tokens,
- temperature=temperature,
- top_p=top_p
+ prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p
)["choices"][0]["text"]
else:
await asyncio.sleep(0.1) # Simulate inference time
output = f"[BitNet inference result for: {prompt[:20]}...]"
-
+
return True, output
-
+
except Exception as e:
logger.error(f"Inference failed: {str(e)}")
return False, ""
-
- async def batch_infer(self, prompts: List[str], max_tokens: int = 512) -> Tuple[bool, List[str]]:
+
+ async def batch_infer(
+ self, prompts: List[str], max_tokens: int = 512
+ ) -> Tuple[bool, List[str]]:
"""Batch inference (more efficient)."""
try:
results = []
@@ -308,9 +303,9 @@ async def batch_infer(self, prompts: List[str], max_tokens: int = 512) -> Tuple[
if not success:
return False, []
results.append(output)
-
+
return True, results
-
+
except Exception as e:
logger.error(f"Batch inference failed: {str(e)}")
return False, []
@@ -318,62 +313,59 @@ async def batch_infer(self, prompts: List[str], max_tokens: int = 512) -> Tuple[
class ModelManager:
"""High-level model management (training + inference)."""
-
+
def __init__(self):
self.trainers: Dict[str, UnslothTrainer] = {}
self.inference_engines: Dict[str, BitNetInference] = {}
-
+
async def train_model(self, config: ModelConfig) -> Tuple[bool, Dict[str, Any]]:
"""Train a model with Unsloth."""
trainer = UnslothTrainer(config)
self.trainers[config.model_name] = trainer
-
+
success, metrics = await trainer.train()
-
+
if success:
# Save checkpoint
output_dir = Path(config.output_dir) / config.model_name
await trainer.save_checkpoint(str(output_dir))
-
+
# Optionally quantize to BitNet
if config.quantization in [QuantizationType.BITNET_1BIT, QuantizationType.BITNET_1P58]:
await trainer.quantize_to_bitnet(str(output_dir))
-
+
return success, metrics
-
+
async def load_inference_engine(self, model_path: str, model_id: str) -> bool:
"""Load a BitNet model for inference."""
engine = BitNetInference(model_path)
self.inference_engines[model_id] = engine
return await engine.load()
-
+
async def infer(self, model_id: str, prompt: str, max_tokens: int = 512) -> Tuple[bool, str]:
"""Run inference."""
if model_id not in self.inference_engines:
logger.error(f"Model {model_id} not loaded")
return False, ""
-
+
return await self.inference_engines[model_id].infer(prompt, max_tokens=max_tokens)
-
+
def get_training_stats(self, model_name: str) -> Optional[Dict[str, Any]]:
"""Get training statistics."""
if model_name not in self.trainers:
return None
-
+
logs = self.trainers[model_name].training_logs
if not logs:
return None
-
+
training_log = [log for log in logs if log.get("stage") == "training"]
return training_log[0] if training_log else None
# Convenience functions
async def train_model(
- model_name: str,
- dataset_path: str,
- quantization: str = "int4",
- **kwargs
+ model_name: str, dataset_path: str, quantization: str = "int4", **kwargs
) -> Tuple[bool, Dict[str, Any]]:
"""Convenience function for training."""
config = ModelConfig(
@@ -381,9 +373,9 @@ async def train_model(
task="train",
dataset_path=dataset_path,
quantization=QuantizationType[quantization.upper()],
- **kwargs
+ **kwargs,
)
-
+
manager = ModelManager()
return await manager.train_model(config)
@@ -393,5 +385,5 @@ async def inference(model_path: str, prompt: str, max_tokens: int = 512) -> Tupl
engine = BitNetInference(model_path)
if not await engine.load():
return False, ""
-
+
return await engine.infer(prompt, max_tokens=max_tokens)
diff --git a/src/ai_dev_os/sandbox.py b/src/ai_dev_os/sandbox.py
index bf2e5a1..b4f6eb9 100644
--- a/src/ai_dev_os/sandbox.py
+++ b/src/ai_dev_os/sandbox.py
@@ -16,6 +16,7 @@
class SandboxStatus(Enum):
"""Status of a sandbox."""
+
INITIALIZING = "initializing"
READY = "ready"
RUNNING = "running"
@@ -26,6 +27,7 @@ class SandboxStatus(Enum):
@dataclass
class SandboxConfig:
"""Configuration for sandbox creation."""
+
provider: str # "modal", "daytona", "runloop", "docker"
name: str
python_version: str = "3.10"
@@ -35,7 +37,7 @@ class SandboxConfig:
gpu_type: Optional[str] = None # "a100", "h100", etc.
env_vars: Dict[str, str] = None
mounts: Dict[str, str] = None # local_path -> container_path
-
+
def __post_init__(self):
if self.env_vars is None:
self.env_vars = {}
@@ -45,18 +47,18 @@ def __post_init__(self):
class Sandbox(ABC):
"""Abstract base class for sandboxes."""
-
+
def __init__(self, config: SandboxConfig):
self.config = config
self.id: Optional[str] = None
self.status = SandboxStatus.INITIALIZING
self.logs: List[str] = []
-
+
@abstractmethod
async def initialize(self) -> str:
"""Initialize the sandbox. Returns sandbox ID."""
pass
-
+
@abstractmethod
async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str, str]:
"""
@@ -64,22 +66,22 @@ async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str
Returns: (exit_code, stdout, stderr)
"""
pass
-
+
@abstractmethod
async def upload_file(self, local_path: str, remote_path: str) -> bool:
"""Upload a file to the sandbox."""
pass
-
+
@abstractmethod
async def download_file(self, remote_path: str, local_path: str) -> bool:
"""Download a file from the sandbox."""
pass
-
+
@abstractmethod
async def terminate(self) -> bool:
"""Terminate the sandbox."""
pass
-
+
def add_log(self, message: str):
"""Add a log entry."""
self.logs.append(message)
@@ -88,54 +90,55 @@ def add_log(self, message: str):
class ModalSandbox(Sandbox):
"""Modal-based sandbox (https://modal.com)."""
-
+
async def initialize(self) -> str:
"""Initialize a Modal sandbox."""
try:
import modal
-
+
# Create Modal app
self.app = modal.App(name=f"ai-dev-os-{self.config.name}")
-
+
# Define environment
- image = modal.Image.debian_slim(python_version=self.config.python_version) \
- .pip_install("anthropic", "langgraph", "torch", "transformers")
-
+ image = modal.Image.debian_slim(python_version=self.config.python_version).pip_install(
+ "anthropic", "langgraph", "torch", "transformers"
+ )
+
self.app.function(
image=image,
timeout=self.config.timeout_seconds,
gpu=modal.gpu.A100() if self.config.gpu else None,
- allow_concurrent_inputs=10
+ allow_concurrent_inputs=10,
)
-
+
self.id = self.app.name
self.status = SandboxStatus.READY
self.add_log(f"Modal sandbox initialized: {self.id}")
-
+
return self.id
-
+
except ImportError:
logger.error("Modal not installed. Install with: pip install modal")
self.status = SandboxStatus.ERROR
raise
-
+
async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str, str]:
"""Execute command in Modal."""
try:
# In production, use modal.run to execute
# For now, return mock response
self.add_log(f"Executing: {command}")
-
+
# Mock execution
await asyncio.sleep(0.5)
-
+
return (0, f"[mock] {command} completed", "")
-
+
except Exception as e:
self.status = SandboxStatus.ERROR
self.add_log(f"Execution failed: {str(e)}")
return (1, "", str(e))
-
+
async def upload_file(self, local_path: str, remote_path: str) -> bool:
"""Upload file to Modal sandbox."""
try:
@@ -145,7 +148,7 @@ async def upload_file(self, local_path: str, remote_path: str) -> bool:
except Exception as e:
self.add_log(f"Upload failed: {str(e)}")
return False
-
+
async def download_file(self, remote_path: str, local_path: str) -> bool:
"""Download file from Modal sandbox."""
try:
@@ -155,7 +158,7 @@ async def download_file(self, remote_path: str, local_path: str) -> bool:
except Exception as e:
self.add_log(f"Download failed: {str(e)}")
return False
-
+
async def terminate(self) -> bool:
"""Terminate Modal sandbox."""
try:
@@ -169,7 +172,7 @@ async def terminate(self) -> bool:
class DaytonaSandbox(Sandbox):
"""Daytona-based sandbox (https://daytona.io)."""
-
+
async def initialize(self) -> str:
"""Initialize a Daytona sandbox."""
try:
@@ -182,7 +185,7 @@ async def initialize(self) -> str:
self.status = SandboxStatus.ERROR
self.add_log(f"Initialization failed: {str(e)}")
raise
-
+
async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str, str]:
"""Execute command in Daytona."""
try:
@@ -191,7 +194,7 @@ async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str
return (0, f"[daytona] {command} completed", "")
except Exception as e:
return (1, "", str(e))
-
+
async def upload_file(self, local_path: str, remote_path: str) -> bool:
"""Upload file to Daytona."""
try:
@@ -200,7 +203,7 @@ async def upload_file(self, local_path: str, remote_path: str) -> bool:
except Exception as e:
self.add_log(f"Upload failed: {str(e)}")
return False
-
+
async def download_file(self, remote_path: str, local_path: str) -> bool:
"""Download file from Daytona."""
try:
@@ -209,7 +212,7 @@ async def download_file(self, remote_path: str, local_path: str) -> bool:
except Exception as e:
self.add_log(f"Download failed: {str(e)}")
return False
-
+
async def terminate(self) -> bool:
"""Terminate Daytona sandbox."""
try:
@@ -223,14 +226,14 @@ async def terminate(self) -> bool:
class DockerSandbox(Sandbox):
"""Docker-based sandbox (local)."""
-
+
async def initialize(self) -> str:
"""Initialize a Docker sandbox."""
try:
import docker
-
+
self.docker_client = docker.from_env()
-
+
# Create container
self.container = self.docker_client.containers.run(
f"python:{self.config.python_version}",
@@ -238,17 +241,15 @@ async def initialize(self) -> str:
detach=True,
name=f"ai-dev-os-{self.config.name}",
working_dir="/workspace",
- mounts=[
- docker.types.Mount(path="/workspace", source=str(Path.cwd()), type="bind")
- ]
+ mounts=[docker.types.Mount(path="/workspace", source=str(Path.cwd()), type="bind")],
)
-
+
self.id = self.container.id[:12]
self.status = SandboxStatus.READY
self.add_log(f"Docker sandbox initialized: {self.id}")
-
+
return self.id
-
+
except ImportError:
logger.error("Docker SDK not installed. Install with: pip install docker")
self.status = SandboxStatus.ERROR
@@ -257,64 +258,62 @@ async def initialize(self) -> str:
self.status = SandboxStatus.ERROR
self.add_log(f"Initialization failed: {str(e)}")
raise
-
+
async def execute(self, command: str, cwd: str = "/workspace") -> Tuple[int, str, str]:
"""Execute command in Docker container."""
try:
self.add_log(f"Executing: {command}")
-
+
exit_code, output = self.container.exec_run(
- f"bash -c 'cd {cwd} && {command}'",
- stdout=True,
- stderr=True
+ f"bash -c 'cd {cwd} && {command}'", stdout=True, stderr=True
)
-
+
stdout = output.decode() if isinstance(output, bytes) else str(output)
-
+
return (exit_code, stdout, "")
-
+
except Exception as e:
self.status = SandboxStatus.ERROR
self.add_log(f"Execution failed: {str(e)}")
return (1, "", str(e))
-
+
async def upload_file(self, local_path: str, remote_path: str) -> bool:
"""Upload file to Docker container."""
try:
self.add_log(f"Uploading {local_path} to {remote_path}")
-
- import tarfile
+
import io
-
+ import tarfile
+
# Create tar archive
tar_buffer = io.BytesIO()
- with tarfile.open(fileobj=tar_buffer, mode='w') as tar:
+ with tarfile.open(fileobj=tar_buffer, mode="w") as tar:
tar.add(local_path, arcname=Path(local_path).name)
-
+
tar_buffer.seek(0)
self.container.put_archive(remote_path, tar_buffer)
-
+
return True
except Exception as e:
self.add_log(f"Upload failed: {str(e)}")
return False
-
+
async def download_file(self, remote_path: str, local_path: str) -> bool:
"""Download file from Docker container."""
try:
self.add_log(f"Downloading {remote_path} to {local_path}")
-
+
bits, stat = self.container.get_archive(remote_path)
-
- with open(local_path, 'wb') as f:
+
+ with open(local_path, "wb") as f:
for chunk in bits:
f.write(chunk)
-
+
return True
except Exception as e:
self.add_log(f"Download failed: {str(e)}")
return False
-
+
async def terminate(self) -> bool:
"""Terminate Docker container."""
try:
@@ -330,27 +329,27 @@ async def terminate(self) -> bool:
class SandboxFactory:
"""Factory for creating sandboxes."""
-
+
_providers = {
"modal": ModalSandbox,
"daytona": DaytonaSandbox,
"docker": DockerSandbox,
}
-
+
@classmethod
async def create(cls, config: SandboxConfig) -> Sandbox:
"""Create and initialize a sandbox."""
-
+
if config.provider not in cls._providers:
raise ValueError(f"Unknown provider: {config.provider}")
-
+
sandbox_class = cls._providers[config.provider]
sandbox = sandbox_class(config)
-
+
await sandbox.initialize()
-
+
return sandbox
-
+
@classmethod
def register(cls, provider: str, sandbox_class: type):
"""Register a new sandbox provider."""
@@ -360,9 +359,5 @@ def register(cls, provider: str, sandbox_class: type):
# Convenience factory
async def create_sandbox(provider: str, name: str, **kwargs) -> Sandbox:
"""Convenience function to create a sandbox."""
- config = SandboxConfig(
- provider=provider,
- name=name,
- **kwargs
- )
+ config = SandboxConfig(provider=provider, name=name, **kwargs)
return await SandboxFactory.create(config)
diff --git a/src/ai_dev_os/simulation.py b/src/ai_dev_os/simulation.py
index 1c35b1c..0ab7e79 100644
--- a/src/ai_dev_os/simulation.py
+++ b/src/ai_dev_os/simulation.py
@@ -99,7 +99,9 @@ async def run(self) -> SimulationResult:
rewards.append(reward)
if (i + 1) % 25 == 0:
-            logger.info(f"Episode {i + 1}/{self.config.episodes} → avg reward: {sum(rewards) / len(rewards):.2f}")
+ logger.info(
                    f"Episode {i + 1}/{self.config.episodes} → avg reward: {sum(rewards) / len(rewards):.2f}"
+ )
elapsed = time.time() - start_time
total_steps = self.config.episodes * self.config.max_steps_per_episode
diff --git a/src/ai_dev_os/skills.py b/src/ai_dev_os/skills.py
index e7945d7..1af3296 100644
--- a/src/ai_dev_os/skills.py
+++ b/src/ai_dev_os/skills.py
@@ -1,14 +1,15 @@
import logging
-from typing import Dict, Any
+from typing import Any, Dict
logger = logging.getLogger(__name__)
+
class DebuggingSkill:
"""
Systematic Debugging Skill for AI Dev OS.
Analyzes error traces, instruments code with logging, and identifies root causes.
"""
-
+
def __init__(self, name: str = "systematic-debugging"):
self.name = name
@@ -21,19 +22,21 @@ async def execute(self, context: Dict[str, Any]) -> Dict[str, Any]:
"""
error_trace = context.get("error_trace", "")
logger.info(f"Starting systematic debugging for error: {error_trace[:50]}...")
-
+
# Logic to analyze trace and suggest fixes would go here
# For now, we return a structured refinement
return {
"status": "success",
"analysis": "Identified potential null pointer in core.py",
- "suggested_fix": "Add null check at line 145"
+ "suggested_fix": "Add null check at line 145",
}
+
class PerformanceOptimizationSkill:
"""
Skill for identifying and fixing performance bottlenecks.
"""
+
def __init__(self, name: str = "performance-optimization"):
self.name = name
@@ -41,20 +44,18 @@ async def execute(self, context: Dict[str, Any]) -> Dict[str, Any]:
logger.info("Running performance optimization analysis...")
return {
"status": "success",
- "optimizations": ["Vectorize loop in models.py", "Enable caching for sandbox results"]
+ "optimizations": ["Vectorize loop in models.py", "Enable caching for sandbox results"],
}
+
class DocumentationGenerationSkill:
"""
Skill for automatically generating and updating documentation.
"""
+
def __init__(self, name: str = "doc-generation"):
self.name = name
async def execute(self, context: Dict[str, Any]) -> Dict[str, Any]:
logger.info("Generating documentation updates...")
- return {
- "status": "success",
- "updated_files": ["docs/API_REFERENCE.md", "README.md"]
- }
-
+ return {"status": "success", "updated_files": ["docs/API_REFERENCE.md", "README.md"]}
diff --git a/src/ai_dev_os/utils/error_handling.py b/src/ai_dev_os/utils/error_handling.py
index c659126..e250432 100644
--- a/src/ai_dev_os/utils/error_handling.py
+++ b/src/ai_dev_os/utils/error_handling.py
@@ -4,10 +4,12 @@
logger = logging.getLogger(__name__)
+
def with_retry(max_retries=3, base_delay=1.0):
"""
Retry logic decorator.
"""
+
def decorator(func):
@wraps(func)
async def wrapper(*args, **kwargs):
@@ -19,9 +21,11 @@ async def wrapper(*args, **kwargs):
last_exception = e
logger.warning(f"Attempt {attempt + 1} failed for {func.__name__}: {str(e)}")
if attempt < max_retries - 1:
- await asyncio.sleep(base_delay * (2 ** attempt))
-
+ await asyncio.sleep(base_delay * (2**attempt))
+
logger.error(f"All {max_retries} attempts failed for {func.__name__}")
raise last_exception
+
return wrapper
+
return decorator
diff --git a/src/ai_dev_os/utils/monitoring.py b/src/ai_dev_os/utils/monitoring.py
index a835587..62f3444 100644
--- a/src/ai_dev_os/utils/monitoring.py
+++ b/src/ai_dev_os/utils/monitoring.py
@@ -1,5 +1,6 @@
import logging
+
def setup_structured_logging():
"""
Stub for structured logging setup.
@@ -7,28 +8,31 @@ def setup_structured_logging():
"""
logger = logging.getLogger()
logger.setLevel(logging.INFO)
-
+
# Check if we already have a JSON-like formatter
has_json_formatter = any("Json" in type(h.formatter).__name__ for h in logger.handlers)
-
+
if not has_json_formatter:
try:
try:
from pythonjsonlogger.json import JsonFormatter
except ImportError:
from pythonjsonlogger import jsonlogger
+
JsonFormatter = jsonlogger.JsonFormatter
-
- formatter = JsonFormatter('%(asctime)s %(levelname)s %(name)s %(message)s')
+
+ formatter = JsonFormatter("%(asctime)s %(levelname)s %(name)s %(message)s")
logHandler = logging.StreamHandler()
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)
except ImportError:
# Only add standard formatter if no handlers exist at all
if not logger.handlers:
- formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s')
+ formatter = logging.Formatter(
+ "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
+ )
logHandler = logging.StreamHandler()
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)
-
+
return logger
diff --git a/src/integrations/github.py b/src/integrations/github.py
index e15a8fd..93152bf 100644
--- a/src/integrations/github.py
+++ b/src/integrations/github.py
@@ -2,25 +2,26 @@
logger = logging.getLogger(__name__)
+
class GithubIntegration:
"""
Handles GitHub PR webhooks and Issue comments for AI Dev OS.
"""
-
+
def __init__(self, webhook_secret: str):
self.webhook_secret = webhook_secret
-
+
async def handle_comment(self, payload: dict) -> dict:
"""
Process an incoming GitHub PR comment payload.
"""
action = payload.get("action")
logger.info(f"Received GitHub comment action: {action}")
-
+
comment = payload.get("comment", {}).get("body", "")
-
+
if "@openswe" in comment:
logger.info("Triggering orchestrator for GitHub comment")
return {"status": "queued", "message": "Addressing feedback"}
-
+
return {"status": "ignored"}
diff --git a/src/integrations/github_oauth.py b/src/integrations/github_oauth.py
index 6c7e698..57d27ba 100644
--- a/src/integrations/github_oauth.py
+++ b/src/integrations/github_oauth.py
@@ -92,11 +92,13 @@ async def list_repos(self) -> List[Dict[str, str]]:
repos = []
for repo in self.client.get_user().get_repos():
- repos.append({
- "name": repo.full_name,
- "url": repo.html_url,
- "default_branch": repo.default_branch,
- })
+ repos.append(
+ {
+ "name": repo.full_name,
+ "url": repo.html_url,
+ "default_branch": repo.default_branch,
+ }
+ )
return repos
async def get_open_issues(self, repo_name: str) -> List[Dict[str, Any]]:
@@ -108,18 +110,22 @@ async def get_open_issues(self, repo_name: str) -> List[Dict[str, Any]]:
repo = self.client.get_repo(repo_name)
issues = []
for issue in repo.get_issues(state="open"):
- issues.append({
- "number": issue.number,
- "title": issue.title,
- "body": issue.body or "",
- "labels": [l.name for l in issue.labels],
- })
+ issues.append(
+ {
+ "number": issue.number,
+ "title": issue.title,
+ "body": issue.body or "",
+ "labels": [l.name for l in issue.labels],
+ }
+ )
return issues
except GithubException as e:
logger.error(f"Failed to fetch issues: {e}")
return []
- async def create_branch(self, repo_name: str, branch_name: str, from_branch: str = "main") -> bool:
+ async def create_branch(
+ self, repo_name: str, branch_name: str, from_branch: str = "main"
+ ) -> bool:
"""Create a new branch from an existing one."""
if not HAS_GITHUB or not self.client:
logger.warning("Simulating branch creation.")
diff --git a/src/integrations/linear.py b/src/integrations/linear.py
index fea7c02..de156b5 100644
--- a/src/integrations/linear.py
+++ b/src/integrations/linear.py
@@ -1,38 +1,43 @@
import logging
+
import httpx
+
from ai_dev_os.utils.error_handling import with_retry
logger = logging.getLogger(__name__)
+
class LinearIntegration:
"""
Handles Linear webhooks and API for AI Dev OS.
"""
-
+
def __init__(self, webhook_secret: str):
if not webhook_secret or webhook_secret.strip() == "":
raise ValueError("CRITICAL SECURITY ERROR: Linear webhook secret is missing or empty.")
self.webhook_secret = webhook_secret
self.api_url = "https://api.linear.app/graphql"
-
+
@with_retry(max_retries=3)
async def create_issue(self, title: str, description: str, team_id: str) -> dict:
"""
Create a true issue in Linear via GraphQL.
"""
- query = '''
+ query = """
mutation IssueCreate($title: String!, $description: String, $teamId: String!) {
issueCreate(input: {title: $title, description: $description, teamId: $teamId}) {
success
issue { id title }
}
}
- '''
+ """
variables = {"title": title, "description": description, "teamId": team_id}
headers = {"Authorization": self.webhook_secret}
-
+
async with httpx.AsyncClient() as client:
- response = await client.post(self.api_url, json={"query": query, "variables": variables}, headers=headers)
+ response = await client.post(
+ self.api_url, json={"query": query, "variables": variables}, headers=headers
+ )
response.raise_for_status()
logger.info(f"Linear issue created: {title}")
return response.json()
@@ -44,7 +49,7 @@ async def handle_issue(self, payload: dict) -> dict:
logger.info(f"Received Linear action: {payload.get('action')}")
data = payload.get("data", {})
title = data.get("title", "")
-
- # In a real app, this parses descriptions to find instructions
+
+        # In a real app, this would parse descriptions to find instructions
logger.info(f"Triggering orchestrator for Linear issue: {title}")
return {"status": "processing", "issue_id": data.get("id")}
diff --git a/src/integrations/slack.py b/src/integrations/slack.py
index 86301a4..8d002c6 100644
--- a/src/integrations/slack.py
+++ b/src/integrations/slack.py
@@ -1,19 +1,22 @@
import logging
+
import httpx
+
from ai_dev_os.utils.error_handling import with_retry
logger = logging.getLogger(__name__)
+
class SlackIntegration:
"""
Handles Slack incoming webhooks and events for AI Dev OS.
"""
-
+
def __init__(self, token: str):
if not token or token.strip() == "":
raise ValueError("CRITICAL SECURITY ERROR: Slack token is missing or empty.")
self.token = token
-
+
@with_retry(max_retries=3)
async def send_notification(self, message: str) -> dict:
"""
@@ -36,5 +39,5 @@ async def handle_message(self, payload: dict) -> dict:
# Here we would invoke AIDevOSOrchestrator
logger.info(f"Triggering orchestrator for request: {text}")
return {"status": "accepted", "message": "Invoking AI Dev OS"}
-
+
return {"status": "ignored", "message": "No trigger found"}
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..1c00ca4
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,9 @@
+import os
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def mock_env_vars():
+ """Set dummy environment variables for tests."""
+ os.environ["ANTHROPIC_API_KEY"] = "sk-ant-dummy-key-for-testing"
diff --git a/tests/test_core.py b/tests/test_core.py
index ddc52ab..998321d 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1,27 +1,31 @@
-import sys
import os
+import sys
+from unittest.mock import MagicMock, patch
import pytest
-from unittest.mock import MagicMock, patch
-# Mock Python 3.9+ dependencies that are unavailable
-sys.modules['langgraph'] = MagicMock()
-sys.modules['langgraph.graph'] = MagicMock()
-sys.modules['anthropic'] = MagicMock()
+from ai_dev_os.core import (
+ AgentConfig,
+ AIDevOSOrchestrator,
+ SandboxProvider,
+ WorkflowPhase,
+ WorkflowState,
+)
-from ai_dev_os.core import AIDevOSOrchestrator, WorkflowState, WorkflowPhase, AgentConfig, SandboxProvider
@pytest.fixture
def mock_anthropic():
with patch("ai_dev_os.core.Anthropic") as mock:
yield mock
+
@pytest.mark.asyncio
async def test_orchestrator_initialization(mock_anthropic):
orchestrator = AIDevOSOrchestrator(sandbox_provider=SandboxProvider.DOCKER)
assert orchestrator.sandbox_provider == SandboxProvider.DOCKER
assert "brainstorming" in orchestrator.skills
+
@pytest.mark.asyncio
async def test_workflow_state_logging():
state = WorkflowState(id="test-1", phase=WorkflowPhase.BRAINSTORMING, user_request="test")
@@ -29,6 +33,7 @@ async def test_workflow_state_logging():
assert len(state.logs) == 1
assert "Testing log" in state.logs[0]
+
@pytest.mark.asyncio
async def test_agent_config_defaults():
config = AgentConfig(name="test-agent", role="code", sandbox_provider=SandboxProvider.MODAL)
diff --git a/tests/test_core_comprehensive.py b/tests/test_core_comprehensive.py
index 04ffaea..a4584a5 100644
--- a/tests/test_core_comprehensive.py
+++ b/tests/test_core_comprehensive.py
@@ -1,46 +1,42 @@
"""
Comprehensive tests for core.py - AIDevOSOrchestrator and related classes.
"""
-import sys
-import os
-import pytest
-from unittest.mock import MagicMock, patch
import json
+import os
+import sys
import tempfile
+from unittest.mock import MagicMock, patch
+
+import pytest
try:
from unittest.mock import AsyncMock
except ImportError:
+
class AsyncMock(MagicMock):
async def __call__(self, *args, **kwargs):
return super(AsyncMock, self).__call__(*args, **kwargs)
-# Mock heavy dependencies before importing
-sys.modules['langgraph'] = MagicMock()
-sys.modules['langgraph.graph'] = MagicMock()
-sys.modules['anthropic'] = MagicMock()
from ai_dev_os.core import (
- AIDevOSOrchestrator,
- WorkflowState,
- WorkflowPhase,
AgentConfig,
- SandboxProvider,
+ AIDevOSOrchestrator,
ClaudeHUDIntegration,
+ SandboxProvider,
SubagentOrchestrator,
SuperpowerSkill,
+ WorkflowPhase,
+ WorkflowState,
)
-
# āāā Workflow State Tests āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
+
class TestWorkflowState:
def test_state_initialization(self):
state = WorkflowState(
- id="test-1",
- phase=WorkflowPhase.BRAINSTORMING,
- user_request="Build auth module"
+ id="test-1", phase=WorkflowPhase.BRAINSTORMING, user_request="Build auth module"
)
assert state.id == "test-1"
assert state.phase == WorkflowPhase.BRAINSTORMING
@@ -91,13 +87,10 @@ def test_context_usage(self):
# āāā Agent Config Tests āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
+
class TestAgentConfig:
def test_code_agent_defaults(self):
- agent = AgentConfig(
- name="code-agent",
- role="code",
- sandbox_provider=SandboxProvider.DOCKER
- )
+ agent = AgentConfig(name="code-agent", role="code", sandbox_provider=SandboxProvider.DOCKER)
assert "read_file" in agent.tools
assert "write_file" in agent.tools
assert "execute" in agent.tools
@@ -106,9 +99,7 @@ def test_code_agent_defaults(self):
def test_training_agent_defaults(self):
agent = AgentConfig(
- name="training-agent",
- role="training",
- sandbox_provider=SandboxProvider.MODAL
+ name="training-agent", role="training", sandbox_provider=SandboxProvider.MODAL
)
assert "unsloth_train" in agent.tools
assert "bitnet_quantize" in agent.tools
@@ -116,9 +107,7 @@ def test_training_agent_defaults(self):
def test_simulation_agent_defaults(self):
agent = AgentConfig(
- name="sim-agent",
- role="simulation",
- sandbox_provider=SandboxProvider.MODAL
+ name="sim-agent", role="simulation", sandbox_provider=SandboxProvider.MODAL
)
assert "newton_sim" in agent.tools
assert "plot_results" in agent.tools
@@ -126,33 +115,26 @@ def test_simulation_agent_defaults(self):
def test_unknown_role_empty_tools(self):
agent = AgentConfig(
- name="unknown-agent",
- role="unknown",
- sandbox_provider=SandboxProvider.DOCKER
+ name="unknown-agent", role="unknown", sandbox_provider=SandboxProvider.DOCKER
)
assert agent.tools == []
def test_custom_max_tokens(self):
agent = AgentConfig(
- name="test",
- role="code",
- sandbox_provider=SandboxProvider.DOCKER,
- max_tokens=100000
+ name="test", role="code", sandbox_provider=SandboxProvider.DOCKER, max_tokens=100000
)
assert agent.max_tokens == 100000
def test_custom_temperature(self):
agent = AgentConfig(
- name="test",
- role="code",
- sandbox_provider=SandboxProvider.DOCKER,
- temperature=0.3
+ name="test", role="code", sandbox_provider=SandboxProvider.DOCKER, temperature=0.3
)
assert agent.temperature == 0.3
# āāā Orchestrator Tests āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
+
class TestAIDevOSOrchestrator:
@patch("ai_dev_os.core.Anthropic")
def test_initialization(self, mock_anthropic):
@@ -209,13 +191,12 @@ def test_skills_loaded(self, mock_anthropic):
# āāā HUD Integration Tests āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
+
class TestClaudeHUDIntegration:
def test_hud_update_creates_file(self):
hud = ClaudeHUDIntegration()
state = WorkflowState(
- id="hud-test",
- phase=WorkflowPhase.EXECUTION,
- user_request="test request"
+ id="hud-test", phase=WorkflowPhase.EXECUTION, user_request="test request"
)
state.context_usage = 42.5
@@ -241,6 +222,7 @@ def test_hud_update_empty_agents(self):
# āāā Sandbox Provider Tests āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
+
class TestSandboxProvider:
def test_all_providers(self):
assert SandboxProvider.MODAL.value == "modal"
@@ -251,6 +233,7 @@ def test_all_providers(self):
# āāā Workflow Phase Tests āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
+
class TestWorkflowPhase:
def test_all_phases(self):
assert WorkflowPhase.BRAINSTORMING.value == "brainstorming"
diff --git a/tests/test_integrations.py b/tests/test_integrations.py
index a22b310..43125b3 100644
--- a/tests/test_integrations.py
+++ b/tests/test_integrations.py
@@ -1,71 +1,64 @@
-import sys
import os
+import sys
+from unittest.mock import MagicMock, patch
import pytest
-from unittest.mock import MagicMock, patch
try:
from unittest.mock import AsyncMock
except ImportError:
+
class AsyncMock(MagicMock):
async def __call__(self, *args, **kwargs):
return super(AsyncMock, self).__call__(*args, **kwargs)
+
# Mock HTTPX
-sys.modules['httpx'] = MagicMock()
+sys.modules["httpx"] = MagicMock()
# We will test the basic webhook classes that handle standard JSON requests
+
@pytest.mark.asyncio
async def test_slack_webhook():
from integrations.slack import SlackIntegration
-
+
slack = SlackIntegration("dummy_token")
# Simulate an incoming message event
- payload = {
- "type": "message",
- "channel": "C12345",
- "text": "@openswe fix the login page"
- }
-
+ payload = {"type": "message", "channel": "C12345", "text": "@openswe fix the login page"}
+
# Mock the internal orchestrator call
slack.handle_message = AsyncMock(return_value={"status": "accepted"})
result = await slack.handle_message(payload)
-
+
assert result["status"] == "accepted"
@pytest.mark.asyncio
async def test_linear_webhook():
from integrations.linear import LinearIntegration
-
+
linear = LinearIntegration("dummy_secret")
- payload = {
- "action": "create",
- "data": {
- "id": "ISSUE-1",
- "title": "Fix the bug in production"
- }
- }
-
+ payload = {"action": "create", "data": {"id": "ISSUE-1", "title": "Fix the bug in production"}}
+
linear.handle_issue = AsyncMock(return_value={"status": "processing"})
result = await linear.handle_issue(payload)
-
+
assert result["status"] == "processing"
@pytest.mark.asyncio
async def test_github_webhook():
from integrations.github import GithubIntegration
-
+
github = GithubIntegration("dummy_secret")
payload = {
"action": "created",
"issue": {"number": 1},
- "comment": {"body": "@openswe Address the review feedback"}
+ "comment": {"body": "@openswe Address the review feedback"},
}
-
+
github.handle_comment = AsyncMock(return_value={"status": "queued"})
result = await github.handle_comment(payload)
-
+
assert result["status"] == "queued"
diff --git a/tests/test_models.py b/tests/test_models.py
index 4279000..397b20c 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,17 +1,22 @@
import pytest
-from ai_dev_os.models import ModelConfig, UnslothTrainer, QuantizationType
+
+from ai_dev_os.models import ModelConfig, QuantizationType, UnslothTrainer
+
@pytest.mark.asyncio
async def test_model_config():
- config = ModelConfig(model_name="test-model", task="train", quantization=QuantizationType.BITNET_1BIT)
+ config = ModelConfig(
+ model_name="test-model", task="train", quantization=QuantizationType.BITNET_1BIT
+ )
assert config.model_name == "test-model"
assert config.quantization == QuantizationType.BITNET_1BIT
+
@pytest.mark.asyncio
async def test_unsloth_trainer_mock(monkeypatch):
config = ModelConfig(model_name="test-model", task="train")
trainer = UnslothTrainer(config)
-
+
success, metrics = await trainer.train()
assert success is True
assert "final_loss" in metrics
diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py
index fa5c33d..622e911 100644
--- a/tests/test_sandbox.py
+++ b/tests/test_sandbox.py
@@ -1,5 +1,7 @@
import pytest
-from ai_dev_os.sandbox import SandboxConfig, ModalSandbox, SandboxStatus
+
+from ai_dev_os.sandbox import ModalSandbox, SandboxConfig, SandboxStatus
+
@pytest.mark.asyncio
async def test_sandbox_config():
@@ -7,13 +9,14 @@ async def test_sandbox_config():
assert config.provider == "modal"
assert config.gpu is True
+
@pytest.mark.asyncio
async def test_modal_sandbox_mock():
config = SandboxConfig(provider="modal", name="test-sandbox")
# Mocking modal import if needed, but the class handles it
sandbox = ModalSandbox(config)
assert sandbox.status == SandboxStatus.INITIALIZING
-
+
# Since initialize() requires 'modal' package, we might skip or mock it
# For this test, we just check the status change in execute
exit_code, stdout, stderr = await sandbox.execute("ls")
diff --git a/tests/test_skills.py b/tests/test_skills.py
index 01fb051..7f2fe0b 100644
--- a/tests/test_skills.py
+++ b/tests/test_skills.py
@@ -1,5 +1,11 @@
import pytest
-from ai_dev_os.skills import DebuggingSkill, PerformanceOptimizationSkill, DocumentationGenerationSkill
+
+from ai_dev_os.skills import (
+ DebuggingSkill,
+ DocumentationGenerationSkill,
+ PerformanceOptimizationSkill,
+)
+
@pytest.mark.asyncio
async def test_debugging_skill():
@@ -8,6 +14,7 @@ async def test_debugging_skill():
assert result["status"] == "success"
assert "analysis" in result
+
@pytest.mark.asyncio
async def test_performance_skill():
skill = PerformanceOptimizationSkill()
@@ -15,6 +22,7 @@ async def test_performance_skill():
assert result["status"] == "success"
assert len(result["optimizations"]) > 0
+
@pytest.mark.asyncio
async def test_doc_skill():
skill = DocumentationGenerationSkill()
diff --git a/tests/test_utils.py b/tests/test_utils.py
index b9951a5..2c24a28 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,74 +1,83 @@
-import sys
+import asyncio
import os
+import sys
+from unittest.mock import MagicMock, patch
import pytest
-import asyncio
-from unittest.mock import MagicMock, patch
try:
from unittest.mock import AsyncMock
except ImportError:
+
class AsyncMock(MagicMock):
async def __call__(self, *args, **kwargs):
return super(AsyncMock, self).__call__(*args, **kwargs)
+
from ai_dev_os.utils.error_handling import with_retry
from ai_dev_os.utils.monitoring import setup_structured_logging
+
@pytest.mark.asyncio
async def test_with_retry_success_on_first_try():
mock_func = AsyncMock(return_value="success")
-
+
@with_retry(max_retries=3, base_delay=0.1)
async def my_func():
return await mock_func()
-
+
result = await my_func()
assert result == "success"
assert mock_func.call_count == 1
+
@pytest.mark.asyncio
async def test_with_retry_success_after_failure():
mock_func = AsyncMock(side_effect=[ValueError("fail"), "success"])
-
+
@with_retry(max_retries=3, base_delay=0.1)
async def my_func():
return await mock_func()
-
+
result = await my_func()
assert result == "success"
assert mock_func.call_count == 2
+
@pytest.mark.asyncio
async def test_with_retry_all_failures():
mock_func = AsyncMock(side_effect=ValueError("fail"))
-
+
@with_retry(max_retries=3, base_delay=0.1)
async def my_func():
return await mock_func()
-
+
with pytest.raises(ValueError, match="fail"):
await my_func()
-
+
assert mock_func.call_count == 3
+
def test_setup_structured_logging():
logger = setup_structured_logging()
assert logger.level <= 20 # INFO or lower
-
+
try:
try:
from pythonjsonlogger.json import JsonFormatter
except ImportError:
from pythonjsonlogger import jsonlogger
+
JsonFormatter = jsonlogger.JsonFormatter
has_json = True
except ImportError:
has_json = False
-
+
if has_json:
# Check if any handler has a formatter that looks like a JSON formatter
# Newer versions might have different class names or structures
assert any("Json" in type(h.formatter).__name__ for h in logger.handlers)
else:
- assert any(isinstance(h.formatter, __import__('logging').Formatter) for h in logger.handlers)
+ assert any(
+ isinstance(h.formatter, __import__("logging").Formatter) for h in logger.handlers
+ )