diff --git a/.env.example b/.env.example index d2c7200..f852d23 100644 --- a/.env.example +++ b/.env.example @@ -37,3 +37,7 @@ GIT_AUTO_PUSH=false # Automatically push commits to remote MAX_CONCURRENT_REQUESTS=5 STREAMING_ENABLED=false ENABLE_CHECKPOINTS=true + +# IRC Configuration +NEXT_PUBLIC_IRC_WS_URL=ws://localhost:8080 +NEXT_PUBLIC_IRC_CHANNEL=#devussy-chat diff --git a/.gitignore b/.gitignore index f7e5023..021602a 100644 --- a/.gitignore +++ b/.gitignore @@ -64,7 +64,6 @@ QUICK_SETUP.md START_HERE.txt # Generated project outputs (in root) -/devplan.md /handoff_prompt.md /project_design.md /phase*.md @@ -116,3 +115,4 @@ yarn-error.log* .yarn/ dist/ .vite/ +devussy-web/streaming_server/analytics.db diff --git a/AGENTS.md b/AGENTS.md index 3543a90..2bf7fcf 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -2,6 +2,62 @@ This file provides guidance to agents when working with code in this repository. +## πŸ”— CRITICAL: Anchor-Based Context Management + +> **⚠️ READ THIS FIRST - This is the most important pattern in this project.** + +Devussy uses **stable HTML comment anchors** for efficient context management and safe file updates. **All agents MUST use anchors** when reading/writing devplan, phase, and handoff files. 
+ +### Required Anchors + +| File | Anchor | Purpose | +|------|--------|---------| +| devplan.md | `` / `` | Track completed work | +| devplan.md | `` / `` | Current 3-5 tasks to execute | +| phase*.md | `` / `` | Phase-specific tasks | +| phase*.md | `` / `` | Outcomes and blockers | +| handoff_prompt.md | `` / `` | Status snapshot | +| handoff_prompt.md | `` / `` | Agent handoff notes | + +### How to Use Anchors + +**Reading (CORRECT):** +``` +Read devplan.md lines between and +# Result: ~100 tokens loaded +``` + +**Reading (WRONG):** +``` +Read entire devplan.md +# Result: ~3000 tokens wasted +``` + +**Writing:** Always use `safe_write_devplan()` from `src/file_manager.py`: +- Creates `.bak` backup before writing +- Validates anchors exist in new content +- Refuses to overwrite if anchors missing (writes to `.tmp` instead) + +### Token Budget + +| File | Section | ~Tokens | When to Read | +|------|---------|---------|--------------| +| handoff.md | Progress Log | ~200 | Start of session | +| devplan.md | NEXT_TASK_GROUP | ~100 | Every turn | +| devplan.md | PROGRESS_LOG | ~100 | If needed | +| phase*.md | PHASE_TASKS | ~80 | When working on phase | + +**Target: Stay under 500 tokens per turn by reading ONLY anchored sections.** + +### Validation + +`file_manager.py:_validate_devplan_content()` enforces: +- Header: `# Development Plan` or `## πŸ“‹ Project Dashboard` +- Phase table: `### πŸš€ Phase Overview` with `| Phase |` +- Anchors: `` and `` + +--- + ## Critical Commands (Non-Obvious) - **Test single file**: `pytest tests/unit/test_cli.py::TestGenerateDesignCommand::test_generate_design_minimal_args -v` - **Run integration tests**: `pytest tests/integration/ -v` diff --git a/DevDocs/JINJA_DATA_SAMPLES/basic_devplan.jinja.json b/DevDocs/JINJA_DATA_SAMPLES/basic_devplan.jinja.json index 62333c6..6bda84d 100644 --- a/DevDocs/JINJA_DATA_SAMPLES/basic_devplan.jinja.json +++ b/DevDocs/JINJA_DATA_SAMPLES/basic_devplan.jinja.json @@ -1,83 +1,33 @@ { - 
"repo_context": { - "project_type": "python", - "structure": { - "source_dirs": [ - "src" - ], - "test_dirs": [ - "tests" - ], - "config_dirs": [ - "config" - ], - "has_ci": true - }, - "dependencies": { - "python": [ - "fastapi", - "uvicorn" - ] - }, - "metrics": { - "total_files": 42, - "total_lines": 1337 - }, - "patterns": { - "test_frameworks": [ - "pytest" - ], - "build_tools": [ - "poetry" - ] - }, - "project_name": "ExistingApp", - "description": "An existing app", - "version": "1.0.0", - "author": "Dev" - }, "project_design": { - "project_name": "SuperApp", - "languages": [ - "Python", - "TypeScript" - ], - "frameworks": [ - "FastAPI", - "React" - ], - "apis": [ - "OpenAI", - "Stripe" - ], - "requirements": "Build a scalable web app.", + "project_name": "test-project", "objectives": [ - "High performance", - "User friendly" + "Build web application", + "Create API" ], "tech_stack": [ - "Python 3.11", - "React 18", + "Python", + "FastAPI", "PostgreSQL" ], - "architecture_overview": "Microservices architecture...", + "architecture_overview": "Test architecture overview", "dependencies": [ - "sqlalchemy", - "pydantic" + "requests", + "pydantic", + "uvicorn" ], "challenges": [ - "Concurrency", - "Data consistency" + "Performance optimization", + "Scalability" ], "mitigations": [ - "Use async/await", - "Use transactions" + "Use caching", + "Load balancing" ], - "complexity": "Medium", - "estimated_phases": 5 + "raw_llm_response": null, + "complexity": null, + "estimated_phases": null }, - "code_samples": "def hello(): pass", - "interactive_session": { - "question_count": 5 - } + "task_group_size": 3, + "detail_level": "normal" } \ No newline at end of file diff --git a/DevDocs/JINJA_DATA_SAMPLES/detailed_devplan.jinja.json b/DevDocs/JINJA_DATA_SAMPLES/detailed_devplan.jinja.json index a4809ad..07e68dc 100644 --- a/DevDocs/JINJA_DATA_SAMPLES/detailed_devplan.jinja.json +++ b/DevDocs/JINJA_DATA_SAMPLES/detailed_devplan.jinja.json @@ -1,48 +1,9 @@ { - 
"repo_context": { - "project_type": "python", - "structure": { - "source_dirs": [ - "src" - ], - "test_dirs": [ - "tests" - ], - "config_dirs": [ - "config" - ], - "has_ci": true - }, - "dependencies": { - "python": [ - "fastapi", - "uvicorn" - ] - }, - "metrics": { - "total_files": 42, - "total_lines": 1337 - }, - "patterns": { - "test_frameworks": [ - "pytest" - ], - "build_tools": [ - "poetry" - ] - }, - "project_name": "ExistingApp", - "description": "An existing app", - "version": "1.0.0", - "author": "Dev" - }, - "phase_number": 1, - "phase_title": "Setup", - "phase_description": "Initialize the project.", - "project_name": "SuperApp", - "tech_stack": [ - "Python", - "Git" - ], - "code_samples": "print('hello')" + "phase_number": 3, + "phase_title": "Phase 3", + "phase_description": "", + "project_name": "test", + "tech_stack": [], + "task_group_size": 3, + "detail_level": "normal" } \ No newline at end of file diff --git a/DevDocs/JINJA_DATA_SAMPLES/handoff_prompt.jinja.json b/DevDocs/JINJA_DATA_SAMPLES/handoff_prompt.jinja.json index dab14dd..e590681 100644 --- a/DevDocs/JINJA_DATA_SAMPLES/handoff_prompt.jinja.json +++ b/DevDocs/JINJA_DATA_SAMPLES/handoff_prompt.jinja.json @@ -1,45 +1,9 @@ { - "project_name": "SuperApp", - "repo_context": { - "project_type": "python", - "structure": { - "source_dirs": [ - "src" - ], - "test_dirs": [ - "tests" - ], - "config_dirs": [ - "config" - ], - "has_ci": true - }, - "dependencies": { - "python": [ - "fastapi", - "uvicorn" - ] - }, - "metrics": { - "total_files": 42, - "total_lines": 1337 - }, - "patterns": { - "test_frameworks": [ - "pytest" - ], - "build_tools": [ - "poetry" - ] - }, - "project_name": "ExistingApp", - "description": "An existing app", - "version": "1.0.0", - "author": "Dev" - }, - "current_phase_number": 2, - "current_phase_name": "Core Logic", - "next_task_id": "2.1", - "next_task_description": "Implement auth", - "blockers": "None" + "project_name": "TestProject", + "current_phase_number": "None", 
+ "current_phase_name": "No active phase", + "next_task_id": "1.1", + "next_task_description": "Initialize project", + "blockers": "None known", + "detail_level": "normal" } \ No newline at end of file diff --git a/DevDocs/JINJA_DATA_SAMPLES/project_design.jinja.json b/DevDocs/JINJA_DATA_SAMPLES/project_design.jinja.json index 3965807..d605dc7 100644 --- a/DevDocs/JINJA_DATA_SAMPLES/project_design.jinja.json +++ b/DevDocs/JINJA_DATA_SAMPLES/project_design.jinja.json @@ -1,40 +1,14 @@ { - "project_name": "SuperApp", + "project_name": "Test Project", + "requirements": [ + "Req 1", + "Req 2" + ], "languages": [ - "Python", - "TypeScript" + "Python" ], "frameworks": [ - "FastAPI", - "React" - ], - "apis": [ - "OpenAI", - "Stripe" - ], - "requirements": "Build a scalable web app.", - "objectives": [ - "High performance", - "User friendly" - ], - "tech_stack": [ - "Python 3.11", - "React 18", - "PostgreSQL" - ], - "architecture_overview": "Microservices architecture...", - "dependencies": [ - "sqlalchemy", - "pydantic" - ], - "challenges": [ - "Concurrency", - "Data consistency" - ], - "mitigations": [ - "Use async/await", - "Use transactions" + "FastAPI" ], - "complexity": "Medium", - "estimated_phases": 5 + "apis": [] } \ No newline at end of file diff --git a/README.md b/README.md index 40298a0..ccf3b4d 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,13 @@ Devussy turns a short project idea into a complete, actionable development plan. - New **Streaming Options** menu in Settings lets you toggle phases individually without touching config files. - Concurrency controls now live in Settings as well (max concurrent API requests / phases). +**πŸ“Š Backend web analytics (server-side, opt-out supported)** + +- Added a lightweight **server-side analytics module** behind the FastAPI streaming server. +- Tracks anonymized sessions (hashed IP + user-agent), API calls (endpoint, method, status, latency, sizes), and design inputs for the web UI. 
+- All analytics are kept **on the server only** (SQLite), with a simple `/api/analytics/overview` endpoint for internal inspection. +- Users can set a **“Disable anonymous usage analytics for this browser”** toggle in the Help window, which writes a `devussy_analytics_optout` cookie; when set, both the middleware and design endpoint completely skip analytics logging. + **🧱 Under-the-hood fixes** - Hardened `LLMInterviewManager` to be explicitly mode-aware (`initial` vs `design_review`). @@ -344,6 +351,70 @@ Legacy Devussy docs, handoff summaries, and helper scripts have been moved into - Code samples included for LLM context - Backward compatible (works without repo analysis) +### Adaptive Complexity Pipeline ✅ (NEW) + +Devussy now includes an **adaptive complexity system** that intelligently scales output based on project requirements. + +**Complexity Analysis:** +- Automatic project complexity scoring (0-20 scale) +- Dynamic depth level detection (minimal/standard/detailed) +- Phase count estimation (3-15 phases based on complexity) +- Confidence scoring with follow-up question triggers +- Supports both rule-based (testing) and LLM-driven (production) assessment + +**Design Validation System:** +- 5 validation checks: consistency, completeness, scope alignment, hallucination detection, over-engineering detection +- Rule-based validation for deterministic testing +- LLM-powered semantic review for production +- Auto-correctable issue identification +- Detailed issue reports with suggestions + +**Correction Loop:** +- Iterative design improvement (max 3 iterations) +- Automatic correction of identified issues +- Confidence threshold (0.8) for approval +- Manual review escalation when needed +- Full correction history tracking + +**Adaptive Output Generation:** +- Template-based output scaling (minimal/standard/detailed) +- Dynamic phase count based on complexity +- Complexity-aware design generator +- Per-depth-level devplan templates + +**Usage:** +```bash +# 
Run adaptive pipeline via CLI +python -m src.cli run-adaptive-pipeline \ + --name "My Project" \ + --languages "Python,TypeScript" \ + --requirements "Build a REST API" \ + --validation \ + --correction + +# Or use interview JSON +python -m src.cli run-adaptive-pipeline \ + --interview-file interview_data.json +``` + +**Web UI Components:** +- `ComplexityAssessment` - Visual score gauge, depth indicator, phase estimate +- `ValidationReport` - Issue display with severity, auto-fix badges, LLM review +- `CorrectionTimeline` - Iteration history with progress tracking + +**Testing:** +```bash +# Backend adaptive pipeline tests +pytest tests/integration/test_adaptive_pipeline_e2e.py -v +pytest tests/integration/test_adaptive_pipeline_orchestrator.py -v + +# Frontend component tests +cd devussy-web && npm test + +# Build Storybook for visual components +cd devussy-web && npm run build-storybook +``` + ### Terminal UI (Phases 4-5) βœ… **Foundation (Phase 4):** - Responsive grid layout (5 cols / 3x2 / 1x5) @@ -379,6 +450,27 @@ Legacy Devussy docs, handoff summaries, and helper scripts have been moved into - Integration tests for full workflows - Real-world validation with actual APIs +## Documentation for Agents + +> **Important:** If you're an AI agent working on this codebase, read `AGENTS.md` first. + +### Anchor-Based Context Management + +Devussy uses **stable HTML comment anchors** to enable efficient circular development. All planning/handoff documents contain anchors like: + +```markdown + +... content ... + +``` + +**Key rules for agents:** +1. Read ONLY anchored sections, not entire files (saves 90%+ tokens) +2. Use `safe_write_devplan()` from `src/file_manager.py` for writes (validates anchors, creates backups) +3. Never remove or modify anchor comments themselves + +See `AGENTS.md`, `WARP.md`, and `handoff.md` for comprehensive anchor documentation. + ## Troubleshooting - No output files? 
Ensure the appropriate provider key is set (OPENAI_API_KEY, AETHER_API_KEY, REQUESTY_API_KEY, AGENTROUTER_API_KEY, or GENERIC_API_KEY). - Status line missing? Make sure your terminal supports ANSI; non-TTY environments will still print stage lines and progress. diff --git a/START_HERE.md b/START_HERE.md index 4828b42..10c53bb 100644 --- a/START_HERE.md +++ b/START_HERE.md @@ -4,6 +4,19 @@ --- +## 🔗 CRITICAL: Anchor-Based Context Management + +> **⚠️ READ THIS FIRST** before diving into the codebase. + +Devussy uses **stable HTML comment anchors** for efficient context management. See `AGENTS.md` for the complete guide. + +**Quick rules:** +1. Read ONLY anchored sections (e.g., `` to ``), not entire files +2. Use `safe_write_devplan()` from `src/file_manager.py` for all devplan/phase file writes +3. Never remove anchor comments - they're required for validation + +--- + ## 🎯 What You Need to Know ### Status: ✅ PRODUCTION READY @@ -25,11 +38,11 @@ Polish the application and add remaining features (GitHub, download, persistence ### 1. **QUICK_START.md** (2 minutes) Start the application and verify it works. -### 2. **HANDOFF_FOR_NEXT_AGENT.md** (15 minutes) -Complete context on what was done and what needs doing. +### 2. **handoff.md** (15 minutes) +Complete context on what was done, adaptive pipeline implementation, and what needs doing. -### 3. **DEVPLAN_FOR_NEXT_AGENT.md** (10 minutes) -Detailed plan with 10 phases and time estimates. +### 3. **devplan.md** (10 minutes) +Detailed plan with phases, progress log, and next task group. ### 4. **README.md** (5 minutes) Project documentation and architecture. 
@@ -97,8 +110,8 @@ http://localhost:3000 ### Check These First - Browser console (F12) - Backend console -- `SESSION_HANDOFF.md` - Detailed technical info -- `HANDOFF_FOR_NEXT_AGENT.md` - Common issues +- `handoff.md` - Detailed technical info and milestone progress +- `AGENTS.md` - Anchor-based context management guide ### Key Files - `src/components/pipeline/ExecutionView.tsx` - Execution phase @@ -127,8 +140,8 @@ Now make it shine! ✨ --- **Next Steps**: -1. Read HANDOFF_FOR_NEXT_AGENT.md -2. Read DEVPLAN_FOR_NEXT_AGENT.md -3. Start with Phase 1 (Code Cleanup) +1. Read handoff.md (especially Milestone 5 for latest updates) +2. Read devplan.md (check `` section) +3. Start with Frontend Phase 2 (ComplexityAssessment.tsx) **Good luck!** 🚀 diff --git a/adaptive_pipeline_llm_ideas.md b/adaptive_pipeline_llm_ideas.md new file mode 100644 index 0000000..54c22f0 --- /dev/null +++ b/adaptive_pipeline_llm_ideas.md @@ -0,0 +1,455 @@ +# Adaptive Pipeline LLM Integration Ideas + +This document captures the intended **LLM-backed behavior** for each mocked component in the adaptive pipeline. Once the mock-only backend is stable and tested, this will be the source of truth for designing prompts, schemas, and API integration. + +> **IMPORTANT:** The current implementation uses static heuristics as a **testing scaffold**. The production system should use **LLM-driven dynamic assessment** that analyzes actual project requirements holistically rather than mapping to fixed buckets. + +--- + +## Design Philosophy: Mock → LLM Transition + +### Why Start with Mocks? +1. **Deterministic Testing:** Unit tests need predictable outputs +2. **Fast Iteration:** No API latency during development +3. **Cost Control:** Avoid token costs while iterating on logic +4. **Schema Validation:** Prove the data structures work before LLM integration + +### Production LLM Behavior Goals +1. **Holistic Analysis:** LLM considers full project context, not just keyword matching +2. 
**Nuanced Scoring:** Complexity factors interact (e.g., "simple CRUD with ML" is more complex than either alone) +3. **Hidden Complexity Detection:** LLM can identify compliance, security, or scaling requirements not explicitly stated +4. **Adaptive Follow-Ups:** LLM generates targeted clarification questions based on gaps +5. **Transparent Reasoning:** LLM explains its complexity assessment for user validation + +--- + +## 1. Complexity Analyzer (`src/interview/complexity_analyzer.py`) + +### Current Mock Behavior + +- Pure-Python heuristics infer: + - `project_type_bucket` from `project_type` string (CLI, API, Web App, SaaS, etc.). + - `technical_complexity_bucket` from keywords in `requirements` / `frameworks`. + - `integration_bucket` from simple counts of `apis`. + - `team_size_bucket` from `team_size` string or number. +- Computes `score`, `estimated_phase_count`, `depth_level`, and a simple `confidence`. + +### Future LLM-Powered Behavior (Production) + +The LLM should analyze projects **dynamically** rather than fitting into predefined buckets: + +#### Prompt Template (Complexity Assessment) +``` +You are a senior software architect analyzing a project to determine appropriate development complexity and planning depth. + +## Project Information +- **Project Name:** {project_name} +- **Description:** {description} +- **Project Type:** {project_type} +- **Technical Requirements:** {requirements} +- **Target Frameworks/Tech Stack:** {frameworks} +- **External Integrations:** {apis} +- **Team Size:** {team_size} +- **Timeline Constraints:** {timeline} + +## Additional Context (if available) +{repository_analysis_summary} + +## Your Task +Analyze this project holistically and provide a complexity assessment. Consider: +1. How the various complexity factors INTERACT (a simple CRUD + ML integration is more complex than either alone) +2. Hidden complexity signals like compliance requirements, data sensitivity, or scaling needs +3. 
Team experience implications (larger teams need more coordination overhead) +4. Any unstated assumptions that add complexity + +Respond with ONLY valid JSON matching this schema: +```json +{ + "complexity_score": , + "estimated_phase_count": , + "depth_level": "minimal" | "standard" | "detailed", + "confidence": , + "rationale": "", + "complexity_factors": { + "project_scope": <1-5>, + "technical_depth": <1-5>, + "integration_complexity": <0-5>, + "team_coordination": <1-3>, + "hidden_complexity": <0-3> + }, + "hidden_complexity_flags": ["