From 15f120e07ac7fd79fac464be983a58c7b26846fa Mon Sep 17 00:00:00 2001 From: Ashad Qureshi Date: Tue, 17 Jun 2025 12:17:38 +0500 Subject: [PATCH 1/7] Default Agents now loaded from DB --- .gitignore | 2 +- DEFAULT_AGENTS_SETUP.md | 47 + auto-analyst-backend/.gitignore | 2 +- auto-analyst-backend/DEFAULT_AGENTS_SETUP.md | 237 ++++ auto-analyst-backend/app.py | 331 +++-- auto-analyst-backend/chat_database.db | 4 +- auto-analyst-backend/heatmap_correlation.png | 3 + auto-analyst-backend/load_default_agents.py | 42 + auto-analyst-backend/src/agents/agents.py | 1197 ++++++++--------- .../src/agents/deep_agents.py | 14 +- auto-analyst-backend/src/db/init_db.py | 20 +- .../src/db/init_default_agents.py | 281 ++++ .../src/managers/session_manager.py | 22 +- .../src/managers/user_manager.py | 51 +- .../src/routes/templates_routes.py | 133 +- auto-analyst-frontend/app/account/page.tsx | 3 - .../custom-templates/TemplatesModal.tsx | 36 +- test_default_agents.py | 1 + 18 files changed, 1607 insertions(+), 819 deletions(-) create mode 100644 DEFAULT_AGENTS_SETUP.md create mode 100644 auto-analyst-backend/DEFAULT_AGENTS_SETUP.md create mode 100644 auto-analyst-backend/heatmap_correlation.png create mode 100644 auto-analyst-backend/load_default_agents.py create mode 100644 auto-analyst-backend/src/db/init_default_agents.py create mode 100644 test_default_agents.py diff --git a/.gitignore b/.gitignore index a292cd68..0400e75c 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,5 @@ terraform/.tfvars try* -*-2.db +*.db *_code*.py \ No newline at end of file diff --git a/DEFAULT_AGENTS_SETUP.md b/DEFAULT_AGENTS_SETUP.md new file mode 100644 index 00000000..df341e55 --- /dev/null +++ b/DEFAULT_AGENTS_SETUP.md @@ -0,0 +1,47 @@ +# Default Agents Setup Guide + +## Overview + +The system includes 4 default agents automatically loaded on app startup: + +1. **Data Preprocessing Agent** - Data cleaning and preparation +2. 
**Statistical Analytics Agent** - Statistical analysis using statsmodels +3. **Machine Learning Agent** - ML modeling using scikit-learn +4. **Data Visualization Agent** - Interactive visualizations using Plotly + +## Automatic Setup + +Default agents are automatically initialized when the application starts. You'll see: + +``` +Initializing default agents on startup... +✅ Default agents initialized successfully +``` + +## User Template Preferences + +- **Default agents (preprocessing, statistical_analytics, sk_learn, data_viz) are ENABLED by default** for all users +- **Other templates are DISABLED by default** and must be explicitly enabled +- Only enabled templates appear in the planner + +## Key Features + +- Agents load automatically on startup +- No manual setup required +- Default agents are active by default for better user experience +- Users can still disable default agents if desired +- Planner shows helpful messages when no agents enabled +- Full API support for template management + +## API Endpoints + +- `GET /templates/user/{user_id}` - Get user preferences +- `POST /templates/user/{user_id}/template/{template_id}/toggle` - Enable/disable templates +- `GET /templates/user/{user_id}/enabled` - Get enabled templates only + +## Manual Script (Optional) + +For manual updates: +```bash +python load_default_agents.py +``` \ No newline at end of file diff --git a/auto-analyst-backend/.gitignore b/auto-analyst-backend/.gitignore index 5f058853..43b85dde 100644 --- a/auto-analyst-backend/.gitignore +++ b/auto-analyst-backend/.gitignore @@ -25,7 +25,7 @@ migrations/ alembic.ini -*-2.db +*.db schema*.md diff --git a/auto-analyst-backend/DEFAULT_AGENTS_SETUP.md b/auto-analyst-backend/DEFAULT_AGENTS_SETUP.md new file mode 100644 index 00000000..1242c834 --- /dev/null +++ b/auto-analyst-backend/DEFAULT_AGENTS_SETUP.md @@ -0,0 +1,237 @@ +# Default Agents Setup Guide + +This guide explains how to set up and use the default agents system in the Auto-Analyst backend. 
+ +## Overview + +The system now includes 4 default agents that are stored in the database as templates: + +1. **Data Preprocessing Agent** (`preprocessing_agent`) - Data cleaning and preparation +2. **Statistical Analytics Agent** (`statistical_analytics_agent`) - Statistical analysis using statsmodels +3. **Machine Learning Agent** (`sk_learn_agent`) - ML modeling using scikit-learn +4. **Data Visualization Agent** (`data_viz_agent`) - Interactive visualizations using Plotly + +## Setup Instructions + +### 1. Load Default Agents into Database + +Run the setup script to populate the database with default agents: + +```bash +cd Auto-Analyst-CS/auto-analyst-backend +python load_default_agents.py +``` + +**Or** use the API endpoint: + +```bash +curl -X POST "http://localhost:8000/templates/load-default-agents" \ + -H "Content-Type: application/json" \ + -d '{"force_update": false}' +``` + +### 2. Agent Properties + +All default agents are created with: +- `is_active = True` (available for use) +- `is_premium_only = False` (free to use) +- Proper categories (Data Manipulation, Statistical Analysis, Modelling, Visualization) + +## User Preferences System + +### Default Behavior +- **Default agents (preprocessing, statistical_analytics, sk_learn, data_viz) are ENABLED by default** for all users +- **Other templates are DISABLED by default** and must be explicitly enabled +- Templates can be used directly via `@template_name` regardless of preferences + +### Managing User Preferences + +#### Enable/Disable Templates +```bash +# Enable a template for a user +curl -X POST "http://localhost:8000/templates/user/1/template/1/toggle" \ + -H "Content-Type: application/json" \ + -d '{"is_enabled": true}' + +# Disable a template for a user +curl -X POST "http://localhost:8000/templates/user/1/template/1/toggle" \ + -H "Content-Type: application/json" \ + -d '{"is_enabled": false}' +``` + +#### Bulk Enable/Disable +```bash +# Enable multiple templates at once +curl -X POST 
"http://localhost:8000/templates/user/1/bulk-toggle" \ + -H "Content-Type: application/json" \ + -d '{ + "template_preferences": { + "1": true, + "2": true, + "3": false + } + }' +``` + +#### Get User's Template Preferences +```bash +# Get all templates with user's enabled/disabled status +curl "http://localhost:8000/templates/user/1" + +# Get only enabled templates for user +curl "http://localhost:8000/templates/user/1/enabled" + +# Get enabled templates for planner (max 10, ordered by usage) +curl "http://localhost:8000/templates/user/1/enabled/planner" +``` + +## Planner Integration + +### How It Works +1. **Template Loading**: Only user-enabled templates are loaded into the planner +2. **No Agents Available**: If no templates are enabled, planner returns a helpful message +3. **Usage Tracking**: Template usage is tracked for prioritization + +### Planner Response When No Agents Enabled +```json +{ + "complexity": "no_agents_available", + "plan": "no_agents_available", + "plan_instructions": { + "message": "No agents are currently enabled for analysis. Please enable at least one agent (preprocessing, statistical analysis, machine learning, or visualization) in your template preferences to proceed with data analysis." 
+ } +} +``` + +## API Endpoints + +### Template Management +- `GET /templates/` - Get all available templates +- `GET /templates/template/{template_id}` - Get specific template +- `POST /templates/load-default-agents` - Load default agents into database + +### User Preferences +- `GET /templates/user/{user_id}` - Get user's template preferences +- `GET /templates/user/{user_id}/enabled` - Get user's enabled templates +- `GET /templates/user/{user_id}/enabled/planner` - Get templates for planner (max 10) +- `POST /templates/user/{user_id}/template/{template_id}/toggle` - Toggle template preference +- `POST /templates/user/{user_id}/bulk-toggle` - Bulk toggle preferences + +### Categories +- `GET /templates/categories/list` - Get all categories +- `GET /templates/categories` - Get templates grouped by category +- `GET /templates/category/{category}` - Get templates in specific category + +## Usage Examples + +### Frontend Integration +```typescript +// Enable preprocessing and visualization agents for user +const enableAgents = async (userId: number) => { + await fetch(`/templates/user/${userId}/bulk-toggle`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + template_preferences: { + "1": true, // preprocessing_agent + "4": true // data_viz_agent + } + }) + }); +}; + +// Get user's enabled templates +const getUserTemplates = async (userId: number) => { + const response = await fetch(`/templates/user/${userId}/enabled`); + return await response.json(); +}; +``` + +### Direct Agent Usage +Users can still use any agent directly regardless of preferences: +``` +@preprocessing_agent clean this data +@data_viz_agent create a scatter plot of sales vs price +``` + +### Planner Usage +Only enabled agents will be available to the planner: +``` +User: "Clean the data and create a visualization" +System: Uses only enabled agents to create the plan +``` + +## Database Schema + +### AgentTemplate Table +```sql +CREATE TABLE 
agent_templates ( + template_id SERIAL PRIMARY KEY, + template_name VARCHAR UNIQUE NOT NULL, + display_name VARCHAR, + description TEXT, + prompt_template TEXT, + category VARCHAR, + is_premium_only BOOLEAN DEFAULT FALSE, + is_active BOOLEAN DEFAULT TRUE, + created_at TIMESTAMP, + updated_at TIMESTAMP +); +``` + +### UserTemplatePreference Table +```sql +CREATE TABLE user_template_preferences ( + user_id INTEGER, + template_id INTEGER, + is_enabled BOOLEAN DEFAULT FALSE, + usage_count INTEGER DEFAULT 0, + last_used_at TIMESTAMP, + created_at TIMESTAMP, + updated_at TIMESTAMP, + PRIMARY KEY (user_id, template_id), + FOREIGN KEY (user_id) REFERENCES users(user_id), + FOREIGN KEY (template_id) REFERENCES agent_templates(template_id) +); +``` + +## Troubleshooting + +### Common Issues + +1. **No agents available in planner** + - Check if user has enabled any templates: `GET /templates/user/{user_id}/enabled` + - Enable templates using the toggle endpoint + +2. **Default agents not found** + - Run the load script: `python load_default_agents.py` + - Check if agents exist: `GET /templates/` + +3. **Import errors in load script** + - Ensure you're in the backend directory + - Check that all dependencies are installed + - Verify database connection + +### Logs +Check the application logs for detailed error messages: +```bash +tail -f logs/templates_routes.log +tail -f logs/agents.log +``` + +## Migration from Old System + +If migrating from the previous custom agents system: + +1. **Data Migration**: Existing custom agents should be migrated to the new template system +2. **User Preferences**: Users will need to re-enable their preferred agents +3. **API Updates**: Update frontend code to use new template endpoints +4. **Testing**: Verify planner works with enabled templates only + +## Support + +For issues or questions: +1. Check the logs for error messages +2. Verify database connections +3. Ensure proper API endpoint usage +4. 
Test with the load script first \ No newline at end of file diff --git a/auto-analyst-backend/app.py b/auto-analyst-backend/app.py index 6ecee146..9e8d1221 100644 --- a/auto-analyst-backend/app.py +++ b/auto-analyst-backend/app.py @@ -255,19 +255,7 @@ def clear_console(): logger.log_message(f"Housing.csv not found at {os.path.abspath(housing_csv_path)}", level=logging.ERROR) raise FileNotFoundError(f"Housing.csv not found at {os.path.abspath(housing_csv_path)}") -AVAILABLE_AGENTS = { - "data_viz_agent": data_viz_agent, - "sk_learn_agent": sk_learn_agent, - "statistical_analytics_agent": statistical_analytics_agent, - "preprocessing_agent": preprocessing_agent, -} - -PLANNER_AGENTS = { - "planner_preprocessing_agent": planner_preprocessing_agent, - "planner_sk_learn_agent": planner_sk_learn_agent, - "planner_statistical_analytics_agent": planner_statistical_analytics_agent, - "planner_data_viz_agent": planner_data_viz_agent, -} +# All agents are now loaded from database - no hardcoded dictionaries needed # Add session header X_SESSION_ID = APIKeyHeader(name="X-Session-ID", auto_error=False) @@ -275,7 +263,7 @@ def clear_console(): # Update AppState class to use SessionManager class AppState: def __init__(self): - self._session_manager = SessionManager(styling_instructions, PLANNER_AGENTS) + self._session_manager = SessionManager(styling_instructions, {}) # Empty dict, agents loaded from DB self.model_config = DEFAULT_MODEL_CONFIG.copy() # Update the SessionManager with the current model_config self._session_manager._app_model_config = self.model_config @@ -326,17 +314,66 @@ def get_chat_history_name_agent(self): def get_deep_analyzer(self, session_id: str): """Get or create deep analysis module for a session""" session_state = self.get_session_state(session_id) - if not hasattr(session_state, 'deep_analyzer') or session_state.get('deep_analyzer') is None: - # Create agents dictionary for deep analysis - deep_agents = { - "planner_data_viz_agent": 
dspy.asyncify(dspy.ChainOfThought(planner_data_viz_agent)), - "planner_statistical_analytics_agent": dspy.asyncify(dspy.ChainOfThought(planner_statistical_analytics_agent)), - "planner_sk_learn_agent": dspy.asyncify(dspy.ChainOfThought(planner_sk_learn_agent)), - "planner_preprocessing_agent": dspy.asyncify(dspy.ChainOfThought(planner_preprocessing_agent)) - } + user_id = session_state.get("user_id") + + # Check if we need to recreate the deep analyzer (user changed or doesn't exist) + current_analyzer = session_state.get('deep_analyzer') + analyzer_user_id = session_state.get('deep_analyzer_user_id') + + if (not current_analyzer or + analyzer_user_id != user_id or + not hasattr(session_state, 'deep_analyzer')): + + logger.log_message(f"Creating/recreating deep analyzer for session {session_id}, user_id: {user_id}", level=logging.INFO) + + # Load user-enabled agents from database using preference system + from src.db.init_db import session_factory + from src.agents.agents import load_user_enabled_templates_for_planner_from_db + + db_session = session_factory() + try: + # Load user-enabled agents for planner (respects preferences) + if user_id: + enabled_agents_dict = load_user_enabled_templates_for_planner_from_db(user_id, db_session) + logger.log_message(f"Deep analyzer loaded {len(enabled_agents_dict)} enabled agents for user {user_id}: {list(enabled_agents_dict.keys())}", level=logging.INFO) + else: + # Fallback to default agents if no user_id + logger.log_message("No user_id in session, loading default agents for deep analysis", level=logging.WARNING) + from src.agents.agents import preprocessing_agent, statistical_analytics_agent, sk_learn_agent, data_viz_agent + enabled_agents_dict = { + "preprocessing_agent": preprocessing_agent, + "statistical_analytics_agent": statistical_analytics_agent, + "sk_learn_agent": sk_learn_agent, + "data_viz_agent": data_viz_agent + } + + # Create agents dictionary for deep analysis using enabled agents + deep_agents = {} + 
deep_agents_desc = {} + + for agent_name, signature in enabled_agents_dict.items(): + deep_agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(signature)) + # Get agent description from database + deep_agents_desc[agent_name] = get_agent_description(agent_name) + + logger.log_message(f"Deep analyzer initialized with agents: {list(deep_agents.keys())}", level=logging.INFO) + + except Exception as e: + logger.log_message(f"Error loading agents for deep analysis: {str(e)}", level=logging.ERROR) + # Fallback to minimal set + from src.agents.agents import preprocessing_agent, statistical_analytics_agent, sk_learn_agent, data_viz_agent + deep_agents = { + "preprocessing_agent": dspy.asyncify(dspy.ChainOfThought(preprocessing_agent)), + "statistical_analytics_agent": dspy.asyncify(dspy.ChainOfThought(statistical_analytics_agent)), + "sk_learn_agent": dspy.asyncify(dspy.ChainOfThought(sk_learn_agent)), + "data_viz_agent": dspy.asyncify(dspy.ChainOfThought(data_viz_agent)) + } + deep_agents_desc = {name: get_agent_description(name) for name in deep_agents.keys()} + finally: + db_session.close() - deep_agents_desc = PLANNER_AGENTS_WITH_DESCRIPTION session_state['deep_analyzer'] = deep_analysis_module(agents=deep_agents, agents_desc=deep_agents_desc) + session_state['deep_analyzer_user_id'] = user_id # Track which user this analyzer was created for return session_state['deep_analyzer'] @@ -344,6 +381,7 @@ def get_deep_analyzer(self, session_id: str): app = FastAPI(title="AI Analytics API", version="1.0") app.state = AppState() + # Configure middleware # Use a wildcard for local development or read from environment is_development = os.getenv("ENVIRONMENT", "development").lower() == "development" @@ -450,15 +488,15 @@ async def chat_with_agent( timeout=REQUEST_TIMEOUT_SECONDS ) else: - # All standard/template agents - use auto_analyst_ind - standard_agent_sigs = [AVAILABLE_AGENTS[agent] for agent in standard_agents] + # All standard/template agents - use auto_analyst_ind 
which loads from DB user_id = session_state.get("user_id") - # Create database session for template loading + # Create database session for agent loading from src.db.init_db import session_factory db_session = session_factory() try: - agent = auto_analyst_ind(agents=standard_agent_sigs, retrievers=session_state["retrievers"], user_id=user_id, db_session=db_session) + # auto_analyst_ind will load all agents from database + agent = auto_analyst_ind(agents=[], retrievers=session_state["retrievers"], user_id=user_id, db_session=db_session) session_lm = get_session_lm(session_state) with dspy.context(lm=session_lm): response = await asyncio.wait_for( @@ -469,31 +507,15 @@ async def chat_with_agent( db_session.close() else: # Single agent case - if _is_standard_agent(agent_name): - # Standard agent - use auto_analyst_ind - user_id = session_state.get("user_id") - - # Create database session for template loading - from src.db.init_db import session_factory - db_session = session_factory() - try: - agent = auto_analyst_ind(agents=[AVAILABLE_AGENTS[agent_name]], retrievers=session_state["retrievers"], user_id=user_id, db_session=db_session) - session_lm = get_session_lm(session_state) - with dspy.context(lm=session_lm): - response = await asyncio.wait_for( - agent.forward(enhanced_query, agent_name), - timeout=REQUEST_TIMEOUT_SECONDS - ) - finally: - db_session.close() - elif _is_template_agent(agent_name): - # Template agent - use auto_analyst_ind with empty agents list (templates loaded in init) + if _is_standard_agent(agent_name) or _is_template_agent(agent_name): + # Standard or template agent - use auto_analyst_ind which loads from DB user_id = session_state.get("user_id") - # Create database session for template loading + # Create database session for agent loading from src.db.init_db import session_factory db_session = session_factory() try: + # auto_analyst_ind will load all agents from database agent = auto_analyst_ind(agents=[], 
retrievers=session_state["retrievers"], user_id=user_id, db_session=db_session) session_lm = get_session_lm(session_state) with dspy.context(lm=session_lm): @@ -614,48 +636,37 @@ def _update_session_from_query_params(request_obj: Request, session_state: dict) def _validate_agent_name(agent_name: str, session_state: dict = None): - """Validate that the requested agent(s) exist in either standard agents or user's custom agents""" + """Validate that the agent name(s) are available""" if "," in agent_name: + # Multiple agents agent_list = [agent.strip() for agent in agent_name.split(",")] for agent in agent_list: if not _is_agent_available(agent, session_state): available_agents = _get_available_agents_list(session_state) raise HTTPException( - status_code=404, + status_code=400, detail=f"Agent '{agent}' not found. Available agents: {available_agents}" ) - elif not _is_agent_available(agent_name, session_state): - available_agents = _get_available_agents_list(session_state) + else: + # Single agent + if not _is_agent_available(agent_name, session_state): + available_agents = _get_available_agents_list(session_state) raise HTTPException( - status_code=404, - detail=f"Agent '{agent_name}' not found. Available agents: {available_agents}" + status_code=400, + detail=f"Agent '{agent_name}' not found. 
Available agents: {available_agents}" ) def _is_agent_available(agent_name: str, session_state: dict = None) -> bool: - """Check if agent is available in either standard agents, template agents, or user's custom agents""" - # Check standard agents - if agent_name in AVAILABLE_AGENTS: + """Check if an agent is available (standard, template, or custom)""" + # Check if it's a standard agent + if _is_standard_agent(agent_name): return True - # Check template agents - try: - from src.db.init_db import session_factory - from src.db.schemas.models import AgentTemplate - - db_session = session_factory() - try: - template = db_session.query(AgentTemplate).filter( - AgentTemplate.template_name == agent_name, - AgentTemplate.is_active == True - ).first() - if template: - return True - finally: - db_session.close() - except Exception as e: - logger.log_message(f"Error checking template availability for {agent_name}: {str(e)}", level=logging.ERROR) + # Check if it's a template agent + if _is_template_agent(agent_name): + return True - # Check custom agents if session has an AI system with custom agents + # Check if it's a custom agent in session if session_state and "ai_system" in session_state: ai_system = session_state["ai_system"] if hasattr(ai_system, 'agents') and agent_name in ai_system.agents: @@ -664,22 +675,32 @@ def _is_agent_available(agent_name: str, session_state: dict = None) -> bool: return False def _get_available_agents_list(session_state: dict = None) -> list: - """Get list of all available agents (standard + custom)""" - available = list(AVAILABLE_AGENTS.keys()) + """Get list of all available agents from database""" + from src.db.init_db import session_factory + from src.agents.agents import load_all_available_templates_from_db - # Add custom agents if available - if session_state and "ai_system" in session_state: - ai_system = session_state["ai_system"] - if hasattr(ai_system, 'agents'): - custom_agents = [name for name in ai_system.agents.keys() - if name 
not in AVAILABLE_AGENTS and name != 'basic_qa_agent'] - available.extend(custom_agents) + # Core agents (always available) + available = ["preprocessing_agent", "statistical_analytics_agent", "sk_learn_agent", "data_viz_agent"] + + # Add template agents from database + db_session = session_factory() + try: + template_agents_dict = load_all_available_templates_from_db(db_session) + # template_agents_dict is a dict with template_name as keys + template_names = [template_name for template_name in template_agents_dict.keys() + if template_name not in available and template_name != 'basic_qa_agent'] + available.extend(template_names) + except Exception as e: + logger.log_message(f"Error loading template agents: {str(e)}", level=logging.ERROR) + finally: + db_session.close() return available def _is_standard_agent(agent_name: str) -> bool: - """Check if agent is a standard agent (not custom or template)""" - return agent_name in AVAILABLE_AGENTS + """Check if agent is one of the 4 core standard agents""" + standard_agents = ["preprocessing_agent", "statistical_analytics_agent", "sk_learn_agent", "data_viz_agent"] + return agent_name in standard_agents def _is_template_agent(agent_name: str) -> bool: """Check if agent is a template agent""" @@ -999,66 +1020,54 @@ async def _execute_plan_with_timeout(ai_system, enhanced_query, plan_response): # Add an endpoint to list available agents @app.get("/agents", response_model=dict) async def list_agents(request: Request, session_id: str = Depends(get_session_id_dependency)): + """Get all available agents (standard, template, and custom)""" session_state = app.state.get_session_state(session_id) - # Check if user_id is provided in query params to associate with session - user_id_param = request.query_params.get("user_id") - if user_id_param: - try: - user_id = int(user_id_param) - # Associate the user with this session to load custom agents - app.state.set_session_user(session_id, user_id) - # Refresh session state after user 
association - session_state = app.state.get_session_state(session_id) - except (ValueError, TypeError): - logger.log_message(f"Invalid user_id in agents endpoint: {user_id_param}", level=logging.WARNING) - - # Get user-specific agent list including custom agents - available_agents_list = _get_available_agents_list(session_state) - standard_agents = list(AVAILABLE_AGENTS.keys()) - planner_agents = list(PLANNER_AGENTS.keys()) - - # Get template agents from database - template_agents = [] try: + # Get all available agents from database and session + available_agents_list = _get_available_agents_list(session_state) + + # Categorize agents + standard_agents = ["preprocessing_agent", "statistical_analytics_agent", "sk_learn_agent", "data_viz_agent"] + + # Get template agents from database from src.db.init_db import session_factory - from src.db.schemas.models import AgentTemplate + from src.agents.agents import load_all_available_templates_from_db db_session = session_factory() try: - templates = db_session.query(AgentTemplate).filter( - AgentTemplate.is_active == True - ).all() - - template_agents = [template.template_name for template in templates] - logger.log_message(f"Found {len(template_agents)} template agents", level=logging.DEBUG) - + template_agents_dict = load_all_available_templates_from_db(db_session) + # template_agents_dict is a dict with template_name as keys + template_agents = [template_name for template_name in template_agents_dict.keys() + if template_name not in standard_agents and template_name != 'basic_qa_agent'] + except Exception as e: + logger.log_message(f"Error loading template agents in /agents endpoint: {str(e)}", level=logging.ERROR) + template_agents = [] finally: db_session.close() + + # Get custom agents from session + custom_agents = [] + if session_state and "ai_system" in session_state: + ai_system = session_state["ai_system"] + if hasattr(ai_system, 'agents'): + custom_agents = [agent for agent in available_agents_list + if agent 
not in standard_agents and agent not in template_agents] + + # Ensure template agents are in the available list + for template_agent in template_agents: + if template_agent not in available_agents_list: + available_agents_list.append(template_agent) + + return { + "available_agents": available_agents_list, + "standard_agents": standard_agents, + "template_agents": template_agents, + "custom_agents": custom_agents + } except Exception as e: - logger.log_message(f"Error fetching template agents: {str(e)}", level=logging.ERROR) - - # Custom agents are user-created agents (not standard, not planner, not template) - custom_agents = [agent for agent in available_agents_list - if agent not in standard_agents and agent not in planner_agents and agent not in template_agents] - - # Add template agents to available agents list if they're not already there - for template_agent in template_agents: - if template_agent not in available_agents_list: - available_agents_list.append(template_agent) - - return { - "available_agents": available_agents_list, - "standard_agents": standard_agents, - "custom_agents": custom_agents, - "template_agents": template_agents, - "planner_agents": planner_agents, - "deep_analysis": { - "available": True, - "description": "Comprehensive multi-step analysis with automated planning" - }, - "description": "List of available specialized agents that can be called using @agent_name" - } + logger.log_message(f"Error getting agents list: {str(e)}", level=logging.ERROR) + raise HTTPException(status_code=500, detail=f"Error getting agents list: {str(e)}") @app.get("/health", response_model=dict) async def health(): @@ -1162,7 +1171,7 @@ async def deep_analysis_streaming( session_lm = dspy.LM(model="anthropic/claude-sonnet-4-20250514", max_tokens=7000, temperature=0.5) return StreamingResponse( - _generate_deep_analysis_stream(session_state, request.goal, session_lm), + _generate_deep_analysis_stream(session_state, request.goal, session_lm, session_id), 
media_type='text/event-stream', headers={ 'Cache-Control': 'no-cache', @@ -1179,7 +1188,7 @@ async def deep_analysis_streaming( logger.log_message(f"Streaming deep analysis failed: {str(e)}", level=logging.ERROR) raise HTTPException(status_code=500, detail=f"Streaming deep analysis failed: {str(e)}") -async def _generate_deep_analysis_stream(session_state: dict, goal: str, session_lm): +async def _generate_deep_analysis_stream(session_state: dict, goal: str, session_lm, session_id: str): """Generate streaming responses for deep analysis""" # Track the start time for duration calculation start_time = datetime.now(UTC) @@ -1285,8 +1294,9 @@ async def update_report_in_db(status, progress, step=None, content=None): # Update DB status to running await update_report_in_db("running", 5) - # Get deep analyzer - deep_analyzer = app.state.get_deep_analyzer(session_state.get("session_id", "default")) + # Get deep analyzer - use the correct session_id from the session_state + logger.log_message(f"Getting deep analyzer for session_id: {session_id}, user_id: {user_id}", level=logging.INFO) + deep_analyzer = app.state.get_deep_analyzer(session_id) # Make the dataset available globally for code execution globals()['df'] = df @@ -1510,6 +1520,49 @@ async def download_html_report( logger.log_message(f"Failed to generate HTML report: {str(e)}", level=logging.ERROR) raise HTTPException(status_code=500, detail=f"Failed to generate report: {str(e)}") +@app.get("/debug/deep_analysis_agents") +async def debug_deep_analysis_agents(session_id: str = Depends(get_session_id_dependency)): + """Debug endpoint to show which agents are loaded for deep analysis""" + session_state = app.state.get_session_state(session_id) + user_id = session_state.get("user_id") + + try: + # Get the deep analyzer for this session + deep_analyzer = app.state.get_deep_analyzer(session_id) + + # Get the agents from the deep analyzer + available_agents = list(deep_analyzer.agents.keys()) if hasattr(deep_analyzer, 
'agents') else [] + + # Also get the raw enabled agents from database + from src.db.init_db import session_factory + from src.agents.agents import load_user_enabled_templates_for_planner_from_db + + db_session = session_factory() + try: + if user_id: + enabled_agents_dict = load_user_enabled_templates_for_planner_from_db(user_id, db_session) + db_enabled_agents = list(enabled_agents_dict.keys()) + else: + db_enabled_agents = ["No user_id - using defaults"] + finally: + db_session.close() + + return { + "session_id": session_id, + "user_id": user_id, + "deep_analyzer_agents": available_agents, + "db_enabled_agents": db_enabled_agents, + "agents_match": set(available_agents) == set(db_enabled_agents) if user_id else "N/A" + } + + except Exception as e: + logger.log_message(f"Error in debug endpoint: {str(e)}", level=logging.ERROR) + return { + "error": str(e), + "session_id": session_id, + "user_id": user_id + } + # In the section where routers are included, add the session_router app.include_router(chat_router) app.include_router(analytics_router) diff --git a/auto-analyst-backend/chat_database.db b/auto-analyst-backend/chat_database.db index c8fe109a..e4dbfeb5 100644 --- a/auto-analyst-backend/chat_database.db +++ b/auto-analyst-backend/chat_database.db @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5182504f09b433865500dea5844a934662270e3aba11fae15c58df19855b2ba1 -size 81920 +oid sha256:653453e8e99f92f0dd007bde41a21169a2fa32e8230b4d7903467c8787d199c9 +size 1581056 diff --git a/auto-analyst-backend/heatmap_correlation.png b/auto-analyst-backend/heatmap_correlation.png new file mode 100644 index 00000000..851174bf --- /dev/null +++ b/auto-analyst-backend/heatmap_correlation.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f478326d2aaeebf15aff548357f32202f7ac95c24c76e91d52fa999cb63783d0 +size 408092 diff --git a/auto-analyst-backend/load_default_agents.py b/auto-analyst-backend/load_default_agents.py new file mode 
100644 index 00000000..be0a9bc2 --- /dev/null +++ b/auto-analyst-backend/load_default_agents.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +""" +Script to load default agents into the AgentTemplate table. +Run this script to populate the database with the default agents. +""" + +import sys +import os +from pathlib import Path + +# Add the src directory to the path +src_path = Path(__file__).parent / "src" +sys.path.append(str(src_path)) + +def main(): + try: + from src.db.init_default_agents import initialize_default_agents + + # Initialize default agents with force update enabled + success = initialize_default_agents(force_update=True) + + if success: + print("✅ Successfully loaded default agents into the database!") + print("The following agents are now available:") + print(" • Data Preprocessing Agent (preprocessing_agent)") + print(" • Statistical Analytics Agent (statistical_analytics_agent)") + print(" • Machine Learning Agent (sk_learn_agent)") + print(" • Data Visualization Agent (data_viz_agent)") + else: + print("❌ Failed to load default agents") + sys.exit(1) + + except ImportError as e: + print(f"❌ Import error: {e}") + print("Make sure you're running this script from the auto-analyst-backend directory") + sys.exit(1) + except Exception as e: + print(f"❌ Error: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/auto-analyst-backend/src/agents/agents.py b/auto-analyst-backend/src/agents/agents.py index 16ef0a66..fc2745d2 100644 --- a/auto-analyst-backend/src/agents/agents.py +++ b/auto-analyst-backend/src/agents/agents.py @@ -22,16 +22,23 @@ def create_custom_agent_signature(agent_name, description, prompt_template): A dspy.Signature class with the custom prompt and standard input/output fields """ - # Standard input/output fields that match standard agents (like data_viz_agent) + # Check if this is a visualization agent to determine input fields + is_viz_agent = 'viz' in agent_name.lower() or 'visual' in 
agent_name.lower() or 'plot' in agent_name.lower() or 'chart' in agent_name.lower() + + # Standard input/output fields that match the unified agent signatures class_attributes = { '__doc__': prompt_template, # The custom prompt becomes the docstring 'goal': dspy.InputField(desc="User-defined goal which includes information about data and task they want to perform"), 'dataset': dspy.InputField(desc="Provides information about the data in the data frame. Only use column names and dataframe_name as in this context"), - 'styling_index': dspy.InputField(desc='Provides instructions on how to style outputs and formatting'), + 'plan_instructions': dspy.InputField(desc="Agent-level instructions about what to create and receive (optional for individual use)", default=""), 'code': dspy.OutputField(desc="Generated Python code for the analysis"), 'summary': dspy.OutputField(desc="A concise bullet-point summary of what was done and key results") } + # Add styling_index for visualization agents + if is_viz_agent: + class_attributes['styling_index'] = dspy.InputField(desc='Provides instructions on how to style outputs and formatting') + # Create the dynamic signature class CustomAgentSignature = type(agent_name, (dspy.Signature,), class_attributes) return CustomAgentSignature @@ -39,7 +46,7 @@ def create_custom_agent_signature(agent_name, description, prompt_template): def load_user_enabled_templates_from_db(user_id, db_session): """ Load template agents that are enabled for a specific user from the database. - All templates are enabled by default unless explicitly disabled by user preference. + Default agents are enabled by default unless explicitly disabled by user preference. 
Args: user_id: ID of the user @@ -56,6 +63,14 @@ def load_user_enabled_templates_from_db(user_id, db_session): if not user_id: return agent_signatures + # Get list of default agent names that should be enabled by default + default_agent_names = [ + "preprocessing_agent", + "statistical_analytics_agent", + "sk_learn_agent", + "data_viz_agent" + ] + # Get all active templates all_templates = db_session.query(AgentTemplate).filter( AgentTemplate.is_active == True @@ -68,10 +83,13 @@ def load_user_enabled_templates_from_db(user_id, db_session): UserTemplatePreference.template_id == template.template_id ).first() - # Template is disabled by default unless explicitly enabled - # Only enabled if preference record exists and is_enabled=True - is_enabled = preference.is_enabled if preference else False + # Determine if template should be enabled by default + is_default_agent = template.template_name in default_agent_names + default_enabled = is_default_agent # Default agents enabled by default, others disabled + # Template is enabled by default for default agents, disabled for others + is_enabled = preference.is_enabled if preference else default_enabled + if is_enabled: # Create dynamic signature for each enabled template signature = create_custom_agent_signature( @@ -90,6 +108,7 @@ def load_user_enabled_templates_from_db(user_id, db_session): def load_user_enabled_templates_for_planner_from_db(user_id, db_session): """ Load template agents that are enabled for planner use (max 10, prioritized by usage). + Default agents are enabled by default unless explicitly disabled by user preference. 
Args: user_id: ID of the user @@ -100,36 +119,62 @@ def load_user_enabled_templates_for_planner_from_db(user_id, db_session): """ try: from src.db.schemas.models import AgentTemplate, UserTemplatePreference + from datetime import datetime, UTC agent_signatures = {} if not user_id: return agent_signatures - # Get enabled templates ordered by usage (most used first) and limit to 10 - enabled_preferences = db_session.query(UserTemplatePreference).filter( - UserTemplatePreference.user_id == user_id, - UserTemplatePreference.is_enabled == True - ).order_by( - UserTemplatePreference.usage_count.desc(), - UserTemplatePreference.last_used_at.desc() - ).limit(10).all() + # Get list of default agent names that should be enabled by default + default_agent_names = [ + "preprocessing_agent", + "statistical_analytics_agent", + "sk_learn_agent", + "data_viz_agent" + ] - for preference in enabled_preferences: - # Get template details - template = db_session.query(AgentTemplate).filter( - AgentTemplate.template_id == preference.template_id, - AgentTemplate.is_active == True + # Get all active templates + all_templates = db_session.query(AgentTemplate).filter( + AgentTemplate.is_active == True + ).all() + + enabled_templates = [] + for template in all_templates: + # Check if user has a preference record for this template + preference = db_session.query(UserTemplatePreference).filter( + UserTemplatePreference.user_id == user_id, + UserTemplatePreference.template_id == template.template_id ).first() - if template: - # Create dynamic signature for each enabled template - signature = create_custom_agent_signature( - template.template_name, - template.description, - template.prompt_template - ) - agent_signatures[template.template_name] = signature + # Determine if template should be enabled by default + is_default_agent = template.template_name in default_agent_names + default_enabled = is_default_agent # Default agents enabled by default, others disabled + + # Template is enabled by 
default for default agents, disabled for others + is_enabled = preference.is_enabled if preference else default_enabled + + if is_enabled: + enabled_templates.append({ + 'template': template, + 'preference': preference, + 'usage_count': preference.usage_count if preference else 0, + 'last_used_at': preference.last_used_at if preference else None + }) + + # Sort by usage (most used first) and limit to 10 + enabled_templates.sort(key=lambda x: (x['usage_count'], x['last_used_at'] or datetime.min.replace(tzinfo=UTC)), reverse=True) + enabled_templates = enabled_templates[:10] + + for item in enabled_templates: + template = item['template'] + # Create dynamic signature for each enabled template + signature = create_custom_agent_signature( + template.template_name, + template.description, + template.prompt_template + ) + agent_signatures[template.template_name] = signature logger.log_message(f"Loaded {len(agent_signatures)} templates for planner", level=logging.DEBUG) return agent_signatures @@ -251,7 +296,6 @@ def load_all_available_templates_from_db(db_session): ) agent_signatures[template.template_name] = signature - logger.log_message(f"Loaded {len(agent_signatures)} templates", level=logging.INFO) return agent_signatures except Exception as e: @@ -262,43 +306,30 @@ def load_all_available_templates_from_db(db_session): # === END CUSTOM AGENT FUNCTIONALITY === -AGENTS_WITH_DESCRIPTION = { - "preprocessing_agent": "Cleans and prepares a DataFrame using Pandas and NumPy—handles missing values, detects column types, and converts date strings to datetime.", - "statistical_analytics_agent": "Performs statistical analysis (e.g., regression, seasonal decomposition) using statsmodels, with proper handling of categorical data and missing values.", - "sk_learn_agent": "Trains and evaluates machine learning models using scikit-learn, including classification, regression, and clustering with feature importance insights.", - "data_viz_agent": "Generates interactive visualizations 
with Plotly, selecting the best chart type to reveal trends, comparisons, and insights based on the analysis goal." -} - -PLANNER_AGENTS_WITH_DESCRIPTION = { - "planner_preprocessing_agent": ( - "Cleans and prepares a DataFrame using Pandas and NumPy" - "handles missing values, detects column types, and converts date strings to datetime. " - "Outputs a cleaned DataFrame for the planner_statistical_analytics_agent." - ), - "planner_statistical_analytics_agent": ( - "Takes the cleaned DataFrame from preprocessing, performs statistical analysis " - "(e.g., regression, seasonal decomposition) using statsmodels with proper handling " - "of categorical data and remaining missing values. " - "Produces summary statistics and model diagnostics for the planner_sk_learn_agent." - ), - "planner_sk_learn_agent": ( - "Receives summary statistics and the cleaned data, trains and evaluates machine " - "learning models using scikit-learn (classification, regression, clustering), " - "and generates performance metrics and feature importance. " - "Passes the trained models and evaluation results to the planner_data_viz_agent." - ), - "planner_data_viz_agent": ( - "Consumes trained models and evaluation results to create interactive visualizations " - "with Plotly—selects the best chart type, applies styling, and annotates insights. " - "Delivers ready-to-share figures that communicate model performance and key findings." - ), -} - def get_agent_description(agent_name, is_planner=False): - if is_planner: - return PLANNER_AGENTS_WITH_DESCRIPTION[agent_name.lower()] if agent_name.lower() in PLANNER_AGENTS_WITH_DESCRIPTION else "No description available for this agent" - else: - return AGENTS_WITH_DESCRIPTION[agent_name.lower()] if agent_name.lower() in AGENTS_WITH_DESCRIPTION else "No description available for this agent" + """ + Get agent description from database instead of hardcoded dictionaries. + This function is kept for backward compatibility but will fetch from DB. 
+ """ + try: + from src.db.init_db import session_factory + from src.db.schemas.models import AgentTemplate + + db_session = session_factory() + try: + template = db_session.query(AgentTemplate).filter( + AgentTemplate.template_name == agent_name, + AgentTemplate.is_active == True + ).first() + + if template: + return template.description + else: + return "No description available for this agent" + finally: + db_session.close() + except Exception as e: + return "No description available for this agent" # Agent to make a Chat history name from a query @@ -421,6 +452,11 @@ class custom_agent_instruction_generator(dspy.Signature): class advanced_query_planner(dspy.Signature): """ You are a advanced data analytics planner agent. Your task is to generate the most efficient plan—using the fewest necessary agents and variables—to achieve a user-defined goal. The plan must preserve data integrity, avoid unnecessary steps, and ensure clear data flow between agents. + +**CRITICAL**: Before planning, check if any agents are available in Agent_desc. If Agent_desc is empty or contains no active agents, respond with: +plan: no_agents_available +plan_instructions: {"message": "No agents are currently enabled for analysis. Please enable at least one agent (preprocessing, statistical analysis, machine learning, or visualization) in your template preferences to proceed with data analysis."} + **Inputs**: 1. Datasets (raw or preprocessed) 2. 
Agent descriptions (roles, variables they create/use, constraints) @@ -437,9 +473,9 @@ class advanced_query_planner(dspy.Signature): Example: 1 agent use goal: "Generate a bar plot showing sales by category after cleaning the raw data and calculating the average of the 'sales' column" Output: - plan: planner_data_viz_agent + plan: data_viz_agent { - "planner_data_viz_agent": { + "data_viz_agent": { "create": [ "cleaned_data: DataFrame - cleaned version of df (pd.Dataframe) after removing null values" ], @@ -451,9 +487,9 @@ class advanced_query_planner(dspy.Signature): } Example 3 Agent goal:"Clean the dataset, run a linear regression to model the relationship between marketing budget and sales, and visualize the regression line with confidence intervals." -plan: planner_preprocessing_agent -> planner_statistical_analytics_agent -> planner_data_viz_agent +plan: preprocessing_agent -> statistical_analytics_agent -> data_viz_agent { - "planner_preprocessing_agent": { + "preprocessing_agent": { "create": [ "cleaned_data: DataFrame - cleaned version of df with missing values handled and proper data types inferred" ], @@ -462,7 +498,7 @@ class advanced_query_planner(dspy.Signature): ], "instruction": "Clean df by handling missing values and converting column types (e.g., dates). Output cleaned_data for modeling." }, - "planner_statistical_analytics_agent": { + "statistical_analytics_agent": { "create": [ "regression_results: dict - model summary including coefficients, p-values, R², and confidence intervals" ], @@ -471,7 +507,7 @@ class advanced_query_planner(dspy.Signature): ], "instruction": "Perform linear regression using cleaned_data to model sales as a function of marketing budget. Return regression_results including coefficients and confidence intervals." 
}, - "planner_data_viz_agent": { + "data_viz_agent": { "create": [ "regression_plot: PlotlyFigure - visual plot showing regression line with confidence intervals" ], @@ -496,20 +532,25 @@ class basic_query_planner(dspy.Signature): """ You are the basic query planner in the system, you pick one agent, to answer the user's goal. Use the Agent_desc that describes the names and actions of agents available. + + **CRITICAL**: Before planning, check if any agents are available in Agent_desc. If Agent_desc is empty or contains no active agents, respond with: + plan: no_agents_available + plan_instructions: {"message": "No agents are currently enabled for analysis. Please enable at least one agent (preprocessing, statistical analysis, machine learning, or visualization) in your template preferences to proceed with data analysis."} + Example: Visualize height and salary? - plan:planner_data_viz_agent + plan:data_viz_agent plan_instructions: { - "planner_data_viz_agent": { + "data_viz_agent": { "create": ["scatter_plot"], "use": ["original_data"], "instruction": "use the original_data to create scatter_plot of height & salary, using plotly" } } Example: Tell me the correlation between X and Y - plan:planner_preprocessing_agent + plan:preprocessing_agent plan_instructions:{ - "planner_data_viz_agent": { + "data_viz_agent": { "create": ["correlation"], "use": ["original_data"], "instruction": "use the original_data to measure correlation of X & Y, using pandas" @@ -535,6 +576,11 @@ class intermediate_query_planner(dspy.Signature): 3. User-defined Goal You take these three inputs to develop a comprehensive plan to achieve the user-defined goal from the data & Agents available. In case you think the user-defined goal is infeasible you can ask the user to redefine or add more description to the goal. + + **CRITICAL**: Before planning, check if any agents are available in Agent_desc. 
If Agent_desc is empty or contains no active agents, respond with: + plan: no_agents_available + plan_instructions: {"message": "No agents are currently enabled for analysis. Please enable at least one agent (preprocessing, statistical analysis, machine learning, or visualization) in your template preferences to proceed with data analysis."} + Give your output in this format: plan: Agent1->Agent2 plan_instructions = { @@ -583,53 +629,115 @@ def __init__(self): self.allocator = dspy.Predict("goal,planner_desc,dataset->exact_word_complexity,reasoning") - async def forward(self, goal,dataset,Agent_desc): - complexity = self.allocator(goal=goal, planner_desc= str(self.planner_desc), dataset=str(dataset)) - # print(complexity) - if complexity.exact_word_complexity.strip() != "unrelated": + async def forward(self, goal, dataset, Agent_desc): + logger.log_message(f"Planner forward called with goal: {goal[:100]}...", level=logging.INFO) + logger.log_message(f"Agent descriptions: {Agent_desc}", level=logging.DEBUG) + + # Check if we have any agents available + if not Agent_desc or Agent_desc == "[]" or len(str(Agent_desc).strip()) < 10: + logger.log_message("No agents available for planning", level=logging.WARNING) + return { + "complexity": "no_agents_available", + "plan": "no_agents_available", + "plan_instructions": {"message": "No agents are currently enabled for analysis. 
Please enable at least one agent (preprocessing, statistical analysis, machine learning, or visualization) in your template preferences to proceed with data analysis."} + } + + try: + complexity = self.allocator(goal=goal, planner_desc=str(self.planner_desc), dataset=str(dataset)) + logger.log_message(f"Complexity determined: {complexity.exact_word_complexity.strip()}", level=logging.INFO) + + # If complexity is unrelated, return basic_qa_agent + if complexity.exact_word_complexity.strip() == "unrelated": + logger.log_message("Query classified as unrelated, using basic_qa_agent", level=logging.INFO) + return { + "complexity": complexity.exact_word_complexity.strip(), + "plan": "basic_qa_agent", + "plan_instructions": "{'basic_qa_agent':'Not a data related query, please ask a data related-query'}" + } + + # Try to get plan with determined complexity try: + logger.log_message(f"Attempting to plan with complexity: {complexity.exact_word_complexity.strip()}", level=logging.DEBUG) plan = await self.planners[complexity.exact_word_complexity.strip()](goal=goal, dataset=dataset, Agent_desc=Agent_desc) - + logger.log_message(f"Plan generated successfully: {plan}", level=logging.DEBUG) + + # Check if the planner returned no_agents_available + if hasattr(plan, 'plan') and 'no_agents_available' in str(plan.plan): + logger.log_message("Planner returned no_agents_available", level=logging.WARNING) + output = { + "complexity": "no_agents_available", + "plan": "no_agents_available", + "plan_instructions": {"message": "No agents are currently enabled for analysis. 
Please enable at least one agent (preprocessing, statistical analysis, machine learning, or visualization) in your template preferences to proceed with data analysis."} + } + else: + output = { + "complexity": complexity.exact_word_complexity.strip(), + "plan": dict(plan) + } + except Exception as e: + logger.log_message(f"Error with {complexity.exact_word_complexity.strip()} planner, falling back to intermediate: {str(e)}", level=logging.WARNING) + + # Fallback to intermediate planner plan = await self.planners["intermediate"](goal=goal, dataset=dataset, Agent_desc=Agent_desc) - - output = {"complexity":complexity.exact_word_complexity.strip() - ,"plan":dict(plan)} - else: - output = {"complexity":complexity.exact_word_complexity.strip() - ,"plan":dict(plan="basic_qa_agent", plan_instructions="""{'basic_qa_agent':'Not a data related query, please ask a data related-query'}""") - } - # print(output) + logger.log_message(f"Fallback plan generated: {plan}", level=logging.DEBUG) + + # Check if the fallback planner also returned no_agents_available + if hasattr(plan, 'plan') and 'no_agents_available' in str(plan.plan): + logger.log_message("Fallback planner also returned no_agents_available", level=logging.WARNING) + output = { + "complexity": "no_agents_available", + "plan": "no_agents_available", + "plan_instructions": {"message": "No agents are currently enabled for analysis. 
Please enable at least one agent (preprocessing, statistical analysis, machine learning, or visualization) in your template preferences to proceed with data analysis."} + } + else: + output = { + "complexity": "intermediate", + "plan": dict(plan) + } + + except Exception as e: + logger.log_message(f"Error in planner forward: {str(e)}", level=logging.ERROR) + # Return error response + return { + "complexity": "error", + "plan": "basic_qa_agent", + "plan_instructions": {"error": f"Planning error: {str(e)}"} + } + + logger.log_message(f"Final planner output: {output}", level=logging.INFO) return output -class planner_preprocessing_agent(dspy.Signature): +class preprocessing_agent(dspy.Signature): """ -You are a preprocessing agent in a multi-agent data analytics system. +You are a preprocessing agent that can work both individually and in multi-agent data analytics systems. You are given: -* A dataset (already loaded as `df`). -* A user-defined analysis goal (e.g., predictive modeling, exploration, cleaning). -* Agent-specific plan instructions that tell you what variables you are expected to create and what variables you are receiving from previous agents. -* processed_df is just an arbitrary name, it can be anything the planner says to clean! +* A dataset (already loaded as `df`). +* A user-defined analysis goal (e.g., predictive modeling, exploration, cleaning). +* Optional plan instructions that tell you what variables you are expected to create and what variables you are receiving from previous agents. + ### Your Responsibilities: -* Follow the provided plan and create only the required variables listed in the 'create' section of the plan instructions. -* Do not create fake data or introduce variables not explicitly part of the instructions. -* Do not read data from CSV ; the dataset (`df`) is already loaded and ready for processing. 
-* Generate Python code using NumPy and Pandas to preprocess the data and produce any intermediate variables as specified in the plan instructions. +* If plan_instructions are provided, follow the provided plan and create only the required variables listed in the 'create' section. +* If no plan_instructions are provided, perform standard data preprocessing based on the goal. +* Do not create fake data or introduce variables not explicitly part of the instructions. +* Do not read data from CSV; the dataset (`df`) is already loaded and ready for processing. +* Generate Python code using NumPy and Pandas to preprocess the data and produce any intermediate variables as specified. + ### Best Practices for Preprocessing: -1. Create a copy of the original DataFrame : It will always be stored as df, it already exists use it! +1. Create a copy of the original DataFrame: It will always be stored as df, it already exists use it! ```python processed_df = df.copy() ``` -2. Separate column types : +2. Separate column types: ```python numeric_cols = processed_df.select_dtypes(include='number').columns categorical_cols = processed_df.select_dtypes(include='object').columns ``` -3. Handle missing values : +3. Handle missing values: ```python for col in numeric_cols: processed_df[col] = processed_df[col].fillna(processed_df[col].median()) @@ -637,7 +745,7 @@ class planner_preprocessing_agent(dspy.Signature): for col in categorical_cols: processed_df[col] = processed_df[col].fillna(processed_df[col].mode()[0] if not processed_df[col].mode().empty else 'Unknown') ``` -4. Convert string columns to datetime safely : +4. Convert string columns to datetime safely: ```python def safe_to_datetime(x): try: @@ -647,138 +755,141 @@ def safe_to_datetime(x): cleaned_df['date_column'] = cleaned_df['date_column'].apply(safe_to_datetime) ``` -> Replace `processed_df`,'cleaned_df' and `date_column` with whatever names the user or planner provides. -5. 
Do not alter the DataFrame index : - Avoid using `reset_index()`, `set_index()`, or reindexing unless explicitly instructed. -6. Log assumptions and corrections in comments to clarify any choices made during preprocessing. -7. Do not mutate global state : Avoid in-place modifications unless clearly necessary (e.g., using `.copy()`). -8. Handle data types properly : +5. Do not alter the DataFrame index unless explicitly instructed. +6. Log assumptions and corrections in comments to clarify any choices made during preprocessing. +7. Do not mutate global state: Avoid in-place modifications unless clearly necessary (e.g., using `.copy()`). +8. Handle data types properly: * Avoid coercing types blindly (e.g., don't compare timestamps to strings or floats). * Use `pd.to_datetime(..., errors='coerce')` for safe datetime parsing. -9. Preserve column structure : Only drop or rename columns if explicitly instructed. +9. Preserve column structure: Only drop or rename columns if explicitly instructed. + ### Output: -1. Code : Python code that performs the requested preprocessing steps as per the plan instructions. -2. Summary : A brief explanation of what preprocessing was done (e.g., columns handled, missing value treatment). +1. Code: Python code that performs the requested preprocessing steps. +2. Summary: A brief explanation of what preprocessing was done (e.g., columns handled, missing value treatment). + ### Principles to Follow: --Never alter the DataFrame index unless explicitly instructed. --Handle missing data explicitly, filling with default values when necessary. --Preserve column structure and avoid unnecessary modifications. --Ensure data types are appropriate (e.g., dates parsed correctly). --Log assumptions in the code. +- Never alter the DataFrame index unless explicitly instructed. +- Handle missing data explicitly, filling with default values when necessary. +- Preserve column structure and avoid unnecessary modifications. 
+- Ensure data types are appropriate (e.g., dates parsed correctly). +- Log assumptions in the code. Respond in the user's language for all summary and reasoning but keep the code in english """ dataset = dspy.InputField(desc="The dataset, preloaded as df") goal = dspy.InputField(desc="User-defined goal for the analysis") - plan_instructions = dspy.InputField(desc="Agent-level instructions about what to create and receive") + plan_instructions = dspy.InputField(desc="Agent-level instructions about what to create and receive (optional for individual use)", default="") code = dspy.OutputField(desc="Generated Python code for preprocessing") summary = dspy.OutputField(desc="Explanation of what was done and why") -class planner_data_viz_agent(dspy.Signature): +class data_viz_agent(dspy.Signature): + """ +You are a data visualization agent that can work both individually and in multi-agent analytics pipelines. +Your primary responsibility is to generate visualizations based on the user-defined goal. + +You are provided with: +* **goal**: A user-defined goal outlining the type of visualization the user wants (e.g., "plot sales over time with trendline"). +* **dataset**: The dataset (e.g., `df_cleaned`) which will be passed to you by other agents in the pipeline. Do not assume or create any variables — the data is already present and valid when you receive it. +* **styling_index**: Specific styling instructions (e.g., axis formatting, color schemes) for the visualization. +* **plan_instructions**: Optional dictionary containing: + * **'create'**: List of visualization components you must generate (e.g., 'scatter_plot', 'bar_chart'). + * **'use'**: List of variables you must use to generate the visualizations. + * **'instructions'**: Additional instructions related to the creation of the visualizations. + +### Responsibilities: +1. **Strict Use of Provided Variables**: + * You must never create fake data. Only use the variables and datasets that are explicitly provided. 
+ * If plan_instructions are provided and any variable listed in plan_instructions['use'] is missing, return an error. + * If no plan_instructions are provided, work with the available dataset directly. + +2. **Visualization Creation**: + * Based on the goal and optional 'create' section of plan_instructions, generate the required visualization using Plotly. + * Respect the user-defined goal in determining which type of visualization to create. + +3. **Performance Optimization**: + * If the dataset contains more than 50,000 rows, you must sample the data to 5,000 rows to improve performance: + ```python + if len(df) > 50000: + df = df.sample(5000, random_state=42) + ``` + +4. **Layout and Styling**: + * Apply formatting and layout adjustments as defined by the styling_index. + * Ensure that all axes (x and y) have consistent formats (e.g., using `K`, `M`, or 1,000 format, but not mixing formats). + +5. **Trendlines**: + * Trendlines should only be included if explicitly requested in the goal or plan_instructions. + +6. **Displaying the Visualization**: + * Use Plotly's `fig.show()` method to display the created chart. + * Never output raw datasets or the goal itself. Only the visualization code and the chart should be returned. + +7. **Error Handling**: + * If required dataset or variables are missing, return an error message indicating which specific variable is missing. + * If the goal or create instructions are ambiguous, return an error stating the issue. + +8. **No Data Modification**: + * Never modify the provided dataset or generate new data. If the data needs preprocessing, assume it's already been done by other agents. 
+ +### Important Notes: +- Use update_yaxes, update_xaxes, not axis +- Each visualization must be generated as a separate figure using go.Figure() +- Do NOT use subplots under any circumstances +- Each figure must be returned individually using: fig.to_html(full_html=False) +- Use update_layout with xaxis and yaxis only once per figure +- Enhance readability with low opacity (0.4-0.7) where appropriate +- Apply visually distinct colors for different elements or categories +- Use only one number format consistently: either 'K', 'M', or comma-separated values +- Only include trendlines in scatter plots if the user explicitly asks for them +- Always end each visualization with: fig.to_html(full_html=False) + +Respond in the user's language for all summary and reasoning but keep the code in english """ - ### **Data Visualization Agent Definition** - You are the **data visualization agent** in a multi-agent analytics pipeline. Your primary responsibility is to **generate visualizations** based on the **user-defined goal** and the **plan instructions**. - You are provided with: - * **goal**: A user-defined goal outlining the type of visualization the user wants (e.g., "plot sales over time with trendline"). - * **dataset**: The dataset (e.g., `df_cleaned`) which will be passed to you by other agents in the pipeline. **Do not assume or create any variables** — **the data is already present and valid** when you receive it. - * **styling_index**: Specific styling instructions (e.g., axis formatting, color schemes) for the visualization. - * **plan_instructions**: A dictionary containing: - * **'create'**: List of **visualization components** you must generate (e.g., 'scatter_plot', 'bar_chart'). - * **'use'**: List of **variables you must use** to generate the visualizations. This includes datasets and any other variables provided by the other agents. 
- * **'instructions'**: A list of additional instructions related to the creation of the visualizations, such as requests for trendlines or axis formats. - --- - ### **Responsibilities**: - 1. **Strict Use of Provided Variables**: - * You must **never create fake data**. Only use the variables and datasets that are explicitly **provided** to you in the `plan_instructions['use']` section. All the required data **must already be available**. - * If any variable listed in `plan_instructions['use']` is missing or invalid, **you must return an error** and not proceed with any visualization. - 2. **Visualization Creation**: - * Based on the **'create'** section of the `plan_instructions`, generate the **required visualization** using **Plotly**. For example, if the goal is to plot a time series, you might generate a line chart. - * Respect the **user-defined goal** in determining which type of visualization to create. - 3. **Performance Optimization**: - * If the dataset contains **more than 50,000 rows**, you **must sample** the data to **5,000 rows** to improve performance. Use this method: - ```python - if len(df) > 50000: - df = df.sample(5000, random_state=42) - ``` - 4. **Layout and Styling**: - * Apply formatting and layout adjustments as defined by the **styling_index**. This may include: - * Axis labels and title formatting. - * Tick formats for axes. - * Color schemes or color maps for visual elements. - * You must ensure that all axes (x and y) have **consistent formats** (e.g., using `K`, `M`, or 1,000 format, but not mixing formats). - 5. **Trendlines**: - * Trendlines should **only be included** if explicitly requested in the **'instructions'** section of `plan_instructions`. - 6. **Displaying the Visualization**: - * Use Plotly's `fig.show()` method to display the created chart. - * **Never** output raw datasets or the **goal** itself. Only the visualization code and the chart should be returned. - 7. 
**Error Handling**: - * If the required dataset or variables are missing or invalid (i.e., not included in `plan_instructions['use']`), return an error message indicating which specific variable is missing or invalid. - * If the **goal** or **create** instructions are ambiguous or invalid, return an error stating the issue. - 8. **No Data Modification**: - * **Never** modify the provided dataset or generate new data. If the data needs preprocessing or cleaning, assume it's already been done by other agents. - --- - ### **Strict Conditions**: - * You **never** create any data. - * You **only** use the data and variables passed to you. - * If any required data or variable is missing or invalid, **you must stop** and return a clear error message. - * Respond in the user's language for all summary and reasoning but keep the code in english - * it should be update_yaxes, update_xaxes, not axis - By following these conditions and responsibilities, your role is to ensure that the **visualizations** are generated as per the user goal, using the valid data and instructions given to you. - """ goal = dspy.InputField(desc="User-defined chart goal (e.g. trendlines, scatter plots)") dataset = dspy.InputField(desc="Details of the dataframe (`df`) and its columns") styling_index = dspy.InputField(desc="Instructions for plot styling and layout formatting") - plan_instructions = dspy.InputField(desc="Variables to create and receive for visualization purposes") + plan_instructions = dspy.InputField(desc="Variables to create and receive for visualization purposes (optional for individual use)", default="") code = dspy.OutputField(desc="Plotly Python code for the visualization") summary = dspy.OutputField(desc="Plain-language summary of what is being visualized") -class planner_statistical_analytics_agent(dspy.Signature): +class statistical_analytics_agent(dspy.Signature): """ -**Agent Definition:** -You are a statistical analytics agent in a multi-agent data analytics pipeline. 
+You are a statistical analytics agent that can work both individually and in multi-agent data analytics pipelines. You are given: * A dataset (usually a cleaned or transformed version like `df_cleaned`). * A user-defined goal (e.g., regression, seasonal decomposition). -* Agent-specific **plan instructions** specifying: - * Which **variables** you are expected to **CREATE** (e.g., `regression_model`). - * Which **variables** you will **USE** (e.g., `df_cleaned`, `target_variable`). - * A set of **instructions** outlining additional processing or handling for these variables (e.g., handling missing values, adding constants, transforming features, etc.). -**Your Responsibilities:** +* Optional plan instructions specifying: + * Which variables you are expected to CREATE (e.g., `regression_model`). + * Which variables you will USE (e.g., `df_cleaned`, `target_variable`). + * A set of instructions outlining additional processing or handling for these variables. + +### Your Responsibilities: * Use the `statsmodels` library to implement the required statistical analysis. * Ensure that all strings are handled as categorical variables via `C(col)` in model formulas. * Always add a constant using `sm.add_constant()`. -* Do **not** modify the DataFrame's index. +* Do not modify the DataFrame's index. * Convert `X` and `y` to float before fitting the model. * Handle missing values before modeling. * Avoid any data visualization (that is handled by another agent). * Write output to the console using `print()`. -**If the goal is regression:** + +### If the goal is regression: * Use `statsmodels.OLS` with proper handling of categorical variables and adding a constant term. * Handle missing values appropriately. -**If the goal is seasonal decomposition:** + +### If the goal is seasonal decomposition: * Use `statsmodels.tsa.seasonal_decompose`. * Ensure the time series and period are correctly provided (i.e., `period` should not be `None`). 
-**You must not:** -* You must always create the variables in `plan_instructions['CREATE']`. -* **Never create the `df` variable**. Only work with the variables passed via the `plan_instructions`. -* Rely on hardcoded column names — use those passed via `plan_instructions`. -* Introduce or modify intermediate variables unless they are explicitly listed in `plan_instructions['CREATE']`. -**Instructions to Follow:** -1. **CREATE** only the variables specified in `plan_instructions['CREATE']`. Do not create any intermediate or new variables. -2. **USE** only the variables specified in `plan_instructions['USE']` to carry out the task. -3. Follow any **additional instructions** in `plan_instructions['INSTRUCTIONS']` (e.g., preprocessing steps, encoding, handling missing values). -4. **Do not reassign or modify** any variables passed via `plan_instructions`. These should be used as-is. -**Example Workflow:** -Given that the `plan_instructions` specifies variables to **CREATE** and **USE**, and includes instructions, your approach should look like this: -1. Use `df_cleaned` and the variables like `X` and `y` from `plan_instructions` for analysis. -2. Follow instructions for preprocessing (e.g., handle missing values or scale features). -3. If the goal is regression: - * Use `sm.OLS` for model fitting. - * Handle categorical variables via `C(col)` and add a constant term. -4. If the goal is seasonal decomposition: - * Ensure `period` is provided and use `sm.tsa.seasonal_decompose`. -5. Store the output variable as specified in `plan_instructions['CREATE']`. + +### Instructions to Follow: +1. If plan_instructions are provided: + * CREATE only the variables specified in plan_instructions['CREATE']. Do not create any intermediate or new variables. + * USE only the variables specified in plan_instructions['USE'] to carry out the task. + * Follow any additional instructions in plan_instructions['INSTRUCTIONS']. 
+ * Do not reassign or modify any variables passed via plan_instructions. +2. If no plan_instructions are provided, perform standard statistical analysis based on the goal and available data. + ### Example Code Structure: ```python import statsmodels.api as sm @@ -808,109 +919,89 @@ def statistical_model(X, y, goal, period=None): except Exception as e: return f"An error occurred: {e}" ``` -**Summary:** -1. Always **USE** the variables passed in `plan_instructions['USE']` to carry out the task. -2. Only **CREATE** the variables specified in `plan_instructions['CREATE']`. Do not create any additional variables. -3. Follow any **additional instructions** in `plan_instructions['INSTRUCTIONS']` (e.g., handling missing values, adding constants). + +### Summary: +1. Always USE the variables passed in plan_instructions['USE'] to carry out the task (if provided). +2. Only CREATE the variables specified in plan_instructions['CREATE'] (if provided). +3. Follow any additional instructions in plan_instructions['INSTRUCTIONS'] (if provided). 4. Ensure reproducibility by setting the random state appropriately and handling categorical variables. 5. Focus on statistical analysis and avoid any unnecessary data manipulation. -**Output:** -* The **code** implementing the statistical analysis, including all required steps. -* A **summary** of what the statistical analysis does, how it's performed, and why it fits the goal. + +### Output: +* The code implementing the statistical analysis, including all required steps. +* A summary of what the statistical analysis does, how it's performed, and why it fits the goal. 
* Respond in the user's language for all summary and reasoning but keep the code in english """ dataset = dspy.InputField(desc="Preprocessed dataset, often named df_cleaned") goal = dspy.InputField(desc="The user's statistical analysis goal, e.g., regression or seasonal_decompose") - plan_instructions = dspy.InputField(desc="Instructions on variables to create and receive for statistical modeling") + plan_instructions = dspy.InputField(desc="Instructions on variables to create and receive for statistical modeling (optional for individual use)", default="") code = dspy.OutputField(desc="Python code for statistical modeling using statsmodels") summary = dspy.OutputField(desc="A concise bullet-point summary of the statistical analysis performed and key findings") - - -class planner_sk_learn_agent(dspy.Signature): +class sk_learn_agent(dspy.Signature): """ - **Agent Definition:** - You are a machine learning agent in a multi-agent data analytics pipeline. - You are given: - * A dataset (often cleaned and feature-engineered). - * A user-defined goal (e.g., classification, regression, clustering). - * Agent-specific **plan instructions** specifying: - * Which **variables** you are expected to **CREATE** (e.g., `trained_model`, `predictions`). - * Which **variables** you will **USE** (e.g., `df_cleaned`, `target_variable`, `feature_columns`). - * A set of **instructions** outlining additional processing or handling for these variables (e.g., handling missing values, applying transformations, or other task-specific guidelines). - **Your Responsibilities:** - * Use the scikit-learn library to implement the appropriate ML pipeline. - * Always split data into training and testing sets where applicable. - * Use `print()` for all outputs. - * Ensure your code is: - * **Reproducible**: Set `random_state=42` wherever applicable. - * **Modular**: Avoid deeply nested code. - * **Focused on model building**, not visualization (leave plotting to the `data_viz_agent`). 
- * Your task may include: - * Preprocessing inputs (e.g., encoding). - * Model selection and training. - * Evaluation (e.g., accuracy, RMSE, classification report). - **You must not:** - * Visualize anything (that's another agent's job). - * Rely on hardcoded column names — use those passed via `plan_instructions`. - * **Never create or modify any variables not explicitly mentioned in `plan_instructions['CREATE']`.** - * **Never create the `df` variable**. You will **only** work with the variables passed via the `plan_instructions`. - * Do not introduce intermediate variables unless they are listed in `plan_instructions['CREATE']`. - **Instructions to Follow:** - 1. **CREATE** only the variables specified in the `plan_instructions['CREATE']` list. Do not create any intermediate or new variables. - 2. **USE** only the variables specified in the `plan_instructions['USE']` list. You are **not allowed** to create or modify any variables not listed in the plan instructions. - 3. Follow any **processing instructions** in the `plan_instructions['INSTRUCTIONS']` list. This might include tasks like handling missing values, scaling features, or encoding categorical variables. Always perform these steps on the variables specified in the `plan_instructions`. - 4. Do **not reassign or modify** any variables passed via `plan_instructions`. These should be used as-is. - **Example Workflow:** - Given that the `plan_instructions` specifies variables to **CREATE** and **USE**, and includes instructions, your approach should look like this: - 1. Use `df_cleaned` and `feature_columns` from the `plan_instructions` to extract your features (`X`). - 2. Use `target_column` from `plan_instructions` to extract your target (`y`). - 3. If instructions are provided (e.g., scale or encode), follow them. - 4. Split data into training and testing sets using `train_test_split`. - 5. Train the model based on the received goal (classification, regression, etc.). - 6. 
Store the output variables as specified in `plan_instructions['CREATE']`. - ### Example Code Structure: - ```python - from sklearn.model_selection import train_test_split - from sklearn.linear_model import LogisticRegression - from sklearn.metrics import classification_report - from sklearn.preprocessing import StandardScaler - # Ensure that all variables follow plan instructions: - # Use received inputs: df_cleaned, feature_columns, target_column - X = df_cleaned[feature_columns] - y = df_cleaned[target_column] - # Apply any preprocessing instructions (e.g., scaling if instructed) - if 'scale' in plan_instructions['INSTRUCTIONS']: - scaler = StandardScaler() - X = scaler.fit_transform(X) - # Split the data into training and testing sets - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - # Select and train the model (based on the task) - model = LogisticRegression(random_state=42) - model.fit(X_train, y_train) - # Generate predictions - predictions = model.predict(X_test) - # Create the variable specified in 'plan_instructions': 'metrics' - metrics = classification_report(y_test, predictions) - # Print the results - print(metrics) - # Ensure the 'metrics' variable is returned as requested in the plan - ``` - **Summary:** - 1. Always **USE** the variables passed in `plan_instructions['USE']` to build the pipeline. - 2. Only **CREATE** the variables specified in `plan_instructions['CREATE']`. Do not create any additional variables. - 3. Follow any **additional instructions** in `plan_instructions['INSTRUCTIONS']` (e.g., preprocessing steps). - 4. Ensure reproducibility by setting `random_state=42` wherever necessary. - 5. Focus on model building, evaluation, and saving the required outputs—avoid any unnecessary variables. - **Output:** - * The **code** implementing the ML task, including all required steps. - * A **summary** of what the model does, how it is evaluated, and why it fits the goal. 
- * Respond in the user's language for all summary and reasoning but keep the code in english +You are a machine learning agent that can work both individually and in multi-agent data analytics pipelines. +You are given: +* A dataset (often cleaned and feature-engineered). +* A user-defined goal (e.g., classification, regression, clustering). +* Optional plan instructions specifying: + * Which variables you are expected to CREATE (e.g., `trained_model`, `predictions`). + * Which variables you will USE (e.g., `df_cleaned`, `target_variable`, `feature_columns`). + * A set of instructions outlining additional processing or handling for these variables. + +### Your Responsibilities: +* Use the scikit-learn library to implement the appropriate ML pipeline. +* Always split data into training and testing sets where applicable. +* Use `print()` for all outputs. +* Ensure your code is: + * Reproducible: Set `random_state=42` wherever applicable. + * Modular: Avoid deeply nested code. + * Focused on model building, not visualization (leave plotting to the `data_viz_agent`). +* Your task may include: + * Preprocessing inputs (e.g., encoding). + * Model selection and training. + * Evaluation (e.g., accuracy, RMSE, classification report). + +### You must not: +* Visualize anything (that's another agent's job). +* Rely on hardcoded column names — use those passed via plan_instructions or infer from data. +* Never create or modify any variables not explicitly mentioned in plan_instructions['CREATE'] (if provided). +* Never create the `df` variable. You will only work with the variables passed via the plan_instructions. +* Do not introduce intermediate variables unless they are listed in plan_instructions['CREATE'] (if provided). + +### Instructions to Follow: +1. If plan_instructions are provided: + * CREATE only the variables specified in the plan_instructions['CREATE'] list. + * USE only the variables specified in the plan_instructions['USE'] list. 
+ * Follow any processing instructions in the plan_instructions['INSTRUCTIONS'] list. + * Do not reassign or modify any variables passed via plan_instructions. +2. If no plan_instructions are provided, perform standard machine learning analysis based on the goal and available data. + +### Example Workflow: +Given that the plan_instructions specifies variables to CREATE and USE, and includes instructions, your approach should look like this: +1. Use `df_cleaned` and `feature_columns` from the plan_instructions to extract your features (`X`). +2. Use `target_column` from plan_instructions to extract your target (`y`). +3. If instructions are provided (e.g., scale or encode), follow them. +4. Split data into training and testing sets using `train_test_split`. +5. Train the model based on the received goal (classification, regression, etc.). +6. Store the output variables as specified in plan_instructions['CREATE']. + +### Summary: +1. Always USE the variables passed in plan_instructions['USE'] to build the pipeline (if provided). +2. Only CREATE the variables specified in plan_instructions['CREATE'] (if provided). +3. Follow any additional instructions in plan_instructions['INSTRUCTIONS'] (if provided). +4. Ensure reproducibility by setting random_state=42 wherever necessary. +5. Focus on model building, evaluation, and saving the required outputs—avoid any unnecessary variables. + +### Output: +* The code implementing the ML task, including all required steps. +* A summary of what the model does, how it is evaluated, and why it fits the goal. 
+* Respond in the user's language for all summary and reasoning but keep the code in english """ dataset = dspy.InputField(desc="Input dataset, often cleaned and feature-selected (e.g., df_cleaned)") goal = dspy.InputField(desc="The user's machine learning goal (e.g., classification or regression)") - plan_instructions = dspy.InputField(desc="Instructions indicating what to create and what variables to receive") + plan_instructions = dspy.InputField(desc="Instructions indicating what to create and what variables to receive (optional for individual use)", default="") code = dspy.OutputField(desc="Scikit-learn based machine learning code") summary = dspy.OutputField(desc="Explanation of the ML approach and evaluation") @@ -924,140 +1015,7 @@ class goal_refiner_agent(dspy.Signature): goal = dspy.InputField(desc="The user defined goal ") refined_goal = dspy.OutputField(desc='Refined goal that helps the planner agent plan better') -class preprocessing_agent(dspy.Signature): - """You are a AI data-preprocessing agent. Generate clean and efficient Python code using NumPy and Pandas to perform introductory data preprocessing on a pre-loaded DataFrame df, based on the user's analysis goals. - Preprocessing Requirements: - 1. Identify Column Types - - Separate columns into numeric and categorical using: - categorical_columns = df.select_dtypes(include=[object, 'category']).columns.tolist() - numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist() - 2. Handle Missing Values - - Numeric columns: Impute missing values using the mean of each column - - Categorical columns: Impute missing values using the mode of each column - 3. 
Convert Date Strings to Datetime - - For any column suspected to represent dates (in string format), convert it to datetime using: - def safe_to_datetime(date): - try: - return pd.to_datetime(date, errors='coerce', cache=False) - except (ValueError, TypeError): - return pd.NaT - df['datetime_column'] = df['datetime_column'].apply(safe_to_datetime) - - Replace 'datetime_column' with the actual column names containing date-like strings - Important Notes: - - Do NOT create a correlation matrix — correlation analysis is outside the scope of preprocessing - - Do NOT generate any plots or visualizations - Output Instructions: - 1. Include the full preprocessing Python code - 2. Provide a brief bullet-point summary of the steps performed. Example: - • Identified 5 numeric and 4 categorical columns - • Filled missing numeric values with column means - • Filled missing categorical values with column modes - • Converted 1 date column to datetime format - Respond in the user's language for all summary and reasoning but keep the code in english - """ - dataset = dspy.InputField(desc="Available datasets loaded in the system, use this df, column_names set df as copy of df") - goal = dspy.InputField(desc="The user defined goal could ") - code = dspy.OutputField(desc ="The code that does the data preprocessing and introductory analysis") - summary = dspy.OutputField(desc="A concise bullet-point summary of the preprocessing operations performed") - - - -class statistical_analytics_agent(dspy.Signature): - # Statistical Analysis Agent, builds statistical models using StatsModel Package - """ - You are a statistical analytics agent. Your task is to take a dataset and a user-defined goal and output Python code that performs the appropriate statistical analysis to achieve that goal. Follow these guidelines: - IMPORTANT: You may be provided with previous interaction history. The section marked "### Current Query:" contains the user's current request. 
Any text in "### Previous Interaction History:" is for context only and is NOT part of the current request. - Data Handling: - Always handle strings as categorical variables in a regression using statsmodels C(string_column). - Do not change the index of the DataFrame. - Convert X and y into float when fitting a model. - Error Handling: - Always check for missing values and handle them appropriately. - Ensure that categorical variables are correctly processed. - Provide clear error messages if the model fitting fails. - Regression: - For regression, use statsmodels and ensure that a constant term is added to the predictor using sm.add_constant(X). - Handle categorical variables using C(column_name) in the model formula. - Fit the model with model = sm.OLS(y.astype(float), X.astype(float)).fit(). - Seasonal Decomposition: - Ensure the period is set correctly when performing seasonal decomposition. - Verify the number of observations works for the decomposition. - Output: - Ensure the code is executable and as intended. - Also choose the correct type of model for the problem - Avoid adding data visualization code. 
- Use code like this to prevent failing: - import pandas as pd - import numpy as np - import statsmodels.api as sm - def statistical_model(X, y, goal, period=None): - try: - # Check for missing values and handle them - X = X.dropna() - y = y.loc[X.index].dropna() - # Ensure X and y are aligned - X = X.loc[y.index] - # Convert categorical variables - for col in X.select_dtypes(include=['object', 'category']).columns: - X[col] = X[col].astype('category') - # Add a constant term to the predictor - X = sm.add_constant(X) - # Fit the model - if goal == 'regression': - # Handle categorical variables in the model formula - formula = 'y ~ ' + ' + '.join([f'C({col})' if X[col].dtype.name == 'category' else col for col in X.columns]) - model = sm.OLS(y.astype(float), X.astype(float)).fit() - return model.summary() - elif goal == 'seasonal_decompose': - if period is None: - raise ValueError("Period must be specified for seasonal decomposition") - decomposition = sm.tsa.seasonal_decompose(y, period=period) - return decomposition - else: - raise ValueError("Unknown goal specified. Please provide a valid goal.") - except Exception as e: - return f"An error occurred: {e}" - # Example usage: - result = statistical_analysis(X, y, goal='regression') - print(result) - If visualizing use plotly - Provide a concise bullet-point summary of the statistical analysis performed. 
- - Example Summary: - • Applied linear regression with OLS to predict house prices based on 5 features - • Model achieved R-squared of 0.78 - • Significant predictors include square footage (p<0.001) and number of bathrooms (p<0.01) - • Detected strong seasonal pattern with 12-month periodicity - • Forecast shows 15% growth trend over next quarter - Respond in the user's language for all summary and reasoning but keep the code in english - """ - dataset = dspy.InputField(desc="Available datasets loaded in the system, use this df,columns set df as copy of df") - goal = dspy.InputField(desc="The user defined goal for the analysis to be performed") - code = dspy.OutputField(desc ="The code that does the statistical analysis using statsmodel") - summary = dspy.OutputField(desc="A concise bullet-point summary of the statistical analysis performed and key findings") - -class sk_learn_agent(dspy.Signature): - # Machine Learning Agent, performs task using sci-kit learn - """You are a machine learning agent. - Your task is to take a dataset and a user-defined goal, and output Python code that performs the appropriate machine learning analysis to achieve that goal. - You should use the scikit-learn library. - IMPORTANT: You may be provided with previous interaction history. The section marked "### Current Query:" contains the user's current request. Any text in "### Previous Interaction History:" is for context only and is NOT part of the current request. - Make sure your output is as intended! - Provide a concise bullet-point summary of the machine learning operations performed. 
- - Example Summary: - • Trained a Random Forest classifier on customer churn data with 80/20 train-test split - • Model achieved 92% accuracy and 88% F1-score - • Feature importance analysis revealed that contract length and monthly charges are the strongest predictors of churn - • Implemented K-means clustering (k=4) on customer shopping behaviors - • Identified distinct segments: high-value frequent shoppers (22%), occasional big spenders (35%), budget-conscious regulars (28%), and rare visitors (15%) - Respond in the user's language for all summary and reasoning but keep the code in english - """ - dataset = dspy.InputField(desc="Available datasets loaded in the system, use this df,columns. set df as copy of df") - goal = dspy.InputField(desc="The user defined goal ") - code = dspy.OutputField(desc ="The code that does the Exploratory data analysis") - summary = dspy.OutputField(desc="A concise bullet-point summary of the machine learning analysis performed and key results") @@ -1094,55 +1052,6 @@ class code_combiner_agent(dspy.Signature): -class data_viz_agent(dspy.Signature): - # Visualizes data using Plotly - """ - You are an AI agent responsible for generating interactive data visualizations using Plotly. - IMPORTANT Instructions: - - The section marked "### Current Query:" contains the user's request. Any text in "### Previous Interaction History:" is for context only and should NOT be treated as part of the current request. - - You must only use the tools provided to you. This agent handles visualization only. - - If len(df) > 50000, always sample the dataset before visualization using: - if len(df) > 50000: - df = df.sample(50000, random_state=1) - - Each visualization must be generated as a **separate figure** using go.Figure(). - Do NOT use subplots under any circumstances. - - Each figure must be returned individually using: - fig.to_html(full_html=False) - - Use update_layout with xaxis and yaxis **only once per figure**. 
- - Enhance readability and clarity by: - • Using low opacity (0.4-0.7) where appropriate - • Applying visually distinct colors for different elements or categories - - Make sure the visual **answers the user's specific goal**: - • Identify what insight or comparison the user is trying to achieve - • Choose the visualization type and features (e.g., color, size, grouping) to emphasize that goal - • For example, if the user asks for "trends in revenue," use a time series line chart; if they ask for "top-performing categories," use a bar chart sorted by value - • Prioritize highlighting patterns, outliers, or comparisons relevant to the question - - Never include the dataset or styling index in the output. - - If there are no relevant columns for the requested visualization, respond with: - "No relevant columns found to generate this visualization." - - Use only one number format consistently: either 'K', 'M', or comma-separated values like 1,000/1,000,000. Do not mix formats. - - Only include trendlines in scatter plots if the user explicitly asks for them. - - Output only the code and a concise bullet-point summary of what the visualization reveals. - - Always end each visualization with: - fig.to_html(full_html=False) - Respond in the user's language for all summary and reasoning but keep the code in english - Example Summary: - • Created an interactive scatter plot of sales vs. marketing spend with color-coded product categories - • Included a trend line showing positive correlation (r=0.72) - • Highlighted outliers where high marketing spend resulted in low sales - • Generated a time series chart of monthly revenue from 2020-2023 - • Added annotations for key business events - • Visualization reveals 35% YoY growth with seasonal peaks in Q4 - - """ - goal = dspy.InputField(desc="user defined goal which includes information about data and chart they want to plot") - dataset = dspy.InputField(desc=" Provides information about the data in the data frame. 
Only use column names and dataframe_name as in this context") - styling_index = dspy.InputField(desc='Provides instructions on how to style your Plotly plots') - code= dspy.OutputField(desc="Plotly code that visualizes what the user needs according to the query & dataframe_index & styling_context") - summary = dspy.OutputField(desc="A concise bullet-point summary of the visualization created and key insights revealed") - - - class code_fix(dspy.Signature): """ You are an expert AI developer and data analyst assistant, skilled at identifying and resolving issues in Python code related to data analytics. Another agent has attempted to generate Python code for a data analytics task but produced code that is broken or throws an error. @@ -1218,42 +1127,88 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): self.agent_inputs = {} self.agent_desc = [] - # Create modules from agent signatures - for i, a in enumerate(agents): - name = a.__pydantic_core_schema__['schema']['model_name'] - self.agents[name] = dspy.asyncify(dspy.ChainOfThoughtWithHint(a)) - self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} - self.agent_desc.append(get_agent_description(name)) - - # Load ALL available template agents for direct access (regardless of user preferences) - if db_session: + # If no agents provided, load core agents from database + if not agents: + # Load the 4 core agents from database + core_agent_names = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent'] + + for agent_name in core_agent_names: + # Get the agent signature class + if agent_name == 'preprocessing_agent': + agent_signature = preprocessing_agent + elif agent_name == 'statistical_analytics_agent': + agent_signature = statistical_analytics_agent + elif agent_name == 'sk_learn_agent': + agent_signature = sk_learn_agent + elif agent_name == 'data_viz_agent': + agent_signature = 
data_viz_agent + + # Add to agents dict + self.agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(agent_signature)) + + # Set input fields based on signature + if agent_name == 'data_viz_agent': + self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} + else: + self.agent_inputs[agent_name] = {'dataset', 'goal', 'plan_instructions'} + + # Get description from database + self.agent_desc.append({agent_name: get_agent_description(agent_name)}) + else: + # Load standard agents from provided list (legacy support) + for i, a in enumerate(agents): + name = a.__pydantic_core_schema__['schema']['model_name'] + self.agents[name] = dspy.asyncify(dspy.ChainOfThought(a)) + self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} + self.agent_desc.append({name: get_agent_description(name)}) + + # Load user-enabled template agents if user_id and db_session are provided + if user_id and db_session: try: - template_signatures = load_all_available_templates_from_db(db_session) + # For individual use, load all available templates (not just planner-enabled ones) + template_signatures = load_user_enabled_templates_from_db(user_id, db_session) for template_name, signature in template_signatures.items(): # Add template agent to agents dict - self.agents[template_name] = dspy.asyncify(dspy.ChainOfThoughtWithHint(signature)) + self.agents[template_name] = dspy.asyncify(dspy.ChainOfThought(signature)) - # Extract input fields from signature - templates use standard fields - self.agent_inputs[template_name] = {'goal', 'dataset', 'styling_index', 'hint'} + # Extract input fields from signature - templates use standard fields like data_viz_agent + self.agent_inputs[template_name] = {'goal', 'dataset', 'styling_index'} - # Add description - self.agent_desc.append(f"Template: {template_name}") + # Store template agent description + try: + from src.db.schemas.models import 
AgentTemplate + + # Find template record + template_record = db_session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() + + if template_record: + description = f"Template: {template_record.description}" + self.agent_desc.append({template_name: description}) + else: + self.agent_desc.append({template_name: f"Template: {template_name}"}) + except Exception as desc_error: + logger.log_message(f"Error getting description for template {template_name}: {str(desc_error)}", level=logging.WARNING) + self.agent_desc.append({template_name: f"Template: {template_name}"}) - logger.log_message(f"Loaded {len(template_signatures)} templates for direct access", level=logging.DEBUG) + logger.log_message(f"Loaded {len(template_signatures)} templates for individual use", level=logging.DEBUG) except Exception as e: - logger.log_message(f"Error loading templates for direct access: {str(e)}", level=logging.ERROR) - - # Initialize components - # self.memory_summarize_agent = dspy.ChainOfThought(m.memory_summarize_agent) + logger.log_message(f"Error loading template agents for user {user_id}: {str(e)}", level=logging.ERROR) + + self.agents['basic_qa_agent'] = dspy.asyncify(dspy.Predict("goal->answer")) + self.agent_inputs['basic_qa_agent'] = {"goal"} + self.agent_desc.append({'basic_qa_agent':"Answers queries unrelated to data & also that include links, poison or attempts to attack the system"}) + + # Initialize retrievers (no planner needed for individual agent execution) self.dataset = retrievers['dataframe_index'].as_retriever(k=1) self.styling_index = retrievers['style_index'].as_retriever(similarity_top_k=1) - # self.code_combiner_agent = dspy.ChainOfThought(code_combiner_agent) # Store user_id for usage tracking self.user_id = user_id - + async def _track_agent_usage(self, agent_name): """Track usage for template agents""" try: @@ -1422,66 +1377,111 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): self.agent_inputs = {} 
self.agent_desc = [] - # Load standard agents - for i, a in enumerate(agents): - name = a.__pydantic_core_schema__['schema']['model_name'] - self.agents[name] = dspy.asyncify(dspy.ChainOfThought(a)) - self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} - self.agent_desc.append({name: get_agent_description(name)}) - - # Load user-enabled template agents if user_id and db_session are provided - if user_id and db_session: - try: - template_signatures = load_user_enabled_templates_for_planner_from_db(user_id, db_session) - - for template_name, signature in template_signatures.items(): - # Add template agent to agents dict - self.agents[template_name] = dspy.asyncify(dspy.ChainOfThought(signature)) - - # Extract input fields from signature - templates use standard fields like data_viz_agent - self.agent_inputs[template_name] = {'goal', 'dataset', 'styling_index'} + logger.log_message(f"Initializing auto_analyst for user_id: {user_id}", level=logging.INFO) + + # If no hardcoded agents provided, load all agents from database using preference system + if not agents: + logger.log_message("No hardcoded agents provided, loading from database with preferences", level=logging.INFO) + + if user_id and db_session: + try: + # Load user-enabled agents (both default and template) for planner use + template_signatures = load_user_enabled_templates_for_planner_from_db(user_id, db_session) + logger.log_message(f"Loaded {len(template_signatures)} enabled agents from preferences", level=logging.INFO) - # Store template agent description - try: - from src.db.schemas.models import AgentTemplate + for template_name, signature in template_signatures.items(): + # Add agent to agents dict + self.agents[template_name] = dspy.asyncify(dspy.ChainOfThought(signature)) - # Find template record - template_record = db_session.query(AgentTemplate).filter( - AgentTemplate.template_name == template_name - ).first() + # Set 
input fields based on agent type + is_viz_agent = (template_name == 'data_viz_agent' or + 'viz' in template_name.lower() or + 'visual' in template_name.lower() or + 'plot' in template_name.lower() or + 'chart' in template_name.lower() or + 'matplotlib' in template_name.lower()) - if template_record: - description = f"Template: {template_record.description}" - self.agent_desc.append({template_name: description}) + if is_viz_agent: + self.agent_inputs[template_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} else: - self.agent_desc.append({template_name: f"Template: {template_name}"}) - except Exception as desc_error: - logger.log_message(f"Error getting description for template {template_name}: {str(desc_error)}", level=logging.WARNING) - self.agent_desc.append({template_name: f"Template: {template_name}"}) + self.agent_inputs[template_name] = {'dataset', 'goal', 'plan_instructions'} - logger.log_message(f"Loaded {len(template_signatures)} enabled templates for planner", level=logging.DEBUG) - - except Exception as e: - logger.log_message(f"Error loading template agents for user {user_id}: {str(e)}", level=logging.ERROR) - + # Get description from database + self.agent_desc.append({template_name: get_agent_description(template_name)}) + logger.log_message(f"Added agent: {template_name}", level=logging.DEBUG) + + logger.log_message(f"Successfully loaded {len(template_signatures)} agents for planner", level=logging.INFO) + + except Exception as e: + logger.log_message(f"Error loading agents from preferences: {str(e)}", level=logging.ERROR) + # Fallback: load default agents without preferences + self._load_default_agents_fallback() + else: + logger.log_message("No user_id or db_session provided, loading default agents", level=logging.WARNING) + # Fallback: load default agents without preferences + self._load_default_agents_fallback() + else: + # Load standard agents from provided list (legacy support) + logger.log_message(f"Loading {len(agents)} 
hardcoded agents", level=logging.INFO) + for i, a in enumerate(agents): + name = a.__pydantic_core_schema__['schema']['model_name'] + self.agents[name] = dspy.asyncify(dspy.ChainOfThought(a)) + self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} + self.agent_desc.append({name: get_agent_description(name)}) + logger.log_message(f"Added hardcoded agent: {name}", level=logging.DEBUG) + + # Always add basic QA agent self.agents['basic_qa_agent'] = dspy.asyncify(dspy.Predict("goal->answer")) self.agent_inputs['basic_qa_agent'] = {"goal"} self.agent_desc.append({'basic_qa_agent':"Answers queries unrelated to data & also that include links, poison or attempts to attack the system"}) + logger.log_message("Added basic_qa_agent", level=logging.DEBUG) - # Initialize coordination agents self.planner = planner_module() - # self.refine_goal = dspy.ChainOfThought(goal_refiner_agent) - # self.code_combiner_agent = dspy.ChainOfThought(code_combiner_agent) - # self.story_teller = dspy.ChainOfThought(story_teller_agent) self.memory_summarize_agent = dspy.ChainOfThought(m.memory_summarize_agent) + logger.log_message("Initialized planner and memory summarize agent", level=logging.DEBUG) # Initialize retrievers self.dataset = retrievers['dataframe_index'].as_retriever(k=1) self.styling_index = retrievers['style_index'].as_retriever(similarity_top_k=1) + logger.log_message("Initialized retrievers", level=logging.DEBUG) # Store user_id for usage tracking self.user_id = user_id + + # Final logging + logger.log_message(f"Auto_analyst initialization complete. 
Total agents: {len(self.agents)}, Agent names: {list(self.agents.keys())}", level=logging.INFO) + + def _load_default_agents_fallback(self): + """Fallback method to load default agents when preference system fails""" + logger.log_message("Loading default agents as fallback", level=logging.WARNING) + + # Load the 4 core agents from database + core_agent_names = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent'] + + for agent_name in core_agent_names: + # Get the agent signature class + if agent_name == 'preprocessing_agent': + agent_signature = preprocessing_agent + elif agent_name == 'statistical_analytics_agent': + agent_signature = statistical_analytics_agent + elif agent_name == 'sk_learn_agent': + agent_signature = sk_learn_agent + elif agent_name == 'data_viz_agent': + agent_signature = data_viz_agent + + # Add to agents dict + self.agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(agent_signature)) + + # Set input fields based on signature + if agent_name == 'data_viz_agent': + self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} + else: + self.agent_inputs[agent_name] = {'dataset', 'goal', 'plan_instructions'} + + # Get description from database + self.agent_desc.append({agent_name: get_agent_description(agent_name)}) + logger.log_message(f"Added fallback agent: {agent_name}", level=logging.DEBUG) async def _track_agent_usage(self, agent_name): """Track usage for template agents""" @@ -1553,36 +1553,55 @@ async def _track_agent_usage(self, agent_name): async def execute_agent(self, agent_name, inputs): """Execute a single agent with given inputs""" + logger.log_message(f"Executing single agent: {agent_name}", level=logging.DEBUG) + try: result = await self.agents[agent_name.strip()](**inputs) # Track usage for custom agents and templates await self._track_agent_usage(agent_name.strip()) + logger.log_message(f"Agent {agent_name} execution completed", level=logging.DEBUG) return 
agent_name.strip(), dict(result) except Exception as e: + logger.log_message(f"Error in execute_agent for {agent_name}: {str(e)}", level=logging.ERROR) return agent_name.strip(), {"error": str(e)} async def get_plan(self, query): """Get the analysis plan""" + logger.log_message(f"Getting plan for query: {query[:100]}...", level=logging.INFO) + dict_ = {} dict_['dataset'] = self.dataset.retrieve(query)[0].text dict_['styling_index'] = self.styling_index.retrieve(query)[0].text dict_['goal'] = query dict_['Agent_desc'] = str(self.agent_desc) - module_return = await self.planner(goal=dict_['goal'], dataset=dict_['dataset'], Agent_desc=dict_['Agent_desc']) - plan_dict = dict(module_return['plan']) - if 'complexity' in module_return: - complexity = module_return['complexity'] - else: - complexity = 'basic' - plan_dict['complexity'] = complexity + logger.log_message(f"Available agents for planning: {list(self.agents.keys())}", level=logging.INFO) + logger.log_message(f"Agent descriptions length: {len(self.agent_desc)}", level=logging.DEBUG) + + try: + module_return = await self.planner(goal=dict_['goal'], dataset=dict_['dataset'], Agent_desc=dict_['Agent_desc']) + logger.log_message(f"Planner returned: {module_return}", level=logging.DEBUG) + + plan_dict = dict(module_return['plan']) + if 'complexity' in module_return: + complexity = module_return['complexity'] + else: + complexity = 'basic' + plan_dict['complexity'] = complexity - return plan_dict + logger.log_message(f"Final plan: {plan_dict}", level=logging.INFO) + return plan_dict + + except Exception as e: + logger.log_message(f"Error in get_plan: {str(e)}", level=logging.ERROR) + raise async def execute_plan(self, query, plan): """Execute the plan and yield results as they complete""" + logger.log_message(f"Executing plan: {plan}", level=logging.INFO) + dict_ = {} dict_['dataset'] = self.dataset.retrieve(query)[0].text dict_['styling_index'] = self.styling_index.retrieve(query)[0].text @@ -1593,91 +1612,67 @@ async 
def execute_plan(self, query, plan): # Clean and split the plan string into agent names plan_text = plan.get("plan", "").replace("Plan", "").replace(":", "").strip() + logger.log_message(f"Plan text after cleaning: {plan_text}", level=logging.DEBUG) - if "basic_qa_agent" in plan_text: + logger.log_message("Executing basic_qa_agent", level=logging.INFO) inputs = dict(goal=query) - agent_name, response = await self.execute_agent('basic_qa_agent', inputs) #! SHOULDN'T THIS BE **inputs ? + agent_name, response = await self.execute_agent('basic_qa_agent', inputs) yield agent_name, inputs, response return plan_list = [agent.strip() for agent in plan_text.split("->") if agent.strip()] + logger.log_message(f"Plan list: {plan_list}", level=logging.INFO) # Parse the attached plan_instructions into a dict raw_instr = plan.get("plan_instructions", {}) if isinstance(raw_instr, str): try: plan_instructions = json.loads(raw_instr) - except Exception: + except Exception as e: + logger.log_message(f"Error parsing plan_instructions JSON: {str(e)}", level=logging.ERROR) plan_instructions = {} elif isinstance(raw_instr, dict): - plan_instructions = str(raw_instr) + plan_instructions = raw_instr else: plan_instructions = {} + + logger.log_message(f"Parsed plan instructions: {plan_instructions}", level=logging.DEBUG) - # If no plan was produced, short-circuit - if not plan_list: - yield "plan_not_found", dict(plan), {"error": "No plan found"} + # Check if we have no valid agents to execute + if not plan_list or all(agent not in self.agents for agent in plan_list): + logger.log_message(f"No valid agents found in plan. 
Available agents: {list(self.agents.keys())}, Plan agents: {plan_list}", level=logging.ERROR) + yield "plan_not_found", None, {"error": "No valid agents found in plan"} return - - # Create async tasks for each agent, similar to deep analysis approach - tasks = [] - task_info = [] - - for idx, agent_name in enumerate(plan_list): - key = agent_name.strip() - # gather input fields except plan_instructions - inputs = { - param: dict_[param] - for param in self.agent_inputs[key] - if param != "plan_instructions" - } + # Execute agents in sequence + for agent_name in plan_list: + if agent_name not in self.agents: + logger.log_message(f"Agent '{agent_name}' not found in available agents: {list(self.agents.keys())}", level=logging.ERROR) + yield agent_name, {}, {"error": f"Agent '{agent_name}' not available"} + continue + + logger.log_message(f"Executing agent: {agent_name}", level=logging.INFO) - # attach the specific instructions for this agent with prev/next format - if "plan_instructions" in self.agent_inputs[key]: - # Get current agent instructions - current_instructions = plan_instructions.get(key, {"create": [], "use": [], "instruction": ""}) + try: + # Prepare inputs for the agent + inputs = {x: dict_[x] for x in self.agent_inputs[agent_name] if x in dict_} - # Format instructions with your_task first - formatted_instructions = {"your_task": current_instructions} + # Add plan instructions if available for this agent + if agent_name in plan_instructions: + inputs['plan_instructions'] = json.dumps(plan_instructions[agent_name]) + else: + inputs['plan_instructions'] = "" - # Add previous agent instructions if available - if idx > 0: - prev_agent = plan_list[idx-1].strip() - prev_instructions = plan_instructions.get(prev_agent, {}).get("instruction", "") - formatted_instructions[f"Previous Agent {prev_agent}"] = prev_instructions + logger.log_message(f"Agent inputs for {agent_name}: {list(inputs.keys())}", level=logging.DEBUG) - # Add next agent instructions if 
available - if idx < len(plan_list) - 1: - next_agent = plan_list[idx+1].strip() - next_instructions = plan_instructions.get(next_agent, {}).get("instruction", "") - formatted_instructions[f"Next Agent {next_agent}"] = next_instructions + # Execute the agent + agent_result_name, response = await self.execute_agent(agent_name, inputs) + logger.log_message(f"Agent {agent_name} completed successfully", level=logging.INFO) - inputs["plan_instructions"] = str(formatted_instructions) - - - # Create async task directly from the asyncified agent - task = self.execute_agent(agent_name, inputs) - tasks.append(task) - task_info.append((agent_name, inputs)) - - # Execute all tasks concurrently and yield results as they complete - try: - # Execute all tasks concurrently - results = await asyncio.gather(*tasks, return_exceptions=True) - - # Yield results with their corresponding task info - for i, result in enumerate(results): - agent_name, inputs = task_info[i] + yield agent_result_name, inputs, response - if isinstance(result, Exception): - yield agent_name, inputs, {"error": str(result)} - else: - name, response = result - yield name, inputs, response - - except Exception as e: - logger.log_message(f"Error in task execution: {str(e)}", level=logging.ERROR) - yield "error", {}, {"error": str(e)} + except Exception as e: + logger.log_message(f"Error executing agent {agent_name}: {str(e)}", level=logging.ERROR) + yield agent_name, {}, {"error": f"Error executing {agent_name}: {str(e)}"} diff --git a/auto-analyst-backend/src/agents/deep_agents.py b/auto-analyst-backend/src/agents/deep_agents.py index 7c62e91e..34c42849 100644 --- a/auto-analyst-backend/src/agents/deep_agents.py +++ b/auto-analyst-backend/src/agents/deep_agents.py @@ -728,6 +728,8 @@ class deep_code_fix(dspy.Signature): class deep_analysis_module(dspy.Module): def __init__(self,agents, agents_desc): + logger.log_message(f"Initializing deep_analysis_module with {len(agents)} agents: {list(agents.keys())}", 
level=logging.INFO) + self.agents = agents # Make all dspy operations async using asyncify self.deep_questions = dspy.asyncify(dspy.Predict(deep_questions)) @@ -741,6 +743,8 @@ def __init__(self,agents, agents_desc): self.styling_instructions = chart_instructions self.agents_desc = agents_desc self.final_conclusion = dspy.asyncify(dspy.ChainOfThought(final_conclusion)) + + logger.log_message(f"Deep analysis module initialized successfully with agents: {list(self.agents.keys())}", level=logging.INFO) async def execute_deep_analysis_streaming(self, goal, dataset_info, session_df=None): """ @@ -828,13 +832,13 @@ async def execute_deep_analysis_streaming(self, goal, dataset_info, session_df=N dspy.Example( goal=questions.deep_questions, dataset=dataset_info, - **({"plan_instructions": str(plan_instructions[key])} if "planner" in key else {}), - **({"styling_index": "Sample styling guidelines"} if "data_viz" in key else {}) + plan_instructions=str(plan_instructions[key]), + **({"styling_index": "Sample styling guidelines"} if "data_viz" in key or "viz" in key.lower() or "visual" in key.lower() or "plot" in key.lower() or "chart" in key.lower() else {}) ).with_inputs( "goal", - "dataset", - *(["plan_instructions"] if "planner" in key else []), - *(["styling_index"] if "data_viz" in key else []) + "dataset", + "plan_instructions", + *(["styling_index"] if "data_viz" in key or "viz" in key.lower() or "visual" in key.lower() or "plot" in key.lower() or "chart" in key.lower() else []) ) for key in keys ] diff --git a/auto-analyst-backend/src/db/init_db.py b/auto-analyst-backend/src/db/init_db.py index 4a8bf715..33ec869c 100644 --- a/auto-analyst-backend/src/db/init_db.py +++ b/auto-analyst-backend/src/db/init_db.py @@ -15,15 +15,17 @@ # Determine database type and set appropriate engine configurations if DATABASE_URL.startswith('postgresql'): # PostgreSQL-specific configuration - engine = create_engine( - DATABASE_URL, - pool_size=10, - max_overflow=20, - pool_pre_ping=True, 
# Check connection validity before use - pool_recycle=300 # Recycle connections after 5 minutes - ) - is_postgresql = True - logger.log_message("Using PostgreSQL database engine", logging.INFO) + ask = input("Are you sure?") + if ask.lower() == "yes": + engine = create_engine( + DATABASE_URL, + pool_size=10, + max_overflow=20, + pool_pre_ping=True, # Check connection validity before use + pool_recycle=300 # Recycle connections after 5 minutes + ) + is_postgresql = True + logger.log_message("Using PostgreSQL database engine", logging.INFO) else: # SQLite configuration engine = create_engine(DATABASE_URL) diff --git a/auto-analyst-backend/src/db/init_default_agents.py b/auto-analyst-backend/src/db/init_default_agents.py new file mode 100644 index 00000000..f6528949 --- /dev/null +++ b/auto-analyst-backend/src/db/init_default_agents.py @@ -0,0 +1,281 @@ +""" +Initialize default agents in the database. +This module should be run during application startup to ensure +default agents are available in the database. +""" + +import logging +from datetime import datetime, UTC +from src.utils.logger import Logger + +# Initialize logger +logger = Logger("init_default_agents", see_time=True, console_log=False) + +def load_default_agents_to_db(db_session, force_update=False): + """ + Load the default agents into the AgentTemplate table. + + Args: + db_session: Database session + force_update: If True, update existing agents. If False, skip existing ones. + + Returns: + Tuple (success: bool, message: str) + """ + try: + from src.db.schemas.models import AgentTemplate + + # Define default agents with their signatures and metadata + default_agents = { + "preprocessing_agent": { + "display_name": "Data Preprocessing Agent", + "description": "Cleans and prepares a DataFrame using Pandas and NumPy—handles missing values, detects column types, and converts date strings to datetime.", + "prompt_template": """You are a AI data-preprocessing agent. 
Generate clean and efficient Python code using NumPy and Pandas to perform introductory data preprocessing on a pre-loaded DataFrame df, based on the user's analysis goals. +Preprocessing Requirements: +1. Identify Column Types +- Separate columns into numeric and categorical using: + categorical_columns = df.select_dtypes(include=[object, 'category']).columns.tolist() + numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist() +2. Handle Missing Values +- Numeric columns: Impute missing values using the mean of each column +- Categorical columns: Impute missing values using the mode of each column +3. Convert Date Strings to Datetime +- For any column suspected to represent dates (in string format), convert it to datetime using: + def safe_to_datetime(date): + try: + return pd.to_datetime(date, errors='coerce', cache=False) + except (ValueError, TypeError): + return pd.NaT + df['datetime_column'] = df['datetime_column'].apply(safe_to_datetime) +- Replace 'datetime_column' with the actual column names containing date-like strings +Important Notes: +- Do NOT create a correlation matrix — correlation analysis is outside the scope of preprocessing +- Do NOT generate any plots or visualizations +Output Instructions: +1. Include the full preprocessing Python code +2. Provide a brief bullet-point summary of the steps performed. 
Example: +• Identified 5 numeric and 4 categorical columns +• Filled missing numeric values with column means +• Filled missing categorical values with column modes +• Converted 1 date column to datetime format + Respond in the user's language for all summary and reasoning but keep the code in english""", + "category": "Data Manipulation", + "icon_url": "https://cdn.jsdelivr.net/gh/devicons/devicon/icons/pandas/pandas-original.svg" + }, + "statistical_analytics_agent": { + "display_name": "Statistical Analytics Agent", + "description": "Performs statistical analysis (e.g., regression, seasonal decomposition) using statsmodels, with proper handling of categorical data and missing values.", + "prompt_template": """ +You are a statistical analytics agent. Your task is to take a dataset and a user-defined goal and output Python code that performs the appropriate statistical analysis to achieve that goal. Follow these guidelines: +IMPORTANT: You may be provided with previous interaction history. The section marked "### Current Query:" contains the user's current request. Any text in "### Previous Interaction History:" is for context only and is NOT part of the current request. +Data Handling: +Always handle strings as categorical variables in a regression using statsmodels C(string_column). +Do not change the index of the DataFrame. +Convert X and y into float when fitting a model. +Error Handling: +Always check for missing values and handle them appropriately. +Ensure that categorical variables are correctly processed. +Provide clear error messages if the model fitting fails. +Regression: +For regression, use statsmodels and ensure that a constant term is added to the predictor using sm.add_constant(X). +Handle categorical variables using C(column_name) in the model formula. +Fit the model with model = sm.OLS(y.astype(float), X.astype(float)).fit(). +Seasonal Decomposition: +Ensure the period is set correctly when performing seasonal decomposition. 
+Verify the number of observations works for the decomposition. +Output: +Ensure the code is executable and as intended. +Also choose the correct type of model for the problem +Avoid adding data visualization code. +Use code like this to prevent failing: +import pandas as pd +import numpy as np +import statsmodels.api as sm +def statistical_model(X, y, goal, period=None): + try: + # Check for missing values and handle them + X = X.dropna() + y = y.loc[X.index].dropna() + # Ensure X and y are aligned + X = X.loc[y.index] + # Convert categorical variables + for col in X.select_dtypes(include=['object', 'category']).columns: + X[col] = X[col].astype('category') + # Add a constant term to the predictor + X = sm.add_constant(X) + # Fit the model + if goal == 'regression': + # Handle categorical variables in the model formula + formula = 'y ~ ' + ' + '.join([f'C({col})' if X[col].dtype.name == 'category' else col for col in X.columns]) + model = sm.OLS(y.astype(float), X.astype(float)).fit() + return model.summary() + elif goal == 'seasonal_decompose': + if period is None: + raise ValueError("Period must be specified for seasonal decomposition") + decomposition = sm.tsa.seasonal_decompose(y, period=period) + return decomposition + else: + raise ValueError("Unknown goal specified. Please provide a valid goal.") + except Exception as e: + return f"An error occurred: {e}" +# Example usage: +result = statistical_analysis(X, y, goal='regression') +print(result) +If visualizing use plotly +Provide a concise bullet-point summary of the statistical analysis performed. 
+ +Example Summary: +• Applied linear regression with OLS to predict house prices based on 5 features +• Model achieved R-squared of 0.78 +• Significant predictors include square footage (p<0.001) and number of bathrooms (p<0.01) +• Detected strong seasonal pattern with 12-month periodicity +• Forecast shows 15% growth trend over next quarter +Respond in the user's language for all summary and reasoning but keep the code in english""", + "category": "Statistical Analysis", + "icon_url": "https://cdn.jsdelivr.net/gh/devicons/devicon/icons/statsmodels/statsmodels-original.svg" + }, + "sk_learn_agent": { + "display_name": "Machine Learning Agent", + "description": "Trains and evaluates machine learning models using scikit-learn, including classification, regression, and clustering with feature importance insights.", + "prompt_template": """You are a machine learning agent. +Your task is to take a dataset and a user-defined goal, and output Python code that performs the appropriate machine learning analysis to achieve that goal. +You should use the scikit-learn library. +IMPORTANT: You may be provided with previous interaction history. The section marked "### Current Query:" contains the user's current request. Any text in "### Previous Interaction History:" is for context only and is NOT part of the current request. +Make sure your output is as intended! +Provide a concise bullet-point summary of the machine learning operations performed. 
+ +Example Summary: +• Trained a Random Forest classifier on customer churn data with 80/20 train-test split +• Model achieved 92% accuracy and 88% F1-score +• Feature importance analysis revealed that contract length and monthly charges are the strongest predictors of churn +• Implemented K-means clustering (k=4) on customer shopping behaviors +• Identified distinct segments: high-value frequent shoppers (22%), occasional big spenders (35%), budget-conscious regulars (28%), and rare visitors (15%) +Respond in the user's language for all summary and reasoning but keep the code in english""", + "category": "Modelling", + "icon_url": "https://cdn.jsdelivr.net/gh/devicons/devicon/icons/scikit-learn/scikit-learn-original.svg" + }, + "data_viz_agent": { + "display_name": "Data Visualization Agent", + "description": "Generates interactive visualizations with Plotly, selecting the best chart type to reveal trends, comparisons, and insights based on the analysis goal.", + "prompt_template": """ +You are an AI agent responsible for generating interactive data visualizations using Plotly. +IMPORTANT Instructions: +- The section marked "### Current Query:" contains the user's request. Any text in "### Previous Interaction History:" is for context only and should NOT be treated as part of the current request. +- You must only use the tools provided to you. This agent handles visualization only. +- If len(df) > 50000, always sample the dataset before visualization using: +if len(df) > 50000: + df = df.sample(50000, random_state=1) +- Each visualization must be generated as a **separate figure** using go.Figure(). +Do NOT use subplots under any circumstances. +- Each figure must be returned individually using: +fig.to_html(full_html=False) +- Use update_layout with xaxis and yaxis **only once per figure**. 
+- Enhance readability and clarity by: +• Using low opacity (0.4-0.7) where appropriate +• Applying visually distinct colors for different elements or categories +- Make sure the visual **answers the user's specific goal**: +• Identify what insight or comparison the user is trying to achieve +• Choose the visualization type and features (e.g., color, size, grouping) to emphasize that goal +• For example, if the user asks for "trends in revenue," use a time series line chart; if they ask for "top-performing categories," use a bar chart sorted by value +• Prioritize highlighting patterns, outliers, or comparisons relevant to the question +- Never include the dataset or styling index in the output. +- If there are no relevant columns for the requested visualization, respond with: +"No relevant columns found to generate this visualization." +- Use only one number format consistently: either 'K', 'M', or comma-separated values like 1,000/1,000,000. Do not mix formats. +- Only include trendlines in scatter plots if the user explicitly asks for them. +- Output only the code and a concise bullet-point summary of what the visualization reveals. +- Always end each visualization with: +fig.to_html(full_html=False) +Respond in the user's language for all summary and reasoning but keep the code in english +Example Summary: +• Created an interactive scatter plot of sales vs. 
marketing spend with color-coded product categories +• Included a trend line showing positive correlation (r=0.72) +• Highlighted outliers where high marketing spend resulted in low sales +• Generated a time series chart of monthly revenue from 2020-2023 +• Added annotations for key business events +• Visualization reveals 35% YoY growth with seasonal peaks in Q4""", + "category": "Visualization", + "icon_url": "https://cdn.jsdelivr.net/gh/devicons/devicon/icons/plotly/plotly-original.svg" + } + } + + created_count = 0 + updated_count = 0 + + for template_name, agent_data in default_agents.items(): + # Check if agent already exists + existing_agent = db_session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() + + if existing_agent: + if force_update: + # Update existing agent + existing_agent.display_name = agent_data["display_name"] + existing_agent.description = agent_data["description"] + existing_agent.prompt_template = agent_data["prompt_template"] + existing_agent.category = agent_data["category"] + existing_agent.icon_url = agent_data["icon_url"] + existing_agent.is_premium_only = False + existing_agent.is_active = True + existing_agent.updated_at = datetime.now(UTC) + updated_count += 1 + else: + logger.log_message(f"Agent '{template_name}' already exists, skipping", level=logging.INFO) + continue + else: + # Create new agent + new_agent = AgentTemplate( + template_name=template_name, + display_name=agent_data["display_name"], + description=agent_data["description"], + prompt_template=agent_data["prompt_template"], + category=agent_data["category"], + icon_url=agent_data["icon_url"], + is_premium_only=False, + is_active=True, + created_at=datetime.now(UTC), + updated_at=datetime.now(UTC) + ) + db_session.add(new_agent) + created_count += 1 + + db_session.commit() + + message = f"Successfully loaded default agents. 
Created: {created_count}, Updated: {updated_count}" + logger.log_message(message, level=logging.INFO) + return True, message + + except Exception as e: + db_session.rollback() + error_msg = f"Error loading default agents: {str(e)}" + logger.log_message(error_msg, level=logging.ERROR) + return False, error_msg + +def initialize_default_agents(force_update=False): + """ + Initialize default agents during application startup. + + Args: + force_update: If True, update existing agents. If False, skip existing ones. + + Returns: + bool: True if successful, False otherwise + """ + try: + from src.db.init_db import session_factory + + session = session_factory() + try: + success, message = load_default_agents_to_db(session, force_update=force_update) + logger.log_message(f"Default agents initialization: {message}", level=logging.INFO) + return success + finally: + session.close() + + except Exception as e: + logger.log_message(f"Failed to initialize default agents: {str(e)}", level=logging.ERROR) + return False + +if __name__ == "__main__": + initialize_default_agents(force_update=True) \ No newline at end of file diff --git a/auto-analyst-backend/src/managers/session_manager.py b/auto-analyst-backend/src/managers/session_manager.py index 013801c5..9ccbd292 100644 --- a/auto-analyst-backend/src/managers/session_manager.py +++ b/auto-analyst-backend/src/managers/session_manager.py @@ -27,19 +27,21 @@ class SessionManager: def __init__(self, styling_instructions: List[str], available_agents: Dict): """ - Initialize session manager with styling instructions and agents + Initialize SessionManager with styling instructions and available agents Args: - styling_instructions: List of styling instructions - available_agents: Dictionary of available agents + styling_instructions: List of styling instructions for visualization + available_agents: Dictionary of available agents (deprecated - agents now loaded from DB) """ + self.styling_instructions = styling_instructions 
self._sessions = {} self._default_df = None self._default_retrievers = None self._default_ai_system = None - self._dataset_description = None self._make_data = None - self._default_name = "Housing Dataset" # Default dataset name + # Initialize chat manager + self._dataset_description = "Housing Dataset" + self._default_name = "Housing.csv" self._dataset_description = """This dataset contains residential property information with details about pricing, physical characteristics, and amenities. The data can be used for real estate market analysis, property valuation, and understanding the relationship between house features and prices. @@ -92,8 +94,8 @@ def initialize_default_dataset(self): self._default_df = pd.read_csv("Housing.csv") self._make_data = make_data(self._default_df, self._dataset_description) self._default_retrievers = self.initialize_retrievers(self.styling_instructions, [str(self._make_data)]) - self._default_ai_system = auto_analyst(agents=list(self.available_agents.values()), - retrievers=self._default_retrievers) + # Create default AI system - agents will be loaded from database + self._default_ai_system = auto_analyst(agents=[], retrievers=self._default_retrievers) except Exception as e: logger.log_message(f"Error initializing default dataset: {str(e)}", level=logging.ERROR) raise e @@ -311,7 +313,7 @@ def create_ai_system_for_user(self, retrievers, user_id=None): try: # Create AI system with user context to load custom agents ai_system = auto_analyst( - agents=list(self.available_agents.values()), + agents=[], retrievers=retrievers, user_id=user_id, db_session=db_session @@ -322,12 +324,12 @@ def create_ai_system_for_user(self, retrievers, user_id=None): db_session.close() else: # Create standard AI system without custom agents - return auto_analyst(agents=list(self.available_agents.values()), retrievers=retrievers) + return auto_analyst(agents=[], retrievers=retrievers) except Exception as e: logger.log_message(f"Error creating AI system for 
user {user_id}: {str(e)}", level=logging.ERROR) # Fallback to standard AI system - return auto_analyst(agents=list(self.available_agents.values()), retrievers=retrievers) + return auto_analyst(agents=[], retrievers=retrievers) def set_session_user(self, session_id: str, user_id: int, chat_id: int = None): """ diff --git a/auto-analyst-backend/src/managers/user_manager.py b/auto-analyst-backend/src/managers/user_manager.py index af836299..23f10f39 100644 --- a/auto-analyst-backend/src/managers/user_manager.py +++ b/auto-analyst-backend/src/managers/user_manager.py @@ -1,12 +1,13 @@ import logging import os from typing import Optional +from datetime import datetime, UTC from fastapi import Depends, HTTPException, Request, status from fastapi.security import APIKeyHeader from src.db.init_db import get_session -from src.db.schemas.models import User as DBUser +from src.db.schemas.models import User as DBUser, AgentTemplate, UserTemplatePreference from src.schemas.user_schemas import User from src.utils.logger import Logger @@ -100,6 +101,9 @@ def create_user(username: str, email: str) -> User: session.commit() session.refresh(new_user) + # Enable default agents for the new user + _enable_default_agents_for_user(new_user.user_id, session) + return User( user_id=new_user.user_id, username=new_user.username, @@ -131,3 +135,48 @@ def get_user_by_email(email: str) -> Optional[User]: return None finally: session.close() + +def _enable_default_agents_for_user(user_id: int, session): + """Enable default agents for a new user""" + try: + # Get all default agents (the 4 built-in agents) + default_agent_names = [ + "preprocessing_agent", + "statistical_analytics_agent", + "sk_learn_agent", + "data_viz_agent" + ] + + # Find these agents in the database + default_agents = session.query(AgentTemplate).filter( + AgentTemplate.template_name.in_(default_agent_names), + AgentTemplate.is_active == True + ).all() + + # Enable each default agent for the user + for agent in default_agents: 
+ # Check if preference already exists + existing_pref = session.query(UserTemplatePreference).filter( + UserTemplatePreference.user_id == user_id, + UserTemplatePreference.template_id == agent.template_id + ).first() + + if not existing_pref: + # Create new preference with enabled=True + new_pref = UserTemplatePreference( + user_id=user_id, + template_id=agent.template_id, + is_enabled=True, # Enable by default + usage_count=0, + created_at=datetime.now(UTC), + updated_at=datetime.now(UTC) + ) + session.add(new_pref) + + session.commit() + logger.log_message(f"Enabled {len(default_agents)} default agents for user {user_id}", level=logging.INFO) + + except Exception as e: + session.rollback() + logger.log_message(f"Error enabling default agents for user {user_id}: {str(e)}", level=logging.ERROR) + raise diff --git a/auto-analyst-backend/src/routes/templates_routes.py b/auto-analyst-backend/src/routes/templates_routes.py index e7c354cf..06394a17 100644 --- a/auto-analyst-backend/src/routes/templates_routes.py +++ b/auto-analyst-backend/src/routes/templates_routes.py @@ -41,12 +41,15 @@ class UserTemplatePreferenceResponse(BaseModel): template_category: Optional[str] icon_url: Optional[str] is_premium_only: bool + is_active: bool is_enabled: bool usage_count: int last_used_at: Optional[datetime] + created_at: Optional[datetime] + updated_at: Optional[datetime] -class ToggleTemplateRequest(BaseModel): - is_enabled: bool = Field(..., description="Whether to enable or disable the template") +class TogglePreferenceRequest(BaseModel): + is_enabled: bool def get_global_usage_counts(session, template_ids: List[int] = None) -> Dict[int, int]: """ @@ -140,6 +143,14 @@ async def get_user_template_preferences(user_id: int): AgentTemplate.is_active == True ).all() + # Get list of default agent names that should be enabled by default + default_agent_names = [ + "preprocessing_agent", + "statistical_analytics_agent", + "sk_learn_agent", + "data_viz_agent" + ] + result = [] for 
template in templates: # Get user preference for this template if it exists @@ -148,6 +159,10 @@ async def get_user_template_preferences(user_id: int): UserTemplatePreference.template_id == template.template_id ).first() + # Determine if template should be enabled by default + is_default_agent = template.template_name in default_agent_names + default_enabled = is_default_agent # Default agents enabled by default, others disabled + result.append(UserTemplatePreferenceResponse( template_id=template.template_id, template_name=template.template_name, @@ -156,9 +171,12 @@ async def get_user_template_preferences(user_id: int): template_category=template.category, icon_url=template.icon_url, is_premium_only=template.is_premium_only, - is_enabled=preference.is_enabled if preference else False, # Default to disabled + is_active=template.is_active, + is_enabled=preference.is_enabled if preference else default_enabled, # Default agents enabled by default usage_count=preference.usage_count if preference else 0, - last_used_at=preference.last_used_at if preference else None + last_used_at=preference.last_used_at if preference else None, + created_at=preference.created_at if preference else None, + updated_at=preference.updated_at if preference else None )) return result @@ -189,6 +207,14 @@ async def get_user_enabled_templates(user_id: int): AgentTemplate.is_active == True ).all() + # Get list of default agent names that should be enabled by default + default_agent_names = [ + "preprocessing_agent", + "statistical_analytics_agent", + "sk_learn_agent", + "data_viz_agent" + ] + result = [] for template in all_templates: # Check if user has a preference record for this template @@ -197,8 +223,12 @@ async def get_user_enabled_templates(user_id: int): UserTemplatePreference.template_id == template.template_id ).first() - # Template is disabled by default unless explicitly enabled - is_enabled = preference.is_enabled if preference else False + # Determine if template should be 
enabled by default + is_default_agent = template.template_name in default_agent_names + default_enabled = is_default_agent # Default agents enabled by default, others disabled + + # Template is enabled by default for default agents, disabled for others + is_enabled = preference.is_enabled if preference else default_enabled if is_enabled: result.append(UserTemplatePreferenceResponse( @@ -209,9 +239,12 @@ async def get_user_enabled_templates(user_id: int): template_category=template.category, icon_url=template.icon_url, is_premium_only=template.is_premium_only, + is_active=template.is_active, is_enabled=True, usage_count=preference.usage_count if preference else 0, - last_used_at=preference.last_used_at if preference else None + last_used_at=preference.last_used_at if preference else None, + created_at=preference.created_at if preference else None, + updated_at=preference.updated_at if preference else None )) return result @@ -237,36 +270,66 @@ async def get_user_enabled_templates_for_planner(user_id: int): if not user: raise HTTPException(status_code=404, detail="User not found") - # Get enabled templates ordered by usage (most used first) and limit to 10 - enabled_preferences = session.query(UserTemplatePreference).filter( - UserTemplatePreference.user_id == user_id, - UserTemplatePreference.is_enabled == True - ).order_by( - UserTemplatePreference.usage_count.desc(), - UserTemplatePreference.last_used_at.desc() - ).limit(10).all() + # Get list of default agent names that should be enabled by default + default_agent_names = [ + "preprocessing_agent", + "statistical_analytics_agent", + "sk_learn_agent", + "data_viz_agent" + ] - result = [] - for preference in enabled_preferences: - # Get template details - template = session.query(AgentTemplate).filter( - AgentTemplate.template_id == preference.template_id, - AgentTemplate.is_active == True + # Get all active templates + all_templates = session.query(AgentTemplate).filter( + AgentTemplate.is_active == True + ).all() 
+ + enabled_templates = [] + for template in all_templates: + # Check if user has a preference record for this template + preference = session.query(UserTemplatePreference).filter( + UserTemplatePreference.user_id == user_id, + UserTemplatePreference.template_id == template.template_id ).first() - if template: - result.append(UserTemplatePreferenceResponse( - template_id=template.template_id, - template_name=template.template_name, - display_name=template.display_name, - description=template.description, - template_category=template.category, - icon_url=template.icon_url, - is_premium_only=template.is_premium_only, - is_enabled=True, - usage_count=preference.usage_count, - last_used_at=preference.last_used_at - )) + # Determine if template should be enabled by default + is_default_agent = template.template_name in default_agent_names + default_enabled = is_default_agent # Default agents enabled by default, others disabled + + # Template is enabled by default for default agents, disabled for others + is_enabled = preference.is_enabled if preference else default_enabled + + if is_enabled: + enabled_templates.append({ + 'template': template, + 'preference': preference, + 'usage_count': preference.usage_count if preference else 0, + 'last_used_at': preference.last_used_at if preference else None + }) + + # Sort by usage (most used first) and limit to 10 + enabled_templates.sort(key=lambda x: (x['usage_count'], x['last_used_at'] or datetime.min.replace(tzinfo=UTC)), reverse=True) + enabled_templates = enabled_templates[:10] + + result = [] + for item in enabled_templates: + template = item['template'] + preference = item['preference'] + + result.append(UserTemplatePreferenceResponse( + template_id=template.template_id, + template_name=template.template_name, + display_name=template.display_name, + description=template.description, + template_category=template.category, + icon_url=template.icon_url, + is_premium_only=template.is_premium_only, + 
is_active=template.is_active, + is_enabled=True, + usage_count=preference.usage_count if preference else 0, + last_used_at=preference.last_used_at if preference else None, + created_at=preference.created_at if preference else None, + updated_at=preference.updated_at if preference else None + )) logger.log_message(f"Retrieved {len(result)} enabled templates for planner for user {user_id}", level=logging.INFO) return result @@ -281,7 +344,7 @@ async def get_user_enabled_templates_for_planner(user_id: int): raise HTTPException(status_code=500, detail=f"Failed to retrieve planner templates: {str(e)}") @router.post("/user/{user_id}/template/{template_id}/toggle") -async def toggle_template_preference(user_id: int, template_id: int, request: ToggleTemplateRequest): +async def toggle_template_preference(user_id: int, template_id: int, request: TogglePreferenceRequest): """Toggle a user's template preference (enable/disable for planner use)""" try: session = session_factory() diff --git a/auto-analyst-frontend/app/account/page.tsx b/auto-analyst-frontend/app/account/page.tsx index d0128229..e91ce7f5 100644 --- a/auto-analyst-frontend/app/account/page.tsx +++ b/auto-analyst-frontend/app/account/page.tsx @@ -86,7 +86,6 @@ export default function AccountPage() { const fetchUserData = async () => { try { - // logger.log('Fetching user data from API') setIsRefreshing(true) // Add cache-busting parameter and force flag to ensure fresh data @@ -96,13 +95,11 @@ export default function AccountPage() { } const data: UserDataResponse = await response.json() - // logger.log('Received user data:', data) setProfile(data.profile) setSubscription(data.subscription) // Enhanced credits handling using centralized config - // logger.log('Credits data:', data.credits) if (data.credits) { // Use centralized config to get plan-specific defaults diff --git a/auto-analyst-frontend/components/custom-templates/TemplatesModal.tsx b/auto-analyst-frontend/components/custom-templates/TemplatesModal.tsx 
index 9770e0f6..86610d56 100644 --- a/auto-analyst-frontend/components/custom-templates/TemplatesModal.tsx +++ b/auto-analyst-frontend/components/custom-templates/TemplatesModal.tsx @@ -165,6 +165,27 @@ export default function TemplatesModal({ return preferences.find(p => p.template_id === templateId) } + // Helper function to determine if a template should be enabled by default + const isDefaultEnabledTemplate = (templateName: string) => { + const defaultAgentNames = [ + "preprocessing_agent", + "statistical_analytics_agent", + "sk_learn_agent", + "data_viz_agent" + ] + return defaultAgentNames.includes(templateName) + } + + // Helper function to get the effective enabled state for a template + const getTemplateEnabledState = (template: TemplateAgent) => { + const preference = getTemplatePreference(template.template_id) + const defaultEnabled = isDefaultEnabledTemplate(template.template_name) + + return changes[template.template_id] !== undefined + ? changes[template.template_id] + : preference?.is_enabled ?? defaultEnabled + } + // Filter templates based on search, category, and status const filteredTemplates = useMemo(() => { let filtered = templates @@ -183,11 +204,7 @@ export default function TemplatesModal({ if (statusFilter !== 'all') { filtered = filtered.filter(template => { - const preference = getTemplatePreference(template.template_id) - const isEnabled = changes[template.template_id] !== undefined - ? changes[template.template_id] - : preference?.is_enabled || false - + const isEnabled = getTemplateEnabledState(template) return statusFilter === 'enabled' ? isEnabled : !isEnabled }) } @@ -294,18 +311,13 @@ export default function TemplatesModal({ // Get template data for rendering const getTemplateData = (template: TemplateAgent) => { const preference = getTemplatePreference(template.template_id) - const isEnabled = changes[template.template_id] !== undefined - ? 
changes[template.template_id] - : preference?.is_enabled || false + const isEnabled = getTemplateEnabledState(template) return { preference, isEnabled } } const enabledCount = hasAccess - ? preferences.filter(p => { - const hasChanges = changes[p.template_id] !== undefined - return hasChanges ? changes[p.template_id] : p.is_enabled - }).length + ? templates.filter(template => getTemplateEnabledState(template)).length : 0 return ( diff --git a/test_default_agents.py b/test_default_agents.py new file mode 100644 index 00000000..0519ecba --- /dev/null +++ b/test_default_agents.py @@ -0,0 +1 @@ + \ No newline at end of file From 4d99e9c248e7db2482b79401d47543d3b4ee4bd9 Mon Sep 17 00:00:00 2001 From: Ashad Qureshi Date: Tue, 17 Jun 2025 15:18:03 +0500 Subject: [PATCH 2/7] Deep Agents Integrated with DB --- auto-analyst-backend/app.py | 54 +++++++-- auto-analyst-backend/chat_database.db | 4 +- auto-analyst-backend/src/agents/agents.py | 105 ++++++++---------- .../src/agents/deep_agents.py | 8 +- 4 files changed, 100 insertions(+), 71 deletions(-) diff --git a/auto-analyst-backend/app.py b/auto-analyst-backend/app.py index 9e8d1221..98bf8c4f 100644 --- a/auto-analyst-backend/app.py +++ b/auto-analyst-backend/app.py @@ -320,11 +320,13 @@ def get_deep_analyzer(self, session_id: str): current_analyzer = session_state.get('deep_analyzer') analyzer_user_id = session_state.get('deep_analyzer_user_id') + logger.log_message(f"Deep analyzer check - session: {session_id}, current_user: {user_id}, analyzer_user: {analyzer_user_id}, has_analyzer: {current_analyzer is not None}", level=logging.INFO) + if (not current_analyzer or analyzer_user_id != user_id or not hasattr(session_state, 'deep_analyzer')): - logger.log_message(f"Creating/recreating deep analyzer for session {session_id}, user_id: {user_id}", level=logging.INFO) + logger.log_message(f"Creating/recreating deep analyzer for session {session_id}, user_id: {user_id} (reason: analyzer_exists={current_analyzer is not None}, 
user_match={analyzer_user_id == user_id})", level=logging.INFO) # Load user-enabled agents from database using preference system from src.db.init_db import session_factory @@ -336,6 +338,17 @@ def get_deep_analyzer(self, session_id: str): if user_id: enabled_agents_dict = load_user_enabled_templates_for_planner_from_db(user_id, db_session) logger.log_message(f"Deep analyzer loaded {len(enabled_agents_dict)} enabled agents for user {user_id}: {list(enabled_agents_dict.keys())}", level=logging.INFO) + + if not enabled_agents_dict: + logger.log_message(f"WARNING: No enabled agents found for user {user_id}, falling back to defaults", level=logging.WARNING) + # Fallback to default agents if no enabled agents + from src.agents.agents import preprocessing_agent, statistical_analytics_agent, sk_learn_agent, data_viz_agent + enabled_agents_dict = { + "preprocessing_agent": preprocessing_agent, + "statistical_analytics_agent": statistical_analytics_agent, + "sk_learn_agent": sk_learn_agent, + "data_viz_agent": data_viz_agent + } else: # Fallback to default agents if no user_id logger.log_message("No user_id in session, loading default agents for deep analysis", level=logging.WARNING) @@ -356,7 +369,7 @@ def get_deep_analyzer(self, session_id: str): # Get agent description from database deep_agents_desc[agent_name] = get_agent_description(agent_name) - logger.log_message(f"Deep analyzer initialized with agents: {list(deep_agents.keys())}", level=logging.INFO) + logger.log_message(f"Deep analyzer initialized with {len(deep_agents)} agents: {list(deep_agents.keys())}", level=logging.INFO) except Exception as e: logger.log_message(f"Error loading agents for deep analysis: {str(e)}", level=logging.ERROR) @@ -369,11 +382,14 @@ def get_deep_analyzer(self, session_id: str): "data_viz_agent": dspy.asyncify(dspy.ChainOfThought(data_viz_agent)) } deep_agents_desc = {name: get_agent_description(name) for name in deep_agents.keys()} + logger.log_message(f"Using fallback agents: 
{list(deep_agents.keys())}", level=logging.WARNING) finally: db_session.close() session_state['deep_analyzer'] = deep_analysis_module(agents=deep_agents, agents_desc=deep_agents_desc) session_state['deep_analyzer_user_id'] = user_id # Track which user this analyzer was created for + else: + logger.log_message(f"Using existing deep analyzer for session {session_id}, user_id: {user_id}", level=logging.INFO) return session_state['deep_analyzer'] @@ -477,7 +493,6 @@ async def chat_with_agent( template_agents = [agent for agent in agent_list if _is_template_agent(agent)] custom_agents = [agent for agent in agent_list if not _is_standard_agent(agent) and not _is_template_agent(agent)] - if custom_agents: # If any custom agents, use session AI system for all ai_system = session_state["ai_system"] @@ -651,10 +666,10 @@ def _validate_agent_name(agent_name: str, session_state: dict = None): # Single agent if not _is_agent_available(agent_name, session_state): available_agents = _get_available_agents_list(session_state) - raise HTTPException( + raise HTTPException( status_code=400, detail=f"Agent '{agent_name}' not found. 
Available agents: {available_agents}" - ) + ) def _is_agent_available(agent_name: str, session_state: dict = None) -> bool: """Check if an agent is available (standard, template, or custom)""" @@ -1060,11 +1075,11 @@ async def list_agents(request: Request, session_id: str = Depends(get_session_id available_agents_list.append(template_agent) return { - "available_agents": available_agents_list, - "standard_agents": standard_agents, - "template_agents": template_agents, - "custom_agents": custom_agents - } + "available_agents": available_agents_list, + "standard_agents": standard_agents, + "template_agents": template_agents, + "custom_agents": custom_agents + } except Exception as e: logger.log_message(f"Error getting agents list: {str(e)}", level=logging.ERROR) raise HTTPException(status_code=500, detail=f"Error getting agents list: {str(e)}") @@ -1563,6 +1578,25 @@ async def debug_deep_analysis_agents(session_id: str = Depends(get_session_id_de "user_id": user_id } +@app.post("/debug/clear_deep_analyzer") +async def clear_deep_analyzer_cache(session_id: str = Depends(get_session_id_dependency)): + """Debug endpoint to clear the deep analyzer cache and force reload""" + session_state = app.state.get_session_state(session_id) + + # Clear the cached deep analyzer + if 'deep_analyzer' in session_state: + del session_state['deep_analyzer'] + if 'deep_analyzer_user_id' in session_state: + del session_state['deep_analyzer_user_id'] + + logger.log_message(f"Cleared deep analyzer cache for session {session_id}", level=logging.INFO) + + return { + "message": "Deep analyzer cache cleared", + "session_id": session_id, + "user_id": session_state.get("user_id") + } + # In the section where routers are included, add the session_router app.include_router(chat_router) app.include_router(analytics_router) diff --git a/auto-analyst-backend/chat_database.db b/auto-analyst-backend/chat_database.db index e4dbfeb5..dbd0b933 100644 --- a/auto-analyst-backend/chat_database.db +++ 
b/auto-analyst-backend/chat_database.db @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:653453e8e99f92f0dd007bde41a21169a2fa32e8230b4d7903467c8787d199c9 -size 1581056 +oid sha256:ca7cc8c0d6e10043baa992f6011ad1f93c1cae97c866a88c219871091ba31ebd +size 3137536 diff --git a/auto-analyst-backend/src/agents/agents.py b/auto-analyst-backend/src/agents/agents.py index fc2745d2..4aa28dc1 100644 --- a/auto-analyst-backend/src/agents/agents.py +++ b/auto-analyst-backend/src/agents/agents.py @@ -674,7 +674,7 @@ async def forward(self, goal, dataset, Agent_desc): "complexity": complexity.exact_word_complexity.strip(), "plan": dict(plan) } - + except Exception as e: logger.log_message(f"Error with {complexity.exact_word_complexity.strip()} planner, falling back to intermediate: {str(e)}", level=logging.WARNING) @@ -708,10 +708,6 @@ async def forward(self, goal, dataset, Agent_desc): logger.log_message(f"Final planner output: {output}", level=logging.INFO) return output - - - - class preprocessing_agent(dspy.Signature): """ You are a preprocessing agent that can work both individually and in multi-agent data analytics systems. @@ -787,48 +783,48 @@ class data_viz_agent(dspy.Signature): You are a data visualization agent that can work both individually and in multi-agent analytics pipelines. Your primary responsibility is to generate visualizations based on the user-defined goal. -You are provided with: -* **goal**: A user-defined goal outlining the type of visualization the user wants (e.g., "plot sales over time with trendline"). + You are provided with: + * **goal**: A user-defined goal outlining the type of visualization the user wants (e.g., "plot sales over time with trendline"). * **dataset**: The dataset (e.g., `df_cleaned`) which will be passed to you by other agents in the pipeline. Do not assume or create any variables — the data is already present and valid when you receive it. 
-* **styling_index**: Specific styling instructions (e.g., axis formatting, color schemes) for the visualization. + * **styling_index**: Specific styling instructions (e.g., axis formatting, color schemes) for the visualization. * **plan_instructions**: Optional dictionary containing: * **'create'**: List of visualization components you must generate (e.g., 'scatter_plot', 'bar_chart'). * **'use'**: List of variables you must use to generate the visualizations. * **'instructions'**: Additional instructions related to the creation of the visualizations. ### Responsibilities: -1. **Strict Use of Provided Variables**: + 1. **Strict Use of Provided Variables**: * You must never create fake data. Only use the variables and datasets that are explicitly provided. * If plan_instructions are provided and any variable listed in plan_instructions['use'] is missing, return an error. * If no plan_instructions are provided, work with the available dataset directly. -2. **Visualization Creation**: + 2. **Visualization Creation**: * Based on the goal and optional 'create' section of plan_instructions, generate the required visualization using Plotly. * Respect the user-defined goal in determining which type of visualization to create. -3. **Performance Optimization**: + 3. **Performance Optimization**: * If the dataset contains more than 50,000 rows, you must sample the data to 5,000 rows to improve performance: - ```python - if len(df) > 50000: - df = df.sample(5000, random_state=42) - ``` + ```python + if len(df) > 50000: + df = df.sample(5000, random_state=42) + ``` -4. **Layout and Styling**: + 4. **Layout and Styling**: * Apply formatting and layout adjustments as defined by the styling_index. * Ensure that all axes (x and y) have consistent formats (e.g., using `K`, `M`, or 1,000 format, but not mixing formats). -5. **Trendlines**: + 5. **Trendlines**: * Trendlines should only be included if explicitly requested in the goal or plan_instructions. -6. 
**Displaying the Visualization**: - * Use Plotly's `fig.show()` method to display the created chart. + 6. **Displaying the Visualization**: + * Use Plotly's `fig.show()` method to display the created chart. * Never output raw datasets or the goal itself. Only the visualization code and the chart should be returned. -7. **Error Handling**: + 7. **Error Handling**: * If required dataset or variables are missing, return an error message indicating which specific variable is missing. * If the goal or create instructions are ambiguous, return an error stating the issue. -8. **No Data Modification**: + 8. **No Data Modification**: * Never modify the provided dataset or generate new data. If the data needs preprocessing, assume it's already been done by other agents. ### Important Notes: @@ -844,7 +840,7 @@ class data_viz_agent(dspy.Signature): - Always end each visualization with: fig.to_html(full_html=False) Respond in the user's language for all summary and reasoning but keep the code in english - """ + """ goal = dspy.InputField(desc="User-defined chart goal (e.g. trendlines, scatter plots)") dataset = dspy.InputField(desc="Details of the dataframe (`df`) and its columns") styling_index = dspy.InputField(desc="Instructions for plot styling and layout formatting") @@ -938,33 +934,33 @@ def statistical_model(X, y, goal, period=None): code = dspy.OutputField(desc="Python code for statistical modeling using statsmodels") summary = dspy.OutputField(desc="A concise bullet-point summary of the statistical analysis performed and key findings") - + class sk_learn_agent(dspy.Signature): """ You are a machine learning agent that can work both individually and in multi-agent data analytics pipelines. -You are given: -* A dataset (often cleaned and feature-engineered). -* A user-defined goal (e.g., classification, regression, clustering). + You are given: + * A dataset (often cleaned and feature-engineered). + * A user-defined goal (e.g., classification, regression, clustering). 
* Optional plan instructions specifying: * Which variables you are expected to CREATE (e.g., `trained_model`, `predictions`). * Which variables you will USE (e.g., `df_cleaned`, `target_variable`, `feature_columns`). * A set of instructions outlining additional processing or handling for these variables. ### Your Responsibilities: -* Use the scikit-learn library to implement the appropriate ML pipeline. -* Always split data into training and testing sets where applicable. -* Use `print()` for all outputs. -* Ensure your code is: + * Use the scikit-learn library to implement the appropriate ML pipeline. + * Always split data into training and testing sets where applicable. + * Use `print()` for all outputs. + * Ensure your code is: * Reproducible: Set `random_state=42` wherever applicable. * Modular: Avoid deeply nested code. * Focused on model building, not visualization (leave plotting to the `data_viz_agent`). -* Your task may include: - * Preprocessing inputs (e.g., encoding). - * Model selection and training. - * Evaluation (e.g., accuracy, RMSE, classification report). + * Your task may include: + * Preprocessing inputs (e.g., encoding). + * Model selection and training. + * Evaluation (e.g., accuracy, RMSE, classification report). ### You must not: -* Visualize anything (that's another agent's job). + * Visualize anything (that's another agent's job). * Rely on hardcoded column names — use those passed via plan_instructions or infer from data. * Never create or modify any variables not explicitly mentioned in plan_instructions['CREATE'] (if provided). * Never create the `df` variable. You will only work with the variables passed via the plan_instructions. @@ -982,9 +978,9 @@ class sk_learn_agent(dspy.Signature): Given that the plan_instructions specifies variables to CREATE and USE, and includes instructions, your approach should look like this: 1. Use `df_cleaned` and `feature_columns` from the plan_instructions to extract your features (`X`). 2. 
Use `target_column` from plan_instructions to extract your target (`y`). -3. If instructions are provided (e.g., scale or encode), follow them. -4. Split data into training and testing sets using `train_test_split`. -5. Train the model based on the received goal (classification, regression, etc.). + 3. If instructions are provided (e.g., scale or encode), follow them. + 4. Split data into training and testing sets using `train_test_split`. + 5. Train the model based on the received goal (classification, regression, etc.). 6. Store the output variables as specified in plan_instructions['CREATE']. ### Summary: @@ -992,12 +988,12 @@ class sk_learn_agent(dspy.Signature): 2. Only CREATE the variables specified in plan_instructions['CREATE'] (if provided). 3. Follow any additional instructions in plan_instructions['INSTRUCTIONS'] (if provided). 4. Ensure reproducibility by setting random_state=42 wherever necessary. -5. Focus on model building, evaluation, and saving the required outputs—avoid any unnecessary variables. + 5. Focus on model building, evaluation, and saving the required outputs—avoid any unnecessary variables. ### Output: * The code implementing the ML task, including all required steps. * A summary of what the model does, how it is evaluated, and why it fits the goal. 
-* Respond in the user's language for all summary and reasoning but keep the code in english + * Respond in the user's language for all summary and reasoning but keep the code in english """ dataset = dspy.InputField(desc="Input dataset, often cleaned and feature-selected (e.g., df_cleaned)") goal = dspy.InputField(desc="The user's machine learning goal (e.g., classification or regression)") @@ -1423,19 +1419,18 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): else: # Load standard agents from provided list (legacy support) logger.log_message(f"Loading {len(agents)} hardcoded agents", level=logging.INFO) - for i, a in enumerate(agents): - name = a.__pydantic_core_schema__['schema']['model_name'] - self.agents[name] = dspy.asyncify(dspy.ChainOfThought(a)) - self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} - self.agent_desc.append({name: get_agent_description(name)}) - logger.log_message(f"Added hardcoded agent: {name}", level=logging.DEBUG) + for i, a in enumerate(agents): + name = a.__pydantic_core_schema__['schema']['model_name'] + self.agents[name] = dspy.asyncify(dspy.ChainOfThought(a)) + self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} + self.agent_desc.append({name: get_agent_description(name)}) # Always add basic QA agent self.agents['basic_qa_agent'] = dspy.asyncify(dspy.Predict("goal->answer")) self.agent_inputs['basic_qa_agent'] = {"goal"} self.agent_desc.append({'basic_qa_agent':"Answers queries unrelated to data & also that include links, poison or attempts to attack the system"}) logger.log_message("Added basic_qa_agent", level=logging.DEBUG) - + # Initialize coordination agents self.planner = planner_module() self.memory_summarize_agent = dspy.ChainOfThought(m.memory_summarize_agent) @@ -1582,8 +1577,7 @@ async def get_plan(self, query): try: module_return = 
await self.planner(goal=dict_['goal'], dataset=dict_['dataset'], Agent_desc=dict_['Agent_desc']) - logger.log_message(f"Planner returned: {module_return}", level=logging.DEBUG) - + plan_dict = dict(module_return['plan']) if 'complexity' in module_return: complexity = module_return['complexity'] @@ -1591,7 +1585,6 @@ async def get_plan(self, query): complexity = 'basic' plan_dict['complexity'] = complexity - logger.log_message(f"Final plan: {plan_dict}", level=logging.INFO) return plan_dict except Exception as e: @@ -1613,7 +1606,7 @@ async def execute_plan(self, query, plan): # Clean and split the plan string into agent names plan_text = plan.get("plan", "").replace("Plan", "").replace(":", "").strip() logger.log_message(f"Plan text after cleaning: {plan_text}", level=logging.DEBUG) - + if "basic_qa_agent" in plan_text: logger.log_message("Executing basic_qa_agent", level=logging.INFO) inputs = dict(goal=query) @@ -1636,7 +1629,7 @@ async def execute_plan(self, query, plan): plan_instructions = raw_instr else: plan_instructions = {} - + logger.log_message(f"Parsed plan instructions: {plan_instructions}", level=logging.DEBUG) # Check if we have no valid agents to execute @@ -1644,7 +1637,7 @@ async def execute_plan(self, query, plan): logger.log_message(f"No valid agents found in plan. 
Available agents: {list(self.agents.keys())}, Plan agents: {plan_list}", level=logging.ERROR) yield "plan_not_found", None, {"error": "No valid agents found in plan"} return - + # Execute agents in sequence for agent_name in plan_list: if agent_name not in self.agents: @@ -1671,8 +1664,8 @@ async def execute_plan(self, query, plan): logger.log_message(f"Agent {agent_name} completed successfully", level=logging.INFO) yield agent_result_name, inputs, response - + except Exception as e: - logger.log_message(f"Error executing agent {agent_name}: {str(e)}", level=logging.ERROR) - yield agent_name, {}, {"error": f"Error executing {agent_name}: {str(e)}"} + logger.log_message(f"Error executing agent {agent_name}: {str(e)}", level=logging.ERROR) + yield agent_name, {}, {"error": f"Error executing {agent_name}: {str(e)}"} diff --git a/auto-analyst-backend/src/agents/deep_agents.py b/auto-analyst-backend/src/agents/deep_agents.py index 34c42849..d1f752b9 100644 --- a/auto-analyst-backend/src/agents/deep_agents.py +++ b/auto-analyst-backend/src/agents/deep_agents.py @@ -799,7 +799,8 @@ async def execute_deep_analysis_streaming(self, goal, dataset_info, session_df=N if not all(key in self.agents for key in keys): raise ValueError(f"Invalid agent key(s) in plan instructions. 
Available agents: {list(self.agents.keys())}") - + logger.log_message(f"Plan instructions: {plan_instructions}", logging.INFO) + logger.log_message(f"Keys: {keys}", logging.INFO) except (ValueError, SyntaxError, json.JSONDecodeError) as e: try: deep_plan = await self.deep_plan_fixer(plan_instructions=deep_plan.plan_instructions) @@ -807,6 +808,8 @@ async def execute_deep_analysis_streaming(self, goal, dataset_info, session_df=N if not isinstance(plan_instructions, dict): plan_instructions = json.loads(deep_plan.fixed_plan) keys = [key for key in plan_instructions.keys()] + logger.log_message(f"Plan instructions fixed: {plan_instructions}", logging.INFO) + logger.log_message(f"Keys: {keys}", logging.INFO) except (ValueError, SyntaxError, json.JSONDecodeError) as e: logger.log_message(f"Error parsing plan instructions: {e}", logging.ERROR) raise e @@ -842,9 +845,8 @@ async def execute_deep_analysis_streaming(self, goal, dataset_info, session_df=N ) for key in keys ] - tasks = [self.agents[key](**q) for q, key in zip(queries, keys)] - + # Await all tasks to complete summaries = [] codes = [] From 79afe7830bf125a258d3ad7b2df8e379085e9b6f Mon Sep 17 00:00:00 2001 From: Ashad Qureshi Date: Tue, 17 Jun 2025 15:34:52 +0500 Subject: [PATCH 3/7] Syntax Errors Fixed --- auto-analyst-backend/app.py | 5 +++-- auto-analyst-backend/chat_database.db | 2 +- auto-analyst-backend/src/agents/agents.py | 21 +++++++++++++-------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/auto-analyst-backend/app.py b/auto-analyst-backend/app.py index 98bf8c4f..edfd90f5 100644 --- a/auto-analyst-backend/app.py +++ b/auto-analyst-backend/app.py @@ -493,6 +493,7 @@ async def chat_with_agent( template_agents = [agent for agent in agent_list if _is_template_agent(agent)] custom_agents = [agent for agent in agent_list if not _is_standard_agent(agent) and not _is_template_agent(agent)] + if custom_agents: # If any custom agents, use session AI system for all ai_system = 
session_state["ai_system"] @@ -666,10 +667,10 @@ def _validate_agent_name(agent_name: str, session_state: dict = None): # Single agent if not _is_agent_available(agent_name, session_state): available_agents = _get_available_agents_list(session_state) - raise HTTPException( + raise HTTPException( status_code=400, detail=f"Agent '{agent_name}' not found. Available agents: {available_agents}" - ) + ) def _is_agent_available(agent_name: str, session_state: dict = None) -> bool: """Check if an agent is available (standard, template, or custom)""" diff --git a/auto-analyst-backend/chat_database.db b/auto-analyst-backend/chat_database.db index dbd0b933..8bb448c6 100644 --- a/auto-analyst-backend/chat_database.db +++ b/auto-analyst-backend/chat_database.db @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca7cc8c0d6e10043baa992f6011ad1f93c1cae97c866a88c219871091ba31ebd +oid sha256:2ac13830dde4e11eb1ab5b528da9fd6e539205adc6979d53ce7dba040dbd38c5 size 3137536 diff --git a/auto-analyst-backend/src/agents/agents.py b/auto-analyst-backend/src/agents/agents.py index 4aa28dc1..86a5f565 100644 --- a/auto-analyst-backend/src/agents/agents.py +++ b/auto-analyst-backend/src/agents/agents.py @@ -708,6 +708,10 @@ async def forward(self, goal, dataset, Agent_desc): logger.log_message(f"Final planner output: {output}", level=logging.INFO) return output + + + + class preprocessing_agent(dspy.Signature): """ You are a preprocessing agent that can work both individually and in multi-agent data analytics systems. 
@@ -1156,6 +1160,7 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): name = a.__pydantic_core_schema__['schema']['model_name'] self.agents[name] = dspy.asyncify(dspy.ChainOfThought(a)) self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} + logger.log_message(f"Added agent: {name}, inputs: {self.agent_inputs[name]}", level=logging.DEBUG) self.agent_desc.append({name: get_agent_description(name)}) # Load user-enabled template agents if user_id and db_session are provided @@ -1577,15 +1582,15 @@ async def get_plan(self, query): try: module_return = await self.planner(goal=dict_['goal'], dataset=dict_['dataset'], Agent_desc=dict_['Agent_desc']) - - plan_dict = dict(module_return['plan']) - if 'complexity' in module_return: - complexity = module_return['complexity'] - else: - complexity = 'basic' - plan_dict['complexity'] = complexity + + plan_dict = dict(module_return['plan']) + if 'complexity' in module_return: + complexity = module_return['complexity'] + else: + complexity = 'basic' + plan_dict['complexity'] = complexity - return plan_dict + return plan_dict except Exception as e: logger.log_message(f"Error in get_plan: {str(e)}", level=logging.ERROR) From 9326adc420ced170e00d1345fbbf3e8e9c6a28a2 Mon Sep 17 00:00:00 2001 From: Ashad Qureshi Date: Tue, 17 Jun 2025 16:44:13 +0500 Subject: [PATCH 4/7] Backend Bug Fixed - Working version 1.0 --- auto-analyst-backend/app.py | 57 ++++- auto-analyst-backend/chat_database.db | 2 +- auto-analyst-backend/src/agents/agents.py | 269 +++++++++++++++++----- 3 files changed, 259 insertions(+), 69 deletions(-) diff --git a/auto-analyst-backend/app.py b/auto-analyst-backend/app.py index edfd90f5..bfe73b8b 100644 --- a/auto-analyst-backend/app.py +++ b/auto-analyst-backend/app.py @@ -466,25 +466,34 @@ async def chat_with_agent( session_id: str = Depends(get_session_id_dependency) ): session_state = 
app.state.get_session_state(session_id) + logger.log_message(f"[DEBUG] chat_with_agent called with agent: '{agent_name}', query: '{request.query[:100]}...'", level=logging.DEBUG) try: # Extract and validate query parameters + logger.log_message(f"[DEBUG] Updating session from query params", level=logging.DEBUG) _update_session_from_query_params(request_obj, session_state) + logger.log_message(f"[DEBUG] Session state after query params: user_id={session_state.get('user_id')}, chat_id={session_state.get('chat_id')}", level=logging.DEBUG) # Validate dataset and agent name if session_state["current_df"] is None: + logger.log_message(f"[DEBUG] No dataset loaded", level=logging.DEBUG) raise HTTPException(status_code=400, detail=RESPONSE_ERROR_NO_DATASET) + logger.log_message(f"[DEBUG] About to validate agent name: '{agent_name}'", level=logging.DEBUG) _validate_agent_name(agent_name, session_state) + logger.log_message(f"[DEBUG] Agent validation completed successfully", level=logging.DEBUG) # Record start time for timing start_time = time.time() # Get chat context and prepare query + logger.log_message(f"[DEBUG] Preparing query with context", level=logging.DEBUG) enhanced_query = _prepare_query_with_context(request.query, session_state) + logger.log_message(f"[DEBUG] Enhanced query length: {len(enhanced_query)}", level=logging.DEBUG) # Initialize agent - handle standard, template, and custom agents if "," in agent_name: + logger.log_message(f"[DEBUG] Processing multiple agents: {agent_name}", level=logging.DEBUG) # Multiple agents case agent_list = [agent.strip() for agent in agent_name.split(",")] @@ -493,67 +502,84 @@ async def chat_with_agent( template_agents = [agent for agent in agent_list if _is_template_agent(agent)] custom_agents = [agent for agent in agent_list if not _is_standard_agent(agent) and not _is_template_agent(agent)] + logger.log_message(f"[DEBUG] Agent categorization - standard: {standard_agents}, template: {template_agents}, custom: 
{custom_agents}", level=logging.DEBUG) if custom_agents: # If any custom agents, use session AI system for all ai_system = session_state["ai_system"] session_lm = get_session_lm(session_state) + logger.log_message(f"[DEBUG] Using custom agent execution path", level=logging.DEBUG) with dspy.context(lm=session_lm): response = await asyncio.wait_for( _execute_custom_agents(ai_system, agent_list, enhanced_query), timeout=REQUEST_TIMEOUT_SECONDS ) + logger.log_message(f"[DEBUG] Custom agents response type: {type(response)}, keys: {list(response.keys()) if isinstance(response, dict) else 'not a dict'}", level=logging.DEBUG) else: # All standard/template agents - use auto_analyst_ind which loads from DB user_id = session_state.get("user_id") + logger.log_message(f"[DEBUG] Using auto_analyst_ind for multiple standard/template agents with user_id: {user_id}", level=logging.DEBUG) # Create database session for agent loading from src.db.init_db import session_factory db_session = session_factory() try: # auto_analyst_ind will load all agents from database + logger.log_message(f"[DEBUG] Creating auto_analyst_ind instance", level=logging.DEBUG) agent = auto_analyst_ind(agents=[], retrievers=session_state["retrievers"], user_id=user_id, db_session=db_session) session_lm = get_session_lm(session_state) + logger.log_message(f"[DEBUG] About to call agent.forward with query and agent list", level=logging.DEBUG) with dspy.context(lm=session_lm): response = await asyncio.wait_for( agent.forward(enhanced_query, ",".join(agent_list)), timeout=REQUEST_TIMEOUT_SECONDS ) + logger.log_message(f"[DEBUG] auto_analyst_ind response type: {type(response)}, content: {str(response)[:200]}...", level=logging.DEBUG) finally: db_session.close() else: + logger.log_message(f"[DEBUG] Processing single agent: {agent_name}", level=logging.DEBUG) # Single agent case if _is_standard_agent(agent_name) or _is_template_agent(agent_name): # Standard or template agent - use auto_analyst_ind which loads from DB 
user_id = session_state.get("user_id") + logger.log_message(f"[DEBUG] Using auto_analyst_ind for single standard/template agent '{agent_name}' with user_id: {user_id}", level=logging.DEBUG) # Create database session for agent loading from src.db.init_db import session_factory db_session = session_factory() try: # auto_analyst_ind will load all agents from database + logger.log_message(f"[DEBUG] Creating auto_analyst_ind instance for single agent", level=logging.DEBUG) agent = auto_analyst_ind(agents=[], retrievers=session_state["retrievers"], user_id=user_id, db_session=db_session) session_lm = get_session_lm(session_state) + logger.log_message(f"[DEBUG] About to call agent.forward for single agent '{agent_name}'", level=logging.DEBUG) with dspy.context(lm=session_lm): response = await asyncio.wait_for( agent.forward(enhanced_query, agent_name), timeout=REQUEST_TIMEOUT_SECONDS ) + logger.log_message(f"[DEBUG] Single agent response type: {type(response)}, content: {str(response)[:200]}...", level=logging.DEBUG) finally: db_session.close() else: # Custom agent - use session AI system ai_system = session_state["ai_system"] session_lm = get_session_lm(session_state) + logger.log_message(f"[DEBUG] Using custom agent execution for '{agent_name}'", level=logging.DEBUG) with dspy.context(lm=session_lm): response = await asyncio.wait_for( _execute_custom_agents(ai_system, [agent_name], enhanced_query), timeout=REQUEST_TIMEOUT_SECONDS ) + logger.log_message(f"[DEBUG] Custom single agent response type: {type(response)}, content: {str(response)[:200]}...", level=logging.DEBUG) + logger.log_message(f"[DEBUG] About to format response to markdown. 
Response type: {type(response)}", level=logging.DEBUG) formatted_response = format_response_to_markdown(response, agent_name, session_state["current_df"]) + logger.log_message(f"[DEBUG] Formatted response type: {type(formatted_response)}, length: {len(str(formatted_response))}", level=logging.DEBUG) if formatted_response == RESPONSE_ERROR_INVALID_QUERY: + logger.log_message(f"[DEBUG] Response was invalid query error", level=logging.DEBUG) return { "agent_name": agent_name, "query": request.query, @@ -563,6 +589,7 @@ async def chat_with_agent( # Track usage statistics if session_state.get("user_id"): + logger.log_message(f"[DEBUG] Tracking model usage", level=logging.DEBUG) _track_model_usage( session_state=session_state, enhanced_query=enhanced_query, @@ -570,6 +597,7 @@ async def chat_with_agent( processing_time_ms=int((time.time() - start_time) * 1000) ) + logger.log_message(f"[DEBUG] chat_with_agent completed successfully", level=logging.DEBUG) return { "agent_name": agent_name, "query": request.query, # Return original query without context @@ -578,13 +606,19 @@ async def chat_with_agent( } except HTTPException: # Re-raise HTTP exceptions to preserve status codes + logger.log_message(f"[DEBUG] HTTPException caught and re-raised", level=logging.DEBUG) raise except asyncio.TimeoutError: + logger.log_message(f"[ERROR] Timeout error in chat_with_agent", level=logging.ERROR) raise HTTPException(status_code=504, detail="Request timed out. Please try a simpler query.") except Exception as e: + logger.log_message(f"[ERROR] Unexpected error in chat_with_agent: {str(e)}", level=logging.ERROR) + logger.log_message(f"[ERROR] Exception type: {type(e)}, traceback: {str(e)}", level=logging.ERROR) + import traceback + logger.log_message(f"[ERROR] Full traceback: {traceback.format_exc()}", level=logging.ERROR) raise HTTPException(status_code=500, detail="An unexpected error occurred. 
Please try again later.") - - + + @app.post("/chat", response_model=dict) async def chat_with_all( request: QueryRequest, @@ -653,24 +687,35 @@ def _update_session_from_query_params(request_obj: Request, session_state: dict) def _validate_agent_name(agent_name: str, session_state: dict = None): """Validate that the agent name(s) are available""" + logger.log_message(f"[DEBUG] Validating agent name: '{agent_name}'", level=logging.DEBUG) + if "," in agent_name: # Multiple agents agent_list = [agent.strip() for agent in agent_name.split(",")] + logger.log_message(f"[DEBUG] Multiple agents detected: {agent_list}", level=logging.DEBUG) for agent in agent_list: - if not _is_agent_available(agent, session_state): + is_available = _is_agent_available(agent, session_state) + logger.log_message(f"[DEBUG] Agent '{agent}' availability: {is_available}", level=logging.DEBUG) + if not is_available: available_agents = _get_available_agents_list(session_state) + logger.log_message(f"[DEBUG] Agent '{agent}' not found. Available: {available_agents}", level=logging.DEBUG) raise HTTPException( status_code=400, detail=f"Agent '{agent}' not found. Available agents: {available_agents}" ) else: # Single agent - if not _is_agent_available(agent_name, session_state): + is_available = _is_agent_available(agent_name, session_state) + logger.log_message(f"[DEBUG] Single agent '{agent_name}' availability: {is_available}", level=logging.DEBUG) + if not is_available: available_agents = _get_available_agents_list(session_state) - raise HTTPException( + logger.log_message(f"[DEBUG] Agent '{agent_name}' not found. Available: {available_agents}", level=logging.DEBUG) + raise HTTPException( status_code=400, detail=f"Agent '{agent_name}' not found. 
Available agents: {available_agents}" - ) + ) + + logger.log_message(f"[DEBUG] Agent validation passed for: '{agent_name}'", level=logging.DEBUG) def _is_agent_available(agent_name: str, session_state: dict = None) -> bool: """Check if an agent is available (standard, template, or custom)""" diff --git a/auto-analyst-backend/chat_database.db b/auto-analyst-backend/chat_database.db index 8bb448c6..440ad16d 100644 --- a/auto-analyst-backend/chat_database.db +++ b/auto-analyst-backend/chat_database.db @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ac13830dde4e11eb1ab5b528da9fd6e539205adc6979d53ce7dba040dbd38c5 +oid sha256:79c9f2406593f491c31fa9e9a5ee10ec3667f3dc12350b02a40b78f66ac155ec size 3137536 diff --git a/auto-analyst-backend/src/agents/agents.py b/auto-analyst-backend/src/agents/agents.py index 86a5f565..bb614b43 100644 --- a/auto-analyst-backend/src/agents/agents.py +++ b/auto-analyst-backend/src/agents/agents.py @@ -1150,7 +1150,7 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): if agent_name == 'data_viz_agent': self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} else: - self.agent_inputs[agent_name] = {'dataset', 'goal', 'plan_instructions'} + self.agent_inputs[agent_name] = {'goal', 'dataset', 'plan_instructions'} # Get description from database self.agent_desc.append({agent_name: get_agent_description(agent_name)}) @@ -1173,8 +1173,19 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): # Add template agent to agents dict self.agents[template_name] = dspy.asyncify(dspy.ChainOfThought(signature)) - # Extract input fields from signature - templates use standard fields like data_viz_agent - self.agent_inputs[template_name] = {'goal', 'dataset', 'styling_index'} + # Determine if this is a visualization agent based on name + is_viz_agent = (template_name == 'data_viz_agent' or + 'viz' in template_name.lower() or + 'visual' in template_name.lower() or + 
'plot' in template_name.lower() or + 'chart' in template_name.lower() or + 'matplotlib' in template_name.lower()) + + # Set input fields based on agent type + if is_viz_agent: + self.agent_inputs[template_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} + else: + self.agent_inputs[template_name] = {'goal', 'dataset', 'plan_instructions'} # Store template agent description try: @@ -1291,39 +1302,112 @@ async def execute_agent(self, specified_agent, inputs): return specified_agent.strip(), {"error": str(e)} async def forward(self, query, specified_agent): + logger.log_message(f"[DEBUG] auto_analyst_ind.forward called with query: '{query[:100]}...', agent: '{specified_agent}'", level=logging.DEBUG) + try: # If specified_agent contains multiple agents separated by commas # This is for handling multiple @agent mentions in one query if "," in specified_agent: + logger.log_message(f"[DEBUG] Multiple agents detected in auto_analyst_ind", level=logging.DEBUG) agent_list = [agent.strip() for agent in specified_agent.split(",")] return await self.execute_multiple_agents(query, agent_list) + logger.log_message(f"[DEBUG] Processing single agent: '{specified_agent}'", level=logging.DEBUG) + # Process query with specified agent (single agent case) dict_ = {} + logger.log_message(f"[DEBUG] Retrieving dataset info", level=logging.DEBUG) dict_['dataset'] = self.dataset.retrieve(query)[0].text + logger.log_message(f"[DEBUG] Dataset retrieved, length: {len(dict_['dataset'])}", level=logging.DEBUG) + + logger.log_message(f"[DEBUG] Retrieving styling index", level=logging.DEBUG) dict_['styling_index'] = self.styling_index.retrieve(query)[0].text + logger.log_message(f"[DEBUG] Styling index retrieved, length: {len(dict_['styling_index'])}", level=logging.DEBUG) + dict_['hint'] = [] dict_['goal'] = query dict_['Agent_desc'] = str(self.agent_desc) + + logger.log_message(f"[DEBUG] Checking if agent '{specified_agent.strip()}' exists in agent_inputs", level=logging.DEBUG) + 
logger.log_message(f"[DEBUG] Available agent_inputs keys: {list(self.agent_inputs.keys())}", level=logging.DEBUG) + + if specified_agent.strip() not in self.agent_inputs: + logger.log_message(f"[ERROR] Agent '{specified_agent.strip()}' not found in agent_inputs", level=logging.ERROR) + return {"response": f"Agent '{specified_agent.strip()}' not found in agent inputs"} # Prepare inputs - inputs = {x:dict_[x] for x in self.agent_inputs[specified_agent.strip()]} - inputs['hint'] = str(dict_['hint']).replace('[','').replace(']','') + logger.log_message(f"[DEBUG] Preparing inputs for agent '{specified_agent.strip()}'", level=logging.DEBUG) + logger.log_message(f"[DEBUG] Required inputs for agent: {self.agent_inputs[specified_agent.strip()]}", level=logging.DEBUG) + + # Create inputs that match exactly what the agent expects + inputs = {} + required_fields = self.agent_inputs[specified_agent.strip()] + + for field in required_fields: + if field == 'goal': + inputs['goal'] = query + elif field == 'dataset': + inputs['dataset'] = dict_['dataset'] + elif field == 'styling_index': + inputs['styling_index'] = dict_['styling_index'] + elif field == 'plan_instructions': + inputs['plan_instructions'] = "" # Empty for individual agent use + elif field == 'hint': + inputs['hint'] = "" # Empty string for hint + else: + # For any other fields, try to get from dict_ if available + if field in dict_: + inputs[field] = dict_[field] + else: + logger.log_message(f"[WARNING] Field '{field}' required by agent but not available in dict_", level=logging.WARNING) + inputs[field] = "" # Provide empty string as fallback + + logger.log_message(f"[DEBUG] Final inputs prepared: {list(inputs.keys())}", level=logging.DEBUG) + logger.log_message(f"[DEBUG] Inputs match required fields: {set(inputs.keys()) == required_fields}", level=logging.DEBUG) + + logger.log_message(f"[DEBUG] Checking if agent '{specified_agent.strip()}' exists in agents dict", level=logging.DEBUG) + logger.log_message(f"[DEBUG] 
Available agents: {list(self.agents.keys())}", level=logging.DEBUG) + + if specified_agent.strip() not in self.agents: + logger.log_message(f"[ERROR] Agent '{specified_agent.strip()}' not found in agents dict", level=logging.ERROR) + return {"response": f"Agent '{specified_agent.strip()}' not found in agents"} # Execute agent + logger.log_message(f"[DEBUG] About to execute agent '{specified_agent.strip()}'", level=logging.DEBUG) result = await self.agents[specified_agent.strip()](**inputs) + logger.log_message(f"[DEBUG] Agent execution completed. Result type: {type(result)}", level=logging.DEBUG) + logger.log_message(f"[DEBUG] Agent result content: {str(result)[:200]}...", level=logging.DEBUG) # Track usage for template agents + logger.log_message(f"[DEBUG] Tracking usage for agent", level=logging.DEBUG) await self._track_agent_usage(specified_agent.strip()) - output_dict = {specified_agent.strip(): dict(result)} + logger.log_message(f"[DEBUG] Converting result to dict", level=logging.DEBUG) + try: + result_dict = dict(result) + logger.log_message(f"[DEBUG] Result converted to dict successfully. 
Keys: {list(result_dict.keys())}", level=logging.DEBUG) + except Exception as dict_error: + logger.log_message(f"[ERROR] Failed to convert result to dict: {str(dict_error)}", level=logging.ERROR) + logger.log_message(f"[ERROR] Result type that failed conversion: {type(result)}", level=logging.ERROR) + return {"response": f"Error converting agent result to dict: {str(dict_error)}"} + + logger.log_message(f"[DEBUG] Creating output dict", level=logging.DEBUG) + output_dict = {specified_agent.strip(): result_dict} + logger.log_message(f"[DEBUG] Output dict created successfully", level=logging.DEBUG) - if "error" in output_dict: - return {"response": f"Error executing agent: {output_dict['error']}"} + # Check for errors in the agent's response (not in the outer dict) + logger.log_message(f"[DEBUG] Checking for errors in agent response", level=logging.DEBUG) + if "error" in result_dict: + logger.log_message(f"[DEBUG] Error found in agent response: {result_dict['error']}", level=logging.DEBUG) + return {"response": f"Error executing agent: {result_dict['error']}"} + logger.log_message(f"[DEBUG] auto_analyst_ind.forward completed successfully", level=logging.DEBUG) return output_dict except Exception as e: + logger.log_message(f"[ERROR] Exception in auto_analyst_ind.forward: {str(e)}", level=logging.ERROR) + import traceback + logger.log_message(f"[ERROR] Full traceback: {traceback.format_exc()}", level=logging.ERROR) return {"response": f"This is the error from the system: {str(e)}"} async def execute_multiple_agents(self, query, agent_list): @@ -1347,8 +1431,33 @@ async def execute_multiple_agents(self, query, agent_list): continue # Prepare inputs for this agent - inputs = {x:dict_[x] for x in self.agent_inputs[agent_name] if x in dict_} - inputs['hint'] = str(dict_['hint']).replace('[','').replace(']','') + logger.log_message(f"[DEBUG] Preparing inputs for agent '{agent_name}'", level=logging.DEBUG) + logger.log_message(f"[DEBUG] Required inputs for agent: 
{self.agent_inputs[agent_name]}", level=logging.DEBUG) + + # Create inputs that match exactly what the agent expects + inputs = {} + required_fields = self.agent_inputs[agent_name] + + for field in required_fields: + if field == 'goal': + inputs['goal'] = query + elif field == 'dataset': + inputs['dataset'] = dict_['dataset'] + elif field == 'styling_index': + inputs['styling_index'] = dict_['styling_index'] + elif field == 'plan_instructions': + inputs['plan_instructions'] = "" # Empty for individual agent use + elif field == 'hint': + inputs['hint'] = "" # Empty string for hint + else: + # For any other fields, try to get from dict_ if available + if field in dict_: + inputs[field] = dict_[field] + else: + logger.log_message(f"[WARNING] Field '{field}' required by agent but not available in dict_", level=logging.WARNING) + + logger.log_message(f"[DEBUG] Final inputs prepared for '{agent_name}': {list(inputs.keys())}", level=logging.DEBUG) + logger.log_message(f"[DEBUG] Inputs match required fields: {set(inputs.keys()) == required_fields}", level=logging.DEBUG) # Execute agent agent_result = await self.agents[agent_name](**inputs) @@ -1380,57 +1489,93 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): logger.log_message(f"Initializing auto_analyst for user_id: {user_id}", level=logging.INFO) - # If no hardcoded agents provided, load all agents from database using preference system - if not agents: - logger.log_message("No hardcoded agents provided, loading from database with preferences", level=logging.INFO) - - if user_id and db_session: - try: - # Load user-enabled agents (both default and template) for planner use - template_signatures = load_user_enabled_templates_for_planner_from_db(user_id, db_session) - logger.log_message(f"Loaded {len(template_signatures)} enabled agents from preferences", level=logging.INFO) + # Load user-enabled template agents if user_id and db_session are provided + if user_id and db_session: + try: + # For 
individual use, load all available templates (not just planner-enabled ones) + template_signatures = load_user_enabled_templates_from_db(user_id, db_session) + + for template_name, signature in template_signatures.items(): + # Skip if this is a core agent - we'll load it separately + if template_name in ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent']: + continue + + # Add template agent to agents dict + self.agents[template_name] = dspy.asyncify(dspy.ChainOfThought(signature)) + + # Determine if this is a visualization agent based on name + is_viz_agent = ('viz' in template_name.lower() or + 'visual' in template_name.lower() or + 'plot' in template_name.lower() or + 'chart' in template_name.lower() or + 'matplotlib' in template_name.lower()) + + # Set input fields based on agent type + if is_viz_agent: + self.agent_inputs[template_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} + else: + self.agent_inputs[template_name] = {'goal', 'dataset', 'plan_instructions'} - for template_name, signature in template_signatures.items(): - # Add agent to agents dict - self.agents[template_name] = dspy.asyncify(dspy.ChainOfThought(signature)) + # Store template agent description + try: + from src.db.schemas.models import AgentTemplate - # Set input fields based on agent type - is_viz_agent = (template_name == 'data_viz_agent' or - 'viz' in template_name.lower() or - 'visual' in template_name.lower() or - 'plot' in template_name.lower() or - 'chart' in template_name.lower() or - 'matplotlib' in template_name.lower()) + # Find template record + template_record = db_session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() - if is_viz_agent: - self.agent_inputs[template_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} + if template_record: + description = f"Template: {template_record.description}" + self.agent_desc.append({template_name: description}) else: - 
self.agent_inputs[template_name] = {'dataset', 'goal', 'plan_instructions'} - - # Get description from database - self.agent_desc.append({template_name: get_agent_description(template_name)}) - logger.log_message(f"Added agent: {template_name}", level=logging.DEBUG) + self.agent_desc.append({template_name: f"Template: {template_name}"}) + except Exception as desc_error: + logger.log_message(f"Error getting description for template {template_name}: {str(desc_error)}", level=logging.WARNING) + self.agent_desc.append({template_name: f"Template: {template_name}"}) - logger.log_message(f"Successfully loaded {len(template_signatures)} agents for planner", level=logging.INFO) - - except Exception as e: - logger.log_message(f"Error loading agents from preferences: {str(e)}", level=logging.ERROR) - # Fallback: load default agents without preferences - self._load_default_agents_fallback() - else: - logger.log_message("No user_id or db_session provided, loading default agents", level=logging.WARNING) - # Fallback: load default agents without preferences - self._load_default_agents_fallback() + logger.log_message(f"Loaded {len([t for t in template_signatures.keys() if t not in ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent']])} custom templates for individual use", level=logging.DEBUG) + + except Exception as e: + logger.log_message(f"Error loading template agents for user {user_id}: {str(e)}", level=logging.ERROR) + + # Load core agents (always load these, regardless of template preferences) + if not agents: + # Load the 4 core agents from database + core_agent_names = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent'] + + for agent_name in core_agent_names: + # Get the agent signature class + if agent_name == 'preprocessing_agent': + agent_signature = preprocessing_agent + elif agent_name == 'statistical_analytics_agent': + agent_signature = statistical_analytics_agent + elif agent_name == 
'sk_learn_agent': + agent_signature = sk_learn_agent + elif agent_name == 'data_viz_agent': + agent_signature = data_viz_agent + + # Add to agents dict + self.agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(agent_signature)) + + # Set input fields based on signature + if agent_name == 'data_viz_agent': + self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} + else: + self.agent_inputs[agent_name] = {'goal', 'dataset', 'plan_instructions'} + + # Get description from database + self.agent_desc.append({agent_name: get_agent_description(agent_name)}) + logger.log_message(f"Loaded core agent: {agent_name} with inputs: {self.agent_inputs[agent_name]}", level=logging.DEBUG) else: # Load standard agents from provided list (legacy support) - logger.log_message(f"Loading {len(agents)} hardcoded agents", level=logging.INFO) - for i, a in enumerate(agents): - name = a.__pydantic_core_schema__['schema']['model_name'] - self.agents[name] = dspy.asyncify(dspy.ChainOfThought(a)) - self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} - self.agent_desc.append({name: get_agent_description(name)}) - - # Always add basic QA agent + for i, a in enumerate(agents): + name = a.__pydantic_core_schema__['schema']['model_name'] + self.agents[name] = dspy.asyncify(dspy.ChainOfThought(a)) + self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} + logger.log_message(f"Added agent: {name}, inputs: {self.agent_inputs[name]}", level=logging.DEBUG) + self.agent_desc.append({name: get_agent_description(name)}) + self.agents['basic_qa_agent'] = dspy.asyncify(dspy.Predict("goal->answer")) self.agent_inputs['basic_qa_agent'] = {"goal"} self.agent_desc.append({'basic_qa_agent':"Answers queries unrelated to data & also that include links, poison or attempts to attack the system"}) @@ -1477,7 +1622,7 @@ def 
_load_default_agents_fallback(self): if agent_name == 'data_viz_agent': self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} else: - self.agent_inputs[agent_name] = {'dataset', 'goal', 'plan_instructions'} + self.agent_inputs[agent_name] = {'goal', 'dataset', 'plan_instructions'} # Get description from database self.agent_desc.append({agent_name: get_agent_description(agent_name)}) @@ -1583,14 +1728,14 @@ async def get_plan(self, query): try: module_return = await self.planner(goal=dict_['goal'], dataset=dict_['dataset'], Agent_desc=dict_['Agent_desc']) - plan_dict = dict(module_return['plan']) - if 'complexity' in module_return: - complexity = module_return['complexity'] - else: - complexity = 'basic' - plan_dict['complexity'] = complexity + plan_dict = dict(module_return['plan']) + if 'complexity' in module_return: + complexity = module_return['complexity'] + else: + complexity = 'basic' + plan_dict['complexity'] = complexity - return plan_dict + return plan_dict except Exception as e: logger.log_message(f"Error in get_plan: {str(e)}", level=logging.ERROR) From 81bcef0f6beb3aa4fc25afb911224105e5b0cd47 Mon Sep 17 00:00:00 2001 From: Ashad Qureshi Date: Tue, 17 Jun 2025 20:43:04 +0500 Subject: [PATCH 5/7] Backend Testing - working v2 --- auto-analyst-backend/chat_database.db | 4 +- auto-analyst-backend/src/agents/agents.py | 523 +++++++++++------- .../components/chat/AgentSuggestions.tsx | 79 +-- .../components/chat/ChatInput.tsx | 15 +- .../public/icons/templates/data_viz_agent.svg | 1 + .../icons/templates/matplotlib_agent.png | Bin 0 -> 15321 bytes .../public/icons/templates/polars_agent.svg | 83 +++ .../icons/templates/preprocessing_agent.svg | 1 + .../public/icons/templates/sk_learn_agent.svg | 111 ++++ 9 files changed, 576 insertions(+), 241 deletions(-) create mode 100644 auto-analyst-frontend/public/icons/templates/data_viz_agent.svg create mode 100644 auto-analyst-frontend/public/icons/templates/matplotlib_agent.png 
create mode 100644 auto-analyst-frontend/public/icons/templates/polars_agent.svg create mode 100644 auto-analyst-frontend/public/icons/templates/preprocessing_agent.svg create mode 100644 auto-analyst-frontend/public/icons/templates/sk_learn_agent.svg diff --git a/auto-analyst-backend/chat_database.db b/auto-analyst-backend/chat_database.db index 440ad16d..1197c58b 100644 --- a/auto-analyst-backend/chat_database.db +++ b/auto-analyst-backend/chat_database.db @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79c9f2406593f491c31fa9e9a5ee10ec3667f3dc12350b02a40b78f66ac155ec -size 3137536 +oid sha256:c2d3913c49d36bcbb74c9eb4a9277bb20be677bc89d47c2abf16e9a64e6282c2 +size 761856 diff --git a/auto-analyst-backend/src/agents/agents.py b/auto-analyst-backend/src/agents/agents.py index bb614b43..8de9d96f 100644 --- a/auto-analyst-backend/src/agents/agents.py +++ b/auto-analyst-backend/src/agents/agents.py @@ -9,7 +9,7 @@ logger = Logger("agents", see_time=True, console_log=False) # === CUSTOM AGENT FUNCTIONALITY === -def create_custom_agent_signature(agent_name, description, prompt_template): +def create_custom_agent_signature(agent_name, description, prompt_template, category=None): """ Dynamically creates a dspy.Signature class for custom agents. 
@@ -17,13 +17,18 @@ def create_custom_agent_signature(agent_name, description, prompt_template): agent_name: Name of the custom agent (e.g., 'pytorch_agent') description: Short description for agent selection prompt_template: Main prompt/instructions for agent behavior + category: Agent category from database (e.g., 'Visualization', 'Modelling', 'Data Manipulation') Returns: A dspy.Signature class with the custom prompt and standard input/output fields """ # Check if this is a visualization agent to determine input fields - is_viz_agent = 'viz' in agent_name.lower() or 'visual' in agent_name.lower() or 'plot' in agent_name.lower() or 'chart' in agent_name.lower() + # First check category, then fallback to name-based detection + if category and category.lower() == 'visualization': + is_viz_agent = True + else: + is_viz_agent = 'viz' in agent_name.lower() or 'visual' in agent_name.lower() or 'plot' in agent_name.lower() or 'chart' in agent_name.lower() # Standard input/output fields that match the unified agent signatures class_attributes = { @@ -95,7 +100,8 @@ def load_user_enabled_templates_from_db(user_id, db_session): signature = create_custom_agent_signature( template.template_name, template.description, - template.prompt_template + template.prompt_template, + template.category # Pass the category from database ) agent_signatures[template.template_name] = signature @@ -172,7 +178,8 @@ def load_user_enabled_templates_for_planner_from_db(user_id, db_session): signature = create_custom_agent_signature( template.template_name, template.description, - template.prompt_template + template.prompt_template, + template.category # Pass the category from database ) agent_signatures[template.template_name] = signature @@ -292,7 +299,8 @@ def load_all_available_templates_from_db(db_session): signature = create_custom_agent_signature( template.template_name, template.description, - template.prompt_template + template.prompt_template, + template.category # Pass the category 
from database ) agent_signatures[template.template_name] = signature @@ -630,9 +638,6 @@ def __init__(self): self.allocator = dspy.Predict("goal,planner_desc,dataset->exact_word_complexity,reasoning") async def forward(self, goal, dataset, Agent_desc): - logger.log_message(f"Planner forward called with goal: {goal[:100]}...", level=logging.INFO) - logger.log_message(f"Agent descriptions: {Agent_desc}", level=logging.DEBUG) - # Check if we have any agents available if not Agent_desc or Agent_desc == "[]" or len(str(Agent_desc).strip()) < 10: logger.log_message("No agents available for planning", level=logging.WARNING) @@ -644,11 +649,8 @@ async def forward(self, goal, dataset, Agent_desc): try: complexity = self.allocator(goal=goal, planner_desc=str(self.planner_desc), dataset=str(dataset)) - logger.log_message(f"Complexity determined: {complexity.exact_word_complexity.strip()}", level=logging.INFO) - # If complexity is unrelated, return basic_qa_agent if complexity.exact_word_complexity.strip() == "unrelated": - logger.log_message("Query classified as unrelated, using basic_qa_agent", level=logging.INFO) return { "complexity": complexity.exact_word_complexity.strip(), "plan": "basic_qa_agent", @@ -705,7 +707,6 @@ async def forward(self, goal, dataset, Agent_desc): "plan_instructions": {"error": f"Planning error: {str(e)}"} } - logger.log_message(f"Final planner output: {output}", level=logging.INFO) return output @@ -1127,59 +1128,115 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): self.agent_inputs = {} self.agent_desc = [] - # If no agents provided, load core agents from database - if not agents: - # Load the 4 core agents from database - core_agent_names = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent'] - - for agent_name in core_agent_names: - # Get the agent signature class - if agent_name == 'preprocessing_agent': - agent_signature = preprocessing_agent - elif agent_name == 
'statistical_analytics_agent': - agent_signature = statistical_analytics_agent - elif agent_name == 'sk_learn_agent': - agent_signature = sk_learn_agent - elif agent_name == 'data_viz_agent': - agent_signature = data_viz_agent - - # Add to agents dict - self.agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(agent_signature)) + logger.log_message(f"[INIT] Initializing auto_analyst_ind with user_id={user_id}, agents={len(agents) if agents else 0}", level=logging.INFO) + + # Load core agents based on user preferences (not always loaded) + if not agents and user_id and db_session: + try: + # Get user preferences for core agents + from src.db.schemas.models import AgentTemplate, UserTemplatePreference - # Set input fields based on signature - if agent_name == 'data_viz_agent': - self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} - else: - self.agent_inputs[agent_name] = {'goal', 'dataset', 'plan_instructions'} + core_agent_names = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent'] - # Get description from database - self.agent_desc.append({agent_name: get_agent_description(agent_name)}) + for agent_name in core_agent_names: + logger.log_message(f"[INIT] Processing core agent: {agent_name}", level=logging.DEBUG) + + # Check if user has enabled this core agent + template = db_session.query(AgentTemplate).filter( + AgentTemplate.template_name == agent_name, + AgentTemplate.is_active == True + ).first() + + if not template: + logger.log_message(f"[INIT] Core agent template '{agent_name}' not found in database", level=logging.WARNING) + continue + + # Get the agent signature class + if agent_name == 'preprocessing_agent': + agent_signature = preprocessing_agent + elif agent_name == 'statistical_analytics_agent': + agent_signature = statistical_analytics_agent + elif agent_name == 'sk_learn_agent': + agent_signature = sk_learn_agent + elif agent_name == 'data_viz_agent': + agent_signature = 
data_viz_agent + + # Add to agents dict + self.agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(agent_signature)) + + # Set input fields based on signature + if agent_name == 'data_viz_agent': + self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} + else: + self.agent_inputs[agent_name] = {'goal', 'dataset', 'plan_instructions'} + + # Get description from database + self.agent_desc.append({agent_name: get_agent_description(agent_name)}) + logger.log_message(f"[INIT] Successfully loaded core agent: {agent_name} with inputs: {self.agent_inputs[agent_name]}", level=logging.INFO) + + except Exception as e: + logger.log_message(f"[INIT] Error loading core agents based on preferences: {str(e)}", level=logging.ERROR) + # Fallback to loading all core agents if preference system fails + self._load_default_agents_fallback() + elif not agents: + # If no user_id/db_session provided, load all core agents as fallback + logger.log_message(f"[INIT] No agents provided and no user_id/db_session, loading fallback agents", level=logging.INFO) + self._load_default_agents_fallback() else: # Load standard agents from provided list (legacy support) + logger.log_message(f"[INIT] Loading agents from provided list (legacy support)", level=logging.INFO) for i, a in enumerate(agents): name = a.__pydantic_core_schema__['schema']['model_name'] self.agents[name] = dspy.asyncify(dspy.ChainOfThought(a)) self.agent_inputs[name] = {x.strip() for x in str(agents[i].__pydantic_core_schema__['cls']).split('->')[0].split('(')[1].split(',')} - logger.log_message(f"Added agent: {name}, inputs: {self.agent_inputs[name]}", level=logging.DEBUG) + logger.log_message(f"[INIT] Added legacy agent: {name}, inputs: {self.agent_inputs[name]}", level=logging.DEBUG) self.agent_desc.append({name: get_agent_description(name)}) - # Load user-enabled template agents if user_id and db_session are provided + # Load ALL available template agents if user_id and db_session are 
provided + # For individual agent execution (@agent_name), users should be able to access any available agent if user_id and db_session: try: - # For individual use, load all available templates (not just planner-enabled ones) - template_signatures = load_user_enabled_templates_from_db(user_id, db_session) + # For individual use, load ALL available templates regardless of user preferences + template_signatures = load_all_available_templates_from_db(db_session) + + logger.log_message(f"[INIT] Loaded {len(template_signatures)} template signatures from database", level=logging.INFO) for template_name, signature in template_signatures.items(): + # Skip if this is a core agent - we'll load it separately + if template_name in ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent']: + logger.log_message(f"[INIT] Skipping template {template_name} as it's a core agent", level=logging.DEBUG) + continue + # Add template agent to agents dict self.agents[template_name] = dspy.asyncify(dspy.ChainOfThought(signature)) - # Determine if this is a visualization agent based on name - is_viz_agent = (template_name == 'data_viz_agent' or - 'viz' in template_name.lower() or - 'visual' in template_name.lower() or - 'plot' in template_name.lower() or - 'chart' in template_name.lower() or - 'matplotlib' in template_name.lower()) + # Determine if this is a visualization agent based on database category + is_viz_agent = False + try: + from src.db.schemas.models import AgentTemplate + + # Find template record to check category + template_record = db_session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() + + if template_record and template_record.category and template_record.category.lower() == 'visualization': + is_viz_agent = True + else: + # Fallback to name-based detection for legacy templates + is_viz_agent = ('viz' in template_name.lower() or + 'visual' in template_name.lower() or + 'plot' in 
template_name.lower() or + 'chart' in template_name.lower() or + 'matplotlib' in template_name.lower()) + except Exception as cat_error: + logger.log_message(f"[INIT] Error checking category for template {template_name}: {str(cat_error)}", level=logging.WARNING) + # Fallback to name-based detection + is_viz_agent = ('viz' in template_name.lower() or + 'visual' in template_name.lower() or + 'plot' in template_name.lower() or + 'chart' in template_name.lower() or + 'matplotlib' in template_name.lower()) # Set input fields based on agent type if is_viz_agent: @@ -1189,12 +1246,10 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): # Store template agent description try: - from src.db.schemas.models import AgentTemplate - - # Find template record - template_record = db_session.query(AgentTemplate).filter( - AgentTemplate.template_name == template_name - ).first() + if not template_record: + template_record = db_session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() if template_record: description = f"Template: {template_record.description}" @@ -1202,13 +1257,13 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): else: self.agent_desc.append({template_name: f"Template: {template_name}"}) except Exception as desc_error: - logger.log_message(f"Error getting description for template {template_name}: {str(desc_error)}", level=logging.WARNING) + logger.log_message(f"[INIT] Error getting description for template {template_name}: {str(desc_error)}", level=logging.WARNING) self.agent_desc.append({template_name: f"Template: {template_name}"}) - logger.log_message(f"Loaded {len(template_signatures)} templates for individual use", level=logging.DEBUG) - + logger.log_message(f"[INIT] Successfully loaded template agent: {template_name} with inputs: {self.agent_inputs[template_name]}, is_viz_agent: {is_viz_agent}", level=logging.INFO) + except Exception as e: - logger.log_message(f"Error loading 
template agents for user {user_id}: {str(e)}", level=logging.ERROR) + logger.log_message(f"[INIT] Error loading template agents for user {user_id}: {str(e)}", level=logging.ERROR) self.agents['basic_qa_agent'] = dspy.asyncify(dspy.Predict("goal->answer")) self.agent_inputs['basic_qa_agent'] = {"goal"} @@ -1220,6 +1275,42 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): # Store user_id for usage tracking self.user_id = user_id + + # Log final summary + logger.log_message(f"[INIT] Initialization complete. Total agents loaded: {len(self.agents)}", level=logging.INFO) + logger.log_message(f"[INIT] Available agents: {list(self.agents.keys())}", level=logging.INFO) + logger.log_message(f"[INIT] Agent inputs mapping: {self.agent_inputs}", level=logging.DEBUG) + + def _load_default_agents_fallback(self): + """Fallback method to load default agents when preference system fails""" + logger.log_message("Loading default agents as fallback for auto_analyst_ind", level=logging.WARNING) + + # Load the 4 core agents from database + core_agent_names = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent'] + + for agent_name in core_agent_names: + # Get the agent signature class + if agent_name == 'preprocessing_agent': + agent_signature = preprocessing_agent + elif agent_name == 'statistical_analytics_agent': + agent_signature = statistical_analytics_agent + elif agent_name == 'sk_learn_agent': + agent_signature = sk_learn_agent + elif agent_name == 'data_viz_agent': + agent_signature = data_viz_agent + + # Add to agents dict + self.agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(agent_signature)) + + # Set input fields based on signature + if agent_name == 'data_viz_agent': + self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} + else: + self.agent_inputs[agent_name] = {'goal', 'dataset', 'plan_instructions'} + + # Get description from database + 
self.agent_desc.append({agent_name: get_agent_description(agent_name)}) + logger.log_message(f"Added fallback agent: {agent_name}", level=logging.DEBUG) async def _track_agent_usage(self, agent_name): """Track usage for template agents""" @@ -1290,59 +1381,58 @@ async def _track_agent_usage(self, agent_name): async def execute_agent(self, specified_agent, inputs): """Execute agent and generate memory summary in parallel""" try: + logger.log_message(f"[EXECUTE] Starting execution of agent: {specified_agent}", level=logging.INFO) + logger.log_message(f"[EXECUTE] Agent inputs: {inputs}", level=logging.DEBUG) + # Execute main agent agent_result = await self.agents[specified_agent.strip()](**inputs) # Track usage for custom agents and templates await self._track_agent_usage(specified_agent.strip()) + logger.log_message(f"[EXECUTE] Agent {specified_agent} execution completed successfully", level=logging.INFO) return specified_agent.strip(), dict(agent_result) except Exception as e: + logger.log_message(f"[EXECUTE] Error executing agent {specified_agent}: {str(e)}", level=logging.ERROR) + import traceback + logger.log_message(f"[EXECUTE] Full traceback: {traceback.format_exc()}", level=logging.ERROR) return specified_agent.strip(), {"error": str(e)} - async def forward(self, query, specified_agent): - logger.log_message(f"[DEBUG] auto_analyst_ind.forward called with query: '{query[:100]}...', agent: '{specified_agent}'", level=logging.DEBUG) - + async def forward(self, query, specified_agent): try: + logger.log_message(f"[FORWARD] Processing query with specified agent: {specified_agent}", level=logging.INFO) + logger.log_message(f"[FORWARD] Query: {query}", level=logging.DEBUG) + # If specified_agent contains multiple agents separated by commas # This is for handling multiple @agent mentions in one query if "," in specified_agent: - logger.log_message(f"[DEBUG] Multiple agents detected in auto_analyst_ind", level=logging.DEBUG) agent_list = [agent.strip() for agent in 
specified_agent.split(",")] + logger.log_message(f"[FORWARD] Multiple agents detected: {agent_list}", level=logging.INFO) return await self.execute_multiple_agents(query, agent_list) - logger.log_message(f"[DEBUG] Processing single agent: '{specified_agent}'", level=logging.DEBUG) - # Process query with specified agent (single agent case) dict_ = {} - logger.log_message(f"[DEBUG] Retrieving dataset info", level=logging.DEBUG) dict_['dataset'] = self.dataset.retrieve(query)[0].text - logger.log_message(f"[DEBUG] Dataset retrieved, length: {len(dict_['dataset'])}", level=logging.DEBUG) - - logger.log_message(f"[DEBUG] Retrieving styling index", level=logging.DEBUG) dict_['styling_index'] = self.styling_index.retrieve(query)[0].text - logger.log_message(f"[DEBUG] Styling index retrieved, length: {len(dict_['styling_index'])}", level=logging.DEBUG) dict_['hint'] = [] dict_['goal'] = query dict_['Agent_desc'] = str(self.agent_desc) - logger.log_message(f"[DEBUG] Checking if agent '{specified_agent.strip()}' exists in agent_inputs", level=logging.DEBUG) - logger.log_message(f"[DEBUG] Available agent_inputs keys: {list(self.agent_inputs.keys())}", level=logging.DEBUG) + logger.log_message(f"[FORWARD] Retrieved context - dataset length: {len(dict_['dataset'])}, styling_index length: {len(dict_['styling_index'])}", level=logging.DEBUG) if specified_agent.strip() not in self.agent_inputs: - logger.log_message(f"[ERROR] Agent '{specified_agent.strip()}' not found in agent_inputs", level=logging.ERROR) + logger.log_message(f"[FORWARD] ERROR: Agent '{specified_agent.strip()}' not found in agent_inputs", level=logging.ERROR) + logger.log_message(f"[FORWARD] Available agents: {list(self.agent_inputs.keys())}", level=logging.ERROR) return {"response": f"Agent '{specified_agent.strip()}' not found in agent inputs"} - - # Prepare inputs - logger.log_message(f"[DEBUG] Preparing inputs for agent '{specified_agent.strip()}'", level=logging.DEBUG) - logger.log_message(f"[DEBUG] Required 
inputs for agent: {self.agent_inputs[specified_agent.strip()]}", level=logging.DEBUG) # Create inputs that match exactly what the agent expects inputs = {} required_fields = self.agent_inputs[specified_agent.strip()] + logger.log_message(f"[FORWARD] Required fields for {specified_agent.strip()}: {required_fields}", level=logging.INFO) + for field in required_fields: if field == 'goal': inputs['goal'] = query @@ -1353,66 +1443,56 @@ async def forward(self, query, specified_agent): elif field == 'plan_instructions': inputs['plan_instructions'] = "" # Empty for individual agent use elif field == 'hint': - inputs['hint'] = "" # Empty string for hint + inputs['hint'] = "" # Empty string for hint else: # For any other fields, try to get from dict_ if available if field in dict_: inputs[field] = dict_[field] else: - logger.log_message(f"[WARNING] Field '{field}' required by agent but not available in dict_", level=logging.WARNING) + logger.log_message(f"[FORWARD] WARNING: Field '{field}' required by agent but not available in dict_", level=logging.WARNING) inputs[field] = "" # Provide empty string as fallback - logger.log_message(f"[DEBUG] Final inputs prepared: {list(inputs.keys())}", level=logging.DEBUG) - logger.log_message(f"[DEBUG] Inputs match required fields: {set(inputs.keys()) == required_fields}", level=logging.DEBUG) - - logger.log_message(f"[DEBUG] Checking if agent '{specified_agent.strip()}' exists in agents dict", level=logging.DEBUG) - logger.log_message(f"[DEBUG] Available agents: {list(self.agents.keys())}", level=logging.DEBUG) + logger.log_message(f"[FORWARD] Prepared inputs for {specified_agent.strip()}: {list(inputs.keys())}", level=logging.INFO) if specified_agent.strip() not in self.agents: - logger.log_message(f"[ERROR] Agent '{specified_agent.strip()}' not found in agents dict", level=logging.ERROR) + logger.log_message(f"[FORWARD] ERROR: Agent '{specified_agent.strip()}' not found in agents", level=logging.ERROR) + logger.log_message(f"[FORWARD] 
Available agents: {list(self.agents.keys())}", level=logging.ERROR) return {"response": f"Agent '{specified_agent.strip()}' not found in agents"} - # Execute agent - logger.log_message(f"[DEBUG] About to execute agent '{specified_agent.strip()}'", level=logging.DEBUG) + logger.log_message(f"[FORWARD] About to execute agent {specified_agent.strip()}", level=logging.INFO) result = await self.agents[specified_agent.strip()](**inputs) - logger.log_message(f"[DEBUG] Agent execution completed. Result type: {type(result)}", level=logging.DEBUG) - logger.log_message(f"[DEBUG] Agent result content: {str(result)[:200]}...", level=logging.DEBUG) - + # Track usage for template agents - logger.log_message(f"[DEBUG] Tracking usage for agent", level=logging.DEBUG) await self._track_agent_usage(specified_agent.strip()) - logger.log_message(f"[DEBUG] Converting result to dict", level=logging.DEBUG) try: result_dict = dict(result) - logger.log_message(f"[DEBUG] Result converted to dict successfully. Keys: {list(result_dict.keys())}", level=logging.DEBUG) + logger.log_message(f"[FORWARD] Agent execution successful, result keys: {list(result_dict.keys())}", level=logging.INFO) except Exception as dict_error: - logger.log_message(f"[ERROR] Failed to convert result to dict: {str(dict_error)}", level=logging.ERROR) - logger.log_message(f"[ERROR] Result type that failed conversion: {type(result)}", level=logging.ERROR) + logger.log_message(f"[FORWARD] Error converting agent result to dict: {str(dict_error)}", level=logging.ERROR) return {"response": f"Error converting agent result to dict: {str(dict_error)}"} - logger.log_message(f"[DEBUG] Creating output dict", level=logging.DEBUG) output_dict = {specified_agent.strip(): result_dict} - logger.log_message(f"[DEBUG] Output dict created successfully", level=logging.DEBUG) # Check for errors in the agent's response (not in the outer dict) - logger.log_message(f"[DEBUG] Checking for errors in agent response", level=logging.DEBUG) if "error" 
in result_dict: - logger.log_message(f"[DEBUG] Error found in agent response: {result_dict['error']}", level=logging.DEBUG) + logger.log_message(f"[FORWARD] Agent returned error: {result_dict['error']}", level=logging.ERROR) return {"response": f"Error executing agent: {result_dict['error']}"} - logger.log_message(f"[DEBUG] auto_analyst_ind.forward completed successfully", level=logging.DEBUG) + logger.log_message(f"[FORWARD] Successfully processed agent {specified_agent.strip()}", level=logging.INFO) return output_dict except Exception as e: - logger.log_message(f"[ERROR] Exception in auto_analyst_ind.forward: {str(e)}", level=logging.ERROR) + logger.log_message(f"[FORWARD] Exception in auto_analyst_ind.forward: {str(e)}", level=logging.ERROR) import traceback - logger.log_message(f"[ERROR] Full traceback: {traceback.format_exc()}", level=logging.ERROR) + logger.log_message(f"[FORWARD] Full traceback: {traceback.format_exc()}", level=logging.ERROR) return {"response": f"This is the error from the system: {str(e)}"} async def execute_multiple_agents(self, query, agent_list): """Execute multiple agents sequentially on the same query""" try: + logger.log_message(f"[MULTI] Executing multiple agents: {agent_list}", level=logging.INFO) + # Initialize resources dict_ = {} dict_['dataset'] = self.dataset.retrieve(query)[0].text @@ -1426,18 +1506,19 @@ async def execute_multiple_agents(self, query, agent_list): # Execute each agent sequentially for agent_name in agent_list: + logger.log_message(f"[MULTI] Processing agent: {agent_name}", level=logging.INFO) + if agent_name not in self.agents: + logger.log_message(f"[MULTI] Agent '{agent_name}' not found", level=logging.ERROR) results[agent_name] = {"error": f"Agent '{agent_name}' not found"} continue - # Prepare inputs for this agent - logger.log_message(f"[DEBUG] Preparing inputs for agent '{agent_name}'", level=logging.DEBUG) - logger.log_message(f"[DEBUG] Required inputs for agent: {self.agent_inputs[agent_name]}", 
level=logging.DEBUG) - # Create inputs that match exactly what the agent expects inputs = {} required_fields = self.agent_inputs[agent_name] + logger.log_message(f"[MULTI] Required fields for {agent_name}: {required_fields}", level=logging.DEBUG) + for field in required_fields: if field == 'goal': inputs['goal'] = query @@ -1454,26 +1535,34 @@ async def execute_multiple_agents(self, query, agent_list): if field in dict_: inputs[field] = dict_[field] else: - logger.log_message(f"[WARNING] Field '{field}' required by agent but not available in dict_", level=logging.WARNING) + logger.log_message(f"[MULTI] WARNING: Field '{field}' required by agent but not available in dict_", level=logging.WARNING) - logger.log_message(f"[DEBUG] Final inputs prepared for '{agent_name}': {list(inputs.keys())}", level=logging.DEBUG) - logger.log_message(f"[DEBUG] Inputs match required fields: {set(inputs.keys()) == required_fields}", level=logging.DEBUG) + logger.log_message(f"[MULTI] Prepared inputs for {agent_name}: {list(inputs.keys())}", level=logging.DEBUG) # Execute agent - agent_result = await self.agents[agent_name](**inputs) - agent_dict = dict(agent_result) - results[agent_name] = agent_dict - - # Track usage for template agents - await self._track_agent_usage(agent_name) - - # Collect code for later combination - if 'code' in agent_dict: - code_list.append(agent_dict['code']) + try: + agent_result = await self.agents[agent_name](**inputs) + agent_dict = dict(agent_result) + results[agent_name] = agent_dict + + # Track usage for template agents + await self._track_agent_usage(agent_name) + + # Collect code for later combination + if 'code' in agent_dict: + code_list.append(agent_dict['code']) + + logger.log_message(f"[MULTI] Successfully executed agent: {agent_name}", level=logging.INFO) + + except Exception as agent_error: + logger.log_message(f"[MULTI] Error executing agent {agent_name}: {str(agent_error)}", level=logging.ERROR) + results[agent_name] = {"error": 
str(agent_error)} + logger.log_message(f"[MULTI] Completed multiple agent execution. Results: {list(results.keys())}", level=logging.INFO) return results except Exception as e: + logger.log_message(f"[MULTI] Error executing multiple agents: {str(e)}", level=logging.ERROR) return {"response": f"Error executing multiple agents: {str(e)}"} @@ -1487,13 +1576,13 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): self.agent_inputs = {} self.agent_desc = [] - logger.log_message(f"Initializing auto_analyst for user_id: {user_id}", level=logging.INFO) - # Load user-enabled template agents if user_id and db_session are provided + logger.log_message(f"Loading user-enabled template agents for user {user_id}", level=logging.INFO) if user_id and db_session: try: - # For individual use, load all available templates (not just planner-enabled ones) - template_signatures = load_user_enabled_templates_from_db(user_id, db_session) + # For planner use, load planner-enabled templates (max 10, prioritized by usage) + template_signatures = load_user_enabled_templates_for_planner_from_db(user_id, db_session) + logger.log_message(f"Loaded {len(template_signatures)} templates for planner use", level=logging.INFO) for template_name, signature in template_signatures.items(): # Skip if this is a core agent - we'll load it separately @@ -1503,12 +1592,33 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): # Add template agent to agents dict self.agents[template_name] = dspy.asyncify(dspy.ChainOfThought(signature)) - # Determine if this is a visualization agent based on name - is_viz_agent = ('viz' in template_name.lower() or - 'visual' in template_name.lower() or - 'plot' in template_name.lower() or - 'chart' in template_name.lower() or - 'matplotlib' in template_name.lower()) + # Determine if this is a visualization agent based on database category + is_viz_agent = False + try: + from src.db.schemas.models import AgentTemplate + + # Find template 
record to check category + template_record = db_session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() + + if template_record and template_record.category and template_record.category.lower() == 'visualization': + is_viz_agent = True + else: + # Fallback to name-based detection for legacy templates + is_viz_agent = ('viz' in template_name.lower() or + 'visual' in template_name.lower() or + 'plot' in template_name.lower() or + 'chart' in template_name.lower() or + 'matplotlib' in template_name.lower()) + except Exception as cat_error: + logger.log_message(f"Error checking category for template {template_name}: {str(cat_error)}", level=logging.WARNING) + # Fallback to name-based detection + is_viz_agent = ('viz' in template_name.lower() or + 'visual' in template_name.lower() or + 'plot' in template_name.lower() or + 'chart' in template_name.lower() or + 'matplotlib' in template_name.lower()) # Set input fields based on agent type if is_viz_agent: @@ -1518,12 +1628,10 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): # Store template agent description try: - from src.db.schemas.models import AgentTemplate - - # Find template record - template_record = db_session.query(AgentTemplate).filter( - AgentTemplate.template_name == template_name - ).first() + if not template_record: + template_record = db_session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() if template_record: description = f"Template: {template_record.description}" @@ -1533,40 +1641,71 @@ def __init__(self, agents, retrievers, user_id=None, db_session=None): except Exception as desc_error: logger.log_message(f"Error getting description for template {template_name}: {str(desc_error)}", level=logging.WARNING) self.agent_desc.append({template_name: f"Template: {template_name}"}) - - logger.log_message(f"Loaded {len([t for t in template_signatures.keys() if t not in ['preprocessing_agent', 
'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent']])} custom templates for individual use", level=logging.DEBUG) - + except Exception as e: logger.log_message(f"Error loading template agents for user {user_id}: {str(e)}", level=logging.ERROR) - # Load core agents (always load these, regardless of template preferences) - if not agents: - # Load the 4 core agents from database - core_agent_names = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent'] - - for agent_name in core_agent_names: - # Get the agent signature class - if agent_name == 'preprocessing_agent': - agent_signature = preprocessing_agent - elif agent_name == 'statistical_analytics_agent': - agent_signature = statistical_analytics_agent - elif agent_name == 'sk_learn_agent': - agent_signature = sk_learn_agent - elif agent_name == 'data_viz_agent': - agent_signature = data_viz_agent - - # Add to agents dict - self.agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(agent_signature)) + # Load core agents based on user preferences (not always loaded) + if not agents and user_id and db_session: + try: + # Get user preferences for core agents + from src.db.schemas.models import AgentTemplate, UserTemplatePreference - # Set input fields based on signature - if agent_name == 'data_viz_agent': - self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} - else: - self.agent_inputs[agent_name] = {'goal', 'dataset', 'plan_instructions'} + core_agent_names = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent'] - # Get description from database - self.agent_desc.append({agent_name: get_agent_description(agent_name)}) - logger.log_message(f"Loaded core agent: {agent_name} with inputs: {self.agent_inputs[agent_name]}", level=logging.DEBUG) + for agent_name in core_agent_names: + # Check if user has enabled this core agent + template = db_session.query(AgentTemplate).filter( + 
AgentTemplate.template_name == agent_name, + AgentTemplate.is_active == True + ).first() + + if not template: + logger.log_message(f"Core agent template '{agent_name}' not found in database", level=logging.WARNING) + continue + + # Check user preference + preference = db_session.query(UserTemplatePreference).filter( + UserTemplatePreference.user_id == user_id, + UserTemplatePreference.template_id == template.template_id + ).first() + + # Core agents are enabled by default unless explicitly disabled + is_enabled = preference.is_enabled if preference else True + + if not is_enabled: + continue + + # Get the agent signature class + if agent_name == 'preprocessing_agent': + agent_signature = preprocessing_agent + elif agent_name == 'statistical_analytics_agent': + agent_signature = statistical_analytics_agent + elif agent_name == 'sk_learn_agent': + agent_signature = sk_learn_agent + elif agent_name == 'data_viz_agent': + agent_signature = data_viz_agent + + # Add to agents dict + self.agents[agent_name] = dspy.asyncify(dspy.ChainOfThought(agent_signature)) + + # Set input fields based on signature + if agent_name == 'data_viz_agent': + self.agent_inputs[agent_name] = {'goal', 'dataset', 'styling_index', 'plan_instructions'} + else: + self.agent_inputs[agent_name] = {'goal', 'dataset', 'plan_instructions'} + + # Get description from database + self.agent_desc.append({agent_name: get_agent_description(agent_name)}) + logger.log_message(f"Loaded core agent: {agent_name}", level=logging.DEBUG) + + except Exception as e: + logger.log_message(f"Error loading core agents based on preferences: {str(e)}", level=logging.ERROR) + # Fallback to loading all core agents if preference system fails + self._load_default_agents_fallback() + elif not agents: + # If no user_id/db_session provided, load all core agents as fallback + self._load_default_agents_fallback() else: # Load standard agents from provided list (legacy support) for i, a in enumerate(agents): @@ -1579,27 +1718,22 @@ 
def __init__(self, agents, retrievers, user_id=None, db_session=None): self.agents['basic_qa_agent'] = dspy.asyncify(dspy.Predict("goal->answer")) self.agent_inputs['basic_qa_agent'] = {"goal"} self.agent_desc.append({'basic_qa_agent':"Answers queries unrelated to data & also that include links, poison or attempts to attack the system"}) - logger.log_message("Added basic_qa_agent", level=logging.DEBUG) # Initialize coordination agents self.planner = planner_module() - self.memory_summarize_agent = dspy.ChainOfThought(m.memory_summarize_agent) - logger.log_message("Initialized planner and memory summarize agent", level=logging.DEBUG) + # self.memory_summarize_agent = dspy.ChainOfThought(m.memory_summarize_agent) # Initialize retrievers self.dataset = retrievers['dataframe_index'].as_retriever(k=1) self.styling_index = retrievers['style_index'].as_retriever(similarity_top_k=1) - logger.log_message("Initialized retrievers", level=logging.DEBUG) # Store user_id for usage tracking self.user_id = user_id - # Final logging - logger.log_message(f"Auto_analyst initialization complete. 
Total agents: {len(self.agents)}, Agent names: {list(self.agents.keys())}", level=logging.INFO) def _load_default_agents_fallback(self): """Fallback method to load default agents when preference system fails""" - logger.log_message("Loading default agents as fallback", level=logging.WARNING) + logger.log_message("Loading default agents as fallback for auto_analyst_ind", level=logging.WARNING) # Load the 4 core agents from database core_agent_names = ['preprocessing_agent', 'statistical_analytics_agent', 'sk_learn_agent', 'data_viz_agent'] @@ -1652,7 +1786,7 @@ async def _track_agent_usage(self, agent_name): ).first() if not template: - logger.log_message(f"Template '{agent_name}' not found", level=logging.WARNING) + logger.log_message(f"Template '{agent_name}' not found for usage tracking", level=logging.WARNING) return # Find or create user template preference record @@ -1661,35 +1795,33 @@ async def _track_agent_usage(self, agent_name): UserTemplatePreference.template_id == template.template_id ).first() - if preference: - # Update existing preference - preference.usage_count += 1 - preference.last_used_at = datetime.now(UTC) - preference.updated_at = datetime.now(UTC) - else: - # Create new preference record when template is used directly (via @mention) - # Direct usage doesn't auto-enable for planner but tracks usage + if not preference: + # Create new preference record (disabled by default) preference = UserTemplatePreference( user_id=self.user_id, template_id=template.template_id, - is_enabled=False, # Default disabled for planner - usage_count=1, - last_used_at=datetime.now(UTC), + is_enabled=False, # Disabled by default + usage_count=0, + last_used_at=None, created_at=datetime.now(UTC), updated_at=datetime.now(UTC) ) session.add(preference) + # Update usage tracking + preference.usage_count += 1 + preference.last_used_at = datetime.now(UTC) + preference.updated_at = datetime.now(UTC) session.commit() logger.log_message( - f"Tracked usage for template 
'{agent_name}' for user {self.user_id} (count: {preference.usage_count})", + f"Tracked usage for template '{agent_name}' (count: {preference.usage_count})", level=logging.DEBUG ) except Exception as e: session.rollback() - logger.log_message(f"Error tracking template usage for {agent_name}: {str(e)}", level=logging.ERROR) + logger.log_message(f"Error tracking usage for template {agent_name}: {str(e)}", level=logging.ERROR) finally: session.close() @@ -1698,7 +1830,6 @@ async def _track_agent_usage(self, agent_name): async def execute_agent(self, agent_name, inputs): """Execute a single agent with given inputs""" - logger.log_message(f"Executing single agent: {agent_name}", level=logging.DEBUG) try: result = await self.agents[agent_name.strip()](**inputs) @@ -1714,16 +1845,12 @@ async def execute_agent(self, agent_name, inputs): async def get_plan(self, query): """Get the analysis plan""" - logger.log_message(f"Getting plan for query: {query[:100]}...", level=logging.INFO) - dict_ = {} dict_['dataset'] = self.dataset.retrieve(query)[0].text dict_['styling_index'] = self.styling_index.retrieve(query)[0].text dict_['goal'] = query dict_['Agent_desc'] = str(self.agent_desc) - logger.log_message(f"Available agents for planning: {list(self.agents.keys())}", level=logging.INFO) - logger.log_message(f"Agent descriptions length: {len(self.agent_desc)}", level=logging.DEBUG) try: module_return = await self.planner(goal=dict_['goal'], dataset=dict_['dataset'], Agent_desc=dict_['Agent_desc']) @@ -1743,7 +1870,6 @@ async def get_plan(self, query): async def execute_plan(self, query, plan): """Execute the plan and yield results as they complete""" - logger.log_message(f"Executing plan: {plan}", level=logging.INFO) dict_ = {} dict_['dataset'] = self.dataset.retrieve(query)[0].text @@ -1755,10 +1881,8 @@ async def execute_plan(self, query, plan): # Clean and split the plan string into agent names plan_text = plan.get("plan", "").replace("Plan", "").replace(":", "").strip() - 
logger.log_message(f"Plan text after cleaning: {plan_text}", level=logging.DEBUG) if "basic_qa_agent" in plan_text: - logger.log_message("Executing basic_qa_agent", level=logging.INFO) inputs = dict(goal=query) agent_name, response = await self.execute_agent('basic_qa_agent', inputs) yield agent_name, inputs, response @@ -1766,7 +1890,6 @@ async def execute_plan(self, query, plan): plan_list = [agent.strip() for agent in plan_text.split("->") if agent.strip()] logger.log_message(f"Plan list: {plan_list}", level=logging.INFO) - # Parse the attached plan_instructions into a dict raw_instr = plan.get("plan_instructions", {}) if isinstance(raw_instr, str): @@ -1780,23 +1903,18 @@ async def execute_plan(self, query, plan): else: plan_instructions = {} - logger.log_message(f"Parsed plan instructions: {plan_instructions}", level=logging.DEBUG) # Check if we have no valid agents to execute if not plan_list or all(agent not in self.agents for agent in plan_list): - logger.log_message(f"No valid agents found in plan. 
Available agents: {list(self.agents.keys())}, Plan agents: {plan_list}", level=logging.ERROR) yield "plan_not_found", None, {"error": "No valid agents found in plan"} return # Execute agents in sequence for agent_name in plan_list: if agent_name not in self.agents: - logger.log_message(f"Agent '{agent_name}' not found in available agents: {list(self.agents.keys())}", level=logging.ERROR) yield agent_name, {}, {"error": f"Agent '{agent_name}' not available"} continue - - logger.log_message(f"Executing agent: {agent_name}", level=logging.INFO) - + try: # Prepare inputs for the agent inputs = {x: dict_[x] for x in self.agent_inputs[agent_name] if x in dict_} @@ -1807,15 +1925,14 @@ async def execute_plan(self, query, plan): else: inputs['plan_instructions'] = "" - logger.log_message(f"Agent inputs for {agent_name}: {list(inputs.keys())}", level=logging.DEBUG) + # logger.log_message(f"Agent inputs for {agent_name}: {inputs}", level=logging.INFO) # Execute the agent agent_result_name, response = await self.execute_agent(agent_name, inputs) - logger.log_message(f"Agent {agent_name} completed successfully", level=logging.INFO) yield agent_result_name, inputs, response except Exception as e: - logger.log_message(f"Error executing agent {agent_name}: {str(e)}", level=logging.ERROR) - yield agent_name, {}, {"error": f"Error executing {agent_name}: {str(e)}"} + logger.log_message(f"Error executing agent {agent_name}: {str(e)}", level=logging.ERROR) + yield agent_name, {}, {"error": f"Error executing {agent_name}: {str(e)}"} diff --git a/auto-analyst-frontend/components/chat/AgentSuggestions.tsx b/auto-analyst-frontend/components/chat/AgentSuggestions.tsx index 20dd1705..f1af1018 100644 --- a/auto-analyst-frontend/components/chat/AgentSuggestions.tsx +++ b/auto-analyst-frontend/components/chat/AgentSuggestions.tsx @@ -12,6 +12,7 @@ interface AgentSuggestion { description: string isCustom?: boolean isTemplate?: boolean + isPremium?: boolean } interface AgentSuggestionsProps { 
@@ -20,6 +21,7 @@ interface AgentSuggestionsProps { onSuggestionSelect: (agentName: string) => void isVisible: boolean userId?: number | null + onStateChange?: (hasSelection: boolean) => void } export default function AgentSuggestions({ @@ -27,7 +29,8 @@ export default function AgentSuggestions({ cursorPosition, onSuggestionSelect, isVisible, - userId + userId, + onStateChange }: AgentSuggestionsProps) { const [agents, setAgents] = useState([]) const [filteredAgents, setFilteredAgents] = useState([]) @@ -79,15 +82,15 @@ export default function AgentSuggestions({ const data = await response.json() const allAgents: AgentSuggestion[] = [] - // Add standard agents - if (data.standard_agents) { - data.standard_agents.forEach((agentName: string) => { - const standardAgent = standardAgents.find(agent => agent.name === agentName) - if (standardAgent) { - allAgents.push(standardAgent) - } - }) - } + // // Add standard agents + // if (data.standard_agents) { + // data.standard_agents.forEach((agentName: string) => { + // const standardAgent = standardAgents.find(agent => agent.name === agentName) + // if (standardAgent) { + // allAgents.push(standardAgent) + // } + // }) + // } // Add template agents (only for users with custom agents access) if (data.template_agents && data.template_agents.length > 0 && customAgentsAccess.hasAccess) { @@ -119,14 +122,15 @@ export default function AgentSuggestions({ if (response.ok) { const templateCategories = await response.json() const allTemplates: AgentSuggestion[] = [] - + console.log("templateCategories", templateCategories) // Flatten all templates from all categories templateCategories.forEach((category: any) => { if (category.templates) { const mappedTemplates = category.templates.map((template: any) => ({ name: template.agent_name, description: template.description, - isTemplate: true + // isTemplate: true, + isPremium: template.is_premium_only })) allTemplates.push(...mappedTemplates) } @@ -188,29 +192,35 @@ export default 
function AgentSuggestions({ ? message.slice(activeAtPos + 1, activeAtPos + 1 + spaceIndex) : textAfterAt - // Show suggestions if we're actively typing an agent name or just typed @ - if (!typedText.includes(' ')) { - // If no text after @, show all agents - if (typedText === '') { - setFilteredAgents(agents) - setSelectedIndex(agents.length > 0 ? 0 : -1) + // Show suggestions if we're actively typing an agent name or just typed @ + if (!typedText.includes(' ')) { + // If no text after @, show all agents + if (typedText === '') { + setFilteredAgents(agents) + setSelectedIndex(agents.length > 0 ? 0 : -1) + return + } + + // If there's text after @, filter agents that START WITH the typed text (autocomplete-style) + const filtered = agents.filter(agent => + agent.name.toLowerCase().startsWith(typedText.toLowerCase()) + ) + setFilteredAgents(filtered) + setSelectedIndex(filtered.length > 0 ? 0 : -1) return } - - // If there's text after @, filter agents based on that text - const filtered = agents.filter(agent => - agent.name.toLowerCase().includes(typedText.toLowerCase()) - ) - setFilteredAgents(filtered) - setSelectedIndex(filtered.length > 0 ? 0 : -1) - return - } } setFilteredAgents([]) setSelectedIndex(-1) }, [message, cursorPosition, agents, isVisible]) + // Report state changes to parent component + useEffect(() => { + const hasValidSelection = filteredAgents.length > 0 && selectedIndex >= 0 && selectedIndex < filteredAgents.length + onStateChange?.(hasValidSelection) + }, [filteredAgents, selectedIndex, onStateChange]) + // Handle keyboard navigation useEffect(() => { const handleKeyDown = (e: KeyboardEvent) => { @@ -219,25 +229,30 @@ export default function AgentSuggestions({ switch (e.key) { case 'ArrowDown': e.preventDefault() + e.stopPropagation() setSelectedIndex(prev => prev < filteredAgents.length - 1 ? prev + 1 : 0 ) break case 'ArrowUp': e.preventDefault() + e.stopPropagation() setSelectedIndex(prev => prev > 0 ? 
prev - 1 : filteredAgents.length - 1 ) break case 'Enter': - e.preventDefault() - e.stopPropagation() + // Only handle Enter if there's a valid selection if (selectedIndex >= 0 && selectedIndex < filteredAgents.length) { + e.preventDefault() + e.stopPropagation() onSuggestionSelect(filteredAgents[selectedIndex].name) } + // If no valid selection, let the event bubble up to ChatInput break case 'Escape': e.preventDefault() + e.stopPropagation() setFilteredAgents([]) setSelectedIndex(-1) break @@ -245,8 +260,8 @@ export default function AgentSuggestions({ } // Add event listener to document to capture keyboard events - document.addEventListener('keydown', handleKeyDown) - return () => document.removeEventListener('keydown', handleKeyDown) + document.addEventListener('keydown', handleKeyDown, true) // Use capture phase + return () => document.removeEventListener('keydown', handleKeyDown, true) }, [isVisible, filteredAgents, selectedIndex, onSuggestionSelect]) // Scroll selected item into view @@ -288,7 +303,7 @@ export default function AgentSuggestions({
{agent.name}
- {agent.isTemplate && ( + {agent.isPremium && ( Template diff --git a/auto-analyst-frontend/components/chat/ChatInput.tsx b/auto-analyst-frontend/components/chat/ChatInput.tsx index 5ff61148..0ef71b42 100644 --- a/auto-analyst-frontend/components/chat/ChatInput.tsx +++ b/auto-analyst-frontend/components/chat/ChatInput.tsx @@ -228,6 +228,9 @@ const ChatInput = forwardRef< const [showCommandSuggestions, setShowCommandSuggestions] = useState(false) const [commandQuery, setCommandQuery] = useState('') + // Agent suggestions state + const [agentSuggestionsHasSelection, setAgentSuggestionsHasSelection] = useState(false) + // Get subscription from store instead of manual construction const { subscription } = useUserSubscriptionStore() const deepAnalysisAccess = useFeatureAccess('DEEP_ANALYSIS', subscription) @@ -1951,13 +1954,16 @@ const ChatInput = forwardRef< onChange={handleInputChange} onKeyDown={(e) => { if (e.key === 'Enter' && !e.shiftKey) { - // Check if agent suggestions are visible and should handle the Enter key + // Check if agent suggestions are visible and have a selection const isAgentSuggestionsVisible = !showCommandSuggestions && message.includes('@'); - if (isAgentSuggestionsVisible) { - // Don't handle Enter here, let AgentSuggestions component handle it - // The AgentSuggestions component will preventDefault if it handles the event + + if (isAgentSuggestionsVisible && agentSuggestionsHasSelection) { + // AgentSuggestions will handle Enter key since it has a selection + // Don't preventDefault here - let AgentSuggestions handle it return; } + + // If no agent suggestions selection, handle normally e.preventDefault() handleSubmit(e) } @@ -1990,6 +1996,7 @@ const ChatInput = forwardRef< onSuggestionSelect={handleAgentSelect} isVisible={!showCommandSuggestions && message.includes('@')} userId={userId} + onStateChange={setAgentSuggestionsHasSelection} />
diff --git a/auto-analyst-frontend/public/icons/templates/data_viz_agent.svg b/auto-analyst-frontend/public/icons/templates/data_viz_agent.svg new file mode 100644 index 00000000..e7d9cd53 --- /dev/null +++ b/auto-analyst-frontend/public/icons/templates/data_viz_agent.svg @@ -0,0 +1 @@ + diff --git a/auto-analyst-frontend/public/icons/templates/matplotlib_agent.png b/auto-analyst-frontend/public/icons/templates/matplotlib_agent.png new file mode 100644 index 0000000000000000000000000000000000000000..2d515bcefd84fae1aa19ee810f636848e360c3dd GIT binary patch literal 15321 zcmZv@bx<4c_XdgwhvE*wo#4RTHM{;ol@M2w75fYDDJd)ad#^&H=pnC{&Qz; zCbP4dygPfIv(Gv2v5iqvk;OnIL4|>V!H}1e(tv@1qlcb5A|pYMxcW!nBjfwfE}-9m04a?ZBM`k_ON$yCX2@#+c? zyl5P0D&;Skr9nU3n@zA=tEHv~Sgx;Md-d4wyTm9ST+SOi5ux|_|9tc38&|V{d;uj7p}<}Hn@2(qafX%CfkC724B&)xKMjyN2dYJZnU#x|B5bUkvoZdbmG&E8kbw) z{}x%`6Q<20R-04Yrx|%?j2veTyGp;We;qcZ_G16qY77H2$(3{!D;8<--x^iun$(dF zdy4P5S5oI^mJ3mEFiVS0W@OCI!KI0%X7VMeQ}WsA?%iT(uhTNm8%KEdi%}Vtgy*yk zI+KzaZrmx)RA2u!KyPeC@iA5L>5Gs17{!SRJ3Z~BoG)!9%O^#lR;J3;AkVUQg8{^# z=NaX@ts!=|#M^=&dEW+!3m6(H2w=`^D269@@_ksp4#&MuNI+;NcJy84gIN5;=L$>H zQmyH#k16>A?|vJ-*YMjJi3GkH-^8(WJlj7`PjJ+{SRWV~7bR?i13PnC*Uxeis?t;Y zCG_N%>kH*vjiRry+R}?7)6M8~*gu(T@TIO|GmRP5Q?=rJ#iLJgk>@t)2^2wNnh=tUU6WdJLR)WAEwaormujy05@!eTAEg9KU{|IW2r+P@Bd&Fg+$5x~gcS%%%;v zCP$?|L7i?ZKjdXZN0M}x1au{fs(!!=4=|f)M(T={e$sWLl1JD?+8hSJVL={Rux2@1 zqURFLXcs-l5_6u_Octov7GA z-k6Ehxi=V-e!wXiamLi!@6L=_ZTR^&db2;&iO?etBQSlkr65FvNJb~1THVreGE6!> z7#%YgN`IM+QxdKd-zAQkwFs_zWMMRemoz0X*^nv%9IQXQT{3}PDxY66peHM-U8Yw0eM1|h5U?y8H=SPN0 zTM1)r>f#a*^%NpM__2w)r4%v4$XLmVg6iQKRUVu*B2IDoZ))8unBT|fA}IMUt-}Dv zJCB15_lSK?lX4pUr}E&Dr`BbnJit7GFPY7T-3XDJ zRknLx3Fndh#vWAW5EVZY6_k*m8SZ;lbh|C@fR2-hXZ3m#?enF_ETp zA$()D;+T@6#i8#Xk!=&Kn!my05IvF&c!z<9?8M(<%qGNK;~%j`;jx6BRW~u-zzqZ_ zsLLO(#edtW#~>hK?jkF!OJ>3p*L1@U?K_1N{*&B7+#T^bDQf6Is@l5V8?STxE0`td zBAou^OB||_Qk%<chGOGm+JU1gxUxYE$CW3@+x1P!kUaD z)FttKJ8S2H7EAcDS0sf0VNuRG7?=v6Hjau)=^a5H+Ft1PYNVxOnC=rRrk`z*f1lWM 
zj^V<&FK%eS(W%x)g!jFN$*U!#{Wk;gZzJ5D$Yv4-r9fJW5-d;30~iKEG!Gy$VZ!AE z=x^_>VWLTRofJr*nh54Z&JBrtx696KcCsBGw<@2c^#lM#8_Ok9K`;VfN#|O8@Z{2t z7Bn~BSGdb*iAi-#N@9v<4a~nHN2k2chLIkgmaqW3?jqA-Ccr0n$)d|N)%i71kTgib zT1WJ)bHaiuOq?Qn!?FB=z-W|3leRxyFnI`x8pQ&BL zLS62t#rDa5;0|X8>0bB3W3Azq-O25}Nc~R^Kd8do0HLV|$wII$-x^R?hHNzU&6ZAi z6HxQXMg57)pZY;gS#V25nVj{o3SSpB?K*B?`E--5$oxFai zrjLY?jV;2U)v@p2$s!Xr=RpJ-;RGdVf#h^`ZEd-ZnXx^yfQnn6!0ei3I?x2al=zyV z@5BscM_-qvH#2jD0Io(T(It_A?tkRy(>1+k8HHCL``J5kfd9QU6W)C1jWYgMb(4qN zb$`=tXjhGvlRJE|4-BWI&Io~fC!PR$w$-0=!^V_9B4#)U2p@C`L5GqfYmEZ7eT zoM%F53V)BVzKNE+Nky3|jD4jaP92hxw2Y!)49Q0vJQk6y*lV%qM?ZGul$Dl7bXdK` z_|yB9sV@PE#>+ZF!bk^%NW)7EI|HH_eBd7Y9xqQeL|3PysR{qF*7)0j`AmPRe38J` znr$MYUt(xH_sAD*owWeNOJ8t*8$p>IJ6w~8zz*3*MW|m0d4>>ZcSIrpyp5oidNNCi zg5GJx!03bOgN|(%t012<$nP>F@{g8|jt+bC0)S@GoP~nY#ZT2&@Q1jO=94d~(z&ki z!Ap}V2;um51t$>24_(Pt2<2z+VSR}Q16VfuL_fDNUSn4?;o%i_*9s6|ENMUr*9Y|G z({lYVQf%MbYxqBpA+|aB%1;X(!A%9AW6$E7ZxfS3M#Dzn#81H*NIeUKvmriT0y`y~ z9`+Acn9vC@=XkQ|DhobHz{+>!k{1lW^-lnA_Mi{50i6BvXi06B1v~>>hba*h!Et0Y~GJ8VBRh)ejy9 zkHX)F7zVsyaFGa5e2vJZv(p6M%1pp zV8xhX(&%sL^f-STrffmD42y1PL&rD^vu*9c7L(kCD==ipQ(eCq9&1bk?_bC~Z#tIB zV;QA3QxyIf_5x(HB5^ZqpbDbE!^6{^O#HpDkln!G8H(e^$(Q|X6HuO2a8<&2^GtwL z;Dp^tpidK$@}6c0b4{>9J|y7j;aqi)vOXg97(fpYfESP_Xn2nQ94Tx|=Gt2~i}?{a zOl_j>J!vxQkwJ;A!wz6BpfdPg_wdHjLrm$V)rZ}VNgU#@fP)^X$+=3cnY`g97qcPb zC=*FtU<>!t04OGVXZBLA7X2G^IgNVuTpRkYM`eoLzu!63m1{t-Gscki{pK_9vUz!| zEpnd^C;bJ<=g$L@gWOaJ{N==Nbq6GdBO=|w$mjm*TNuquDJCoifW(Dkic-kvz z$pJ*;Z2oOHA_McwgMb(>JPI9N#nh9&!qaAW4)H_8&U*s=Ls;zqGchDy9e z`!B1cXGK=+>B5{OPjgS~d>IKWlN^1}W8%mvP!!&rj`fc)G!@j8G&0VFACc7>L+y;V z4~tLR&>c^nQ6ejzIXA{MZtV)S3yfK-m=ors+DJtR-pNg^Opu%8_=Y~%@_L*5Xb=a9 ztlkXo?r}fDL8n-voO}ArwB#J_!7b7E)57n##)StoI+=&K!`)wqZf<5G9VAEQ1Y;CT ziC4vBYyTYCK>xAE5SZhDKu15Zp`p}0B3nRIZ@##---ac_Xo&W{LsZhta#*&x(bdN} z%u#K4LpXx)*!2PI4G_S3Ox=YQXqkZkFk9vvKCs!41sB{>_Z;-*c!3(=P~f~nEt7Mw zhs0r*C$%c*@}IpoUWnGCr5kf3e&UjKp7RME1XkgIz6h2YYY>y&Pk`cjrLdyoKC*26 z+1$d;;xUlN035{_c6!eeX{nVJyS2gU44;r;sys#56;)IqJFa)iFAi?@(bh$7%uio> 
zHL->5WOq1Ap!+fk<4cE(8aav)BX&I<0l%AIHB7=;vb^QiRF*b_sXp=LsTsOdr&2_z zW-b1{hotv67w!=4Fnyd+=6s_($l5U>d8Aqx{cZG(!U>2m_loH*Q5lwKV0Lxi!^;Np z@Iib`3~dzUqk({&Xm+H&{b3VxU#zaR9SbmFk2H6L9nTD^#omdt% z|6cD-=C&iS;!!KkuSqCf!G2K?8`wz6B1x2AYjP=sRa>v_%`NaDybI?&rypp7nh z33TfT1tMx&-?bKG`ELBGLw+nw5SewQa#Xs^izq_2M-XP^&}!NY=1;O%O7?= zTbz7*yP^;L8+lU;nO|T4X*dyN4Z@oL?Y6iTS{qZxNuS9PLPE>rWxzv5c<>nzXxAJL z)gm%TqY&?8Y0?2}U`bk(WH|24WLc!jhz}L3F;*X8F_4;{qo^eQ85EZy#IRp#>z>WV zxp5#D0y#KUbBM|$;odCp?gh-qZ21z6dA};w6z7<0tXl1)m2D1LwmO&1Ja4=rB{J_Z zq`bMt5+gMF(RjYY>m`I+o3;fzpPisKE9)RDDs3sWGp|=;-Uw+DFv*N&OO_G35#gGJa8W%e~Yt1~n61r>{RnAO9<5rqPS=yL>%PdX`O~t;${9>&$3@WqKL= z>H-u2Tf_-4>FR0KP?|qN-)q^9VpL4HlhWP%H>#4cY$zINl{H!L0^!cGa`QQehC`^b zCxI2xq6Planm6-i!r1i6I?&uoJ#TfgSCCu0;0vJ=&DPDd7) zL$qi*A3?xW@ zakKEWZ_78#qLPkw&Eg6NR~C|s*~o7{Jbpg92OPPaPH6WpM$eHTCQdk%S;8_bDC=#O z-q+)lDc+;Se=kybqUISwJa1Bq_dr!SwsCt4_LnoO)@a9F4p6VO%}aYSVp0TmRCF|Y zn#E`RJcoq=V@5me%x(3AY3Q{&S6Km-*xHw_74rmt$+rl3jL`!imbS@%do6WT_&$B% z(K%iDR!)$~ZUBd@$;(!K?Mp~m_lJS^+8!mItALx8p1?0|-Q zuU06#=>fq`U0oNak?r-%d+U{!JiN=LLE3A>*}lYv3cr;w z+@XlxrCs&??N65Q#*&{v-{jze)-wZk8!h&}9A0_39L0u0hMrC1O0mgEwBTvc!1ay_ zC^!i9h6M5BGSl*uSEm%p!q9PRd~dn8*L<~=wi5VzXDF;_Ke4b@{I8@~tP)gdiXNm^!%67b!6 z6>BqZt61bdTMk_%!u8^b4^HZ1)FRN$^m*`!N1b923kSoYe_IuSdQ-dlz|ypJ05f+L zVJ~Ll?>SvpQwTBE%X+C(L(qHR;fi9AYK!H5!_-WPjSaJBa|M%nhq7dEPl??U+*7PS zAFU0qh>B6OWX!Z8;szkewW3651pIKXEhz5b@`{rG zmujNm#d`fE5l<9Fjh9XOZ4{O?UmemdYK)lj<1eQFQzE3r9Bg63J4|MmlH@wYUEp%6 z9J=`4qh%Q6mrMzAx5`rky4}+btR2+FFQUE^!}scm`_Ir2cPNdv=D;4|Q6Mbu1Qcmj zaNJ(3!rz_$RTOJ&_-oNj#m6FoZ%$L8dB9NqVbKaveNjb4j8aZD&k-H-P@mC5b))2% zCC@DlY|c2Lixb$&mep2MN_Ys*qjmcE+l=-R9|oeBWe>S~elAwf87DkYJJ8h4li#@FhCr&&+uv0tp+_T;M z#Ufo6>*6B}uySCp2HMAOy&tC}P0@aOo`q861#GE&V&oyih^W5tn5j;qygmMa5)l#6 z)MZ|4A&aRJEeXI1JyO>{o=aJcK4K9q6HGr86`5#tJ)dP~YM;AsGyvr#m0;b5Y`^~6 zhoHzFU?6gOwpL}DU9mhRepZTqko|4x7idENAnF}uaTY+Y*DLR#nJ*rSY8dk(5G6b; zI(22EaE_UFOh@Vk3>px4)1`I3roH}zGK>B>_!o1aYwN_ip;TgNdnNZ!kT*gXcv!R- z?D&22--bM*NQx=`=|k*%;t{$97s`ND?O!8=XnDof{#sra=f(~Kd$ZdLvpQ-;cxTpA 
zb|$&yc3mv>zdSQ^b1*r4j-}?B=oF_OCu-IucI6;lvK>EFQWuX*Sbss!;HRf2Ke#kr zyT#tk55TgP&4G;9LPe|~-6rscFyq#FXI}?iM?OaKbiVY4vT=uUCC({&-+iB$z`{Bu z)LJVY$DC4nM@Ksqs+xX{Gbs5koP9f9*5Zf(Fn!*o>>8Hf4ijc6_4Ye&T+1wj1qaXvGqE9@PUTu>@c@R{W3_tB@zsJS_f z@!s$zMV-^*z}BS<6&wm)U$DpP2;iEDY3v(%Qr-d6SdBME0YsF?T=ELbGpQfjpVHmZ z#P2>3?4j{DU5q9q>@}H9>-wHn#Xw*29G&lvqIrwL&N~uHRnuw!@D2BsZ8ReIj-Y`+ z_qy}Q)n6hS=QPQM>}zU}fco7&=CuuGqF0&~E9-jI5Wf))fK((1en~=J9>qQBwuEYs z-~y9~gds=uR1Vcjh4U@}3j zpxa&U2Gn#=cp>E+)4Alv^5yP}C{;an$V4Z}|5vyWJ}JjbQa{pw z^Wm*!rd<<6W<+Sp4f+KB9K0eKa{ux&v0zHXuZ5-`tqTGd{@PwVld6&*7p z-nCY#nSuic5lmnLaW1kmbqmAXLb}!3N@g0$!fTYPyVsVnSNa=B4-Gr@{ z@n0Rr?~Ow*!e|%H@&4!E=k$Vo9o-xbEEUgDRsM{tFt5~yG?yDllXf^iY}@=-wfJWj z*}E%JlctUsDjDrqLkp7|Wg!CwO;PF&5sBKc5pmkzk$4N)ml}OrOe9}{ynYBfbI6s1 z%sA5y9$!pfAV-eUG;Llie&CwZWDCRf6}D94Q93K(wNefRW%W_(p(@43#mWENjp`se zr1%!r*|}U+I2bEVV&lTH{OwTRDI$T^gd`=nyLsz*8oD#-RUPoQCp#)sG5#}@@e393 zuak5A==|N-SCKiSk5GAml(p+&=x;KS$c}D54xRX3wU(q*i?PQTK0e}3xUT{Hu zd`kC|ABfuZ8t#GGS8%DnQnS3!t*!R&Q))ImtQLaZi9o9&hI5S?)+dX2*jU0+r5q&^ zI#+CQiO)vV5@ve2iEwEtf18HjUm3x~dFja9Uu63$MvFkgoa@#_EN_Vogf?bn3q}k=EG9|3|B8Sr_n8}CbLJ;!Et9?zOEGximea3bcFine?Lg(fe6cvrxNR+CmHF%a3)Yon*+pg!;msNm zSrOK%b!<(}d;qzw`~D;sxG8&NX7tCmnn6asvY7 zy3>X7vXxU|h96RR6%Qmjn1qOjbN0dyaJA*mQRkGg^)8u=>A{B5CROH5W)Xuhkb*ap zWGs%~RL|q7({9O%dvH5>WCnz@PzZwTOEyl(*#GQQ5SI+05D075ZDC;MHi&v`xtAmq z#6y`>VCeunIr#)@N$Mu$nFo?aWs_$xV-&+(Gbvs+LA0 zlQU6JtC!J?Pe13I+N3do`U;Z_xTAJn2uw?R(2mOL^64pAMlRly1}+$Ay$~ynW7EwD zIdET8<>{PrFSFMWYUkrdoOjJF>2R_`$a8?C%^GzkJ$skKb~x||apKB2s}Yb8c?>FJ zR5B!*8&uoX0h0MS4RyoGxE8ajE1eh$$Fs%R`-RFV=EeiV<&voFJrdp2b9h{wmqJy3 zj0f-5s7-N<+d~4BY;h6)#WIY;P>;=>KNRQ?UyYbw!C|px>WB-uzyDfT^UMFX5I3`P z{EbNap2w?khB~+J)}fqVOKZ`fav5+bIGp`FhVQN!-~5@(?UZ%oV%lvL`f7#=vN62sf3mRIOguyB&TTIqU4261?(Uz4X!G~b{tF!VY%wQ z$PtxvViZ)Yur9y*&@V}Z2$4vk(Du{eQa5>(!OsQ#VP&^JS;?%gq>&$jsStJPC&}J; z?>`J@`)x?OYvyl#V0Z5ib}soJgA6Zd3$P0`ubhQUElbC0M{`DKcdcl_A2vQk+W+?M zJL%ysF>3wVWH)$nbc+vt2G>XU!4DOS2!~&~(e2I7*V>as7gW_hs%Xc+V&Wt8U$X|` zXvQ7aAeu~sh7;d{{FkUk^J`Y}t; 
zN7xF2Nhzx2beu5Kn|6IXXQc=s#6f`N41mnn0&K&=7@Z+o+$)izCRtdo^mh@xBjSQF zU=|wk4kYisWR>oj+K*m#eSm}z3(mu=I{C$u;oWQ%BIEj#I_*g^27Du)YeMC!9IhOf z&K|dkt7#SQ;v2b9BeUO|rtV`rF@!H%ecH?`mwdx;kNgKqNmRHg|5Qkl>i{7y%N0}5 zw9sAVXCi9TC%&j2QFXA4;;us`XSbMs3psWk{Ztf(g+?zDqumFBQ;c*(`BGNoKcV4K zNaEXdx>+JZgs5m}TZgkHr5xt6>-nF>_Sv$qvSCERUK%@b2C+iy&=NuAB{7W@e&;6bt&XAFr7Mqp3LMU#or=Pwwspyu# zY=*_!!iYHohSY>lM%*>w%*H6JY+6Ca&2#iD)KXw% zpmm=_W$vh;NxHh0s*Yqw)aiB}RpjeurJ5BAF9e$mt{{YI4qU@sTDEMC2J25mcuqcg zi|7o|-E5{#t?gJzhRh@RvS2P726CbUUfMe6w}tJL;H;lMlp@^{&2Jqd~eW6su(e-YAN*Jii${yVDiQ9l_uH~?Kk z08F}1TQ7JbQV~ymBjMht`ZGGHf~Udu&E;mHZe=IS{Ym`aN1|?8PD_R(ciZi=1zt`Z zNp^g}yU@)=J$~m8wE^T;{Cicu2FRox{fyX1_n2*XIP;8tan#%aPrxpXI)1R*hs&tGokb6TbvQLt zX3$7s?LKl0a#S)(%*?whCwhk)s$gwC#=3sch)HL3zMqqtM4^5f1&W9%Cu{g`Tqe_K zP_TgK`5FpvXg;d^xQJNi{amd^NT=E)F+%KfbD zOgJ+pdC;Z;3IYZ>vXSq7_WV~%x;932CqVDU&khjXw+6xVBk&<7`0)y7dqKPZNOuevP$-G zRHl7}SecoiyZfRS?3?HkhynvGnM`i-2$Fx{9x(mZ9Ttku?EVjBMm+%qe`L(*Nf+7f zUqX+p7M7dQV(N0{=qe-0NQ_5z!m^sg-GK_~7=7@bAX4Ur;E%7h`;i z=R0-khpRdp`I22Z;?TfiSz>q01#2yjLa!uX28ax<^v8|DGXAoLpZjS?M@21o)kxZe6_1UFq186u^3^ibzl}I< zZ97kdbnm+jPi50y^IBhhtRHRGJ%3c{MX*D%heo#z-7y_4#<@zhBFZ&xto=2F0WK+b=z@WIMv#Xyq<9|~Y z$x{V2<-L64#YR67d!RE1qh3)eCWXT$QmT#nH;T%a#%hyo6i?6`Zw&H*!IOLC8t#A~ z5N*^bB<+Mo>p#ZS=}4Q{n-OH=_IR|AKALh9wtINp*{A==pB>x)O8piDFK8_U#k!ha z!~EAzdw1L=I!x=3iYWs)Br)3l`sPSr0Et07?067DC3AIkr4ke&Ay_vSHxlsq(`fdR zXc0$|JfY*$2FH`VZ z?}jsZu>D8D;2)hVw2COUqw9qPO)uJe!Pdn$$U-XKS~Q>#^7g{ za1No`eZ@D~EpvcIDM*zc+E$5`8`uimmNXJx+QW7xt;pJ#AUz|E40`e7-hyoFroooMcpt5|^ayz@W=iR|Ftkb# z(6YtQCoiple2aQT=fZc|q&}x$&k+nXDViSfl9@1UUCo?sm9N5nKkCYZ_Hxxki2X#UllvZWNWHqAntf9;Kr@q`D?PAMgdhSA$ zg1%m*i6jOVm zDe~E5xMlslM#K!I`1cOT+|V_aHzWn@mucIDoKw;M&qf2aw-JMQ{wi(}sWt>kc%)Ld zVu6Q*4@())ZgOXDvtm8Nq4l#K8zMK#9bIDU5a{TMtN6>0cdbJZtD?MmMX^4vk779H z&cFc+ih|PiCs;FByyPrI`di+rk3{L2nV}eJlk(v}WIl{VO=pd?4=}A79z)r-nSob<@INT=c!^X-{Hn!`2 zDY|;ixI9rq3F1}xq5%#5`phJQ`#IO0Lp5{#ZV*Dqb}|Jtb12@4l?VHnmoI^STR<1$ zoi-byVN$lQurQG@=2K(VTpxodZ*z0(@MC`}7Lh-lDiQmor51nysqQh!g5}I2{(5M!ZgbKN 
zAvEr=>?Oh?@i{r^$8W^8FPh>vz|p@zs4e<)@Hex4w;W(0i`ydwq#K8X*x2U3a2?>L z^!Ay!#s}rQOepNa!!5phs}xAl7(n}uOwT^RLBWLG)a;w>^FY?uWsNE-EnFIOFW+~SI z%vB0W^!aK|C_{t+I6T?KXP2d9`t!-j7RCTF9j_D?S|^sEOI2T#5tJ&5w9d}l*Y9Be zwK2T-tIowWEbQ<9ZKiD!(xmGLx~!BjlUdmB)SMC&t;i^3_M86jqQx!7(X0n+7+<4a zVh`f!RraYUEtUjNsh{zoW1!%RVek-ohNUh+pLNZ${4|rO`c&OFi^k{3?&02WI6MP~ zwynGYsv9ktl916|5nMCVHE}y(~nE8lF< zY&Nc3uhkc_jmnBYQfQm#IRZ7hvAWF+u{h)Hwj!ZvsI8XZ^$u@j{RSj|lgux}pcPpt z;h3&R>Cz0};UQM`P{uw1g)$9}VuN4&fEnTS~zK>?Bw3J#XrjE>NEs4eL7bB>tO`gSyS5SOk1; zGy%BB)cyfQ2AUJC2h)d4cQNs{<8R(yH>;UfHmA4Z-rY=dNy&DU)En~5{V}<4eDF}1 zuR!HNgwW9oLbrn1!D;@9Ncj9FF!8PShb9Nj#xN@oeWYQDt{xxn6gq4;AhU< zESw3I*^WyQ$hfu95v_V2IZaNNEO+_wyOXbFgeJ?t;m+|~UH_y0rW@zxf2i-RJYGLq zc9|@`#AUc>>pYiC4)^B>*4fM#bc=o!R+#&v-gl{lV<3NTXrFsx0+px0qSWeDBXSTD@+9S(EHz`fMWZEsY8$~8}V&e)lY@!!D>atIgl`lCA3~{S4P9+EEt`cZB znIV^8hzWwH7#WOS;KIbS?}wEw57;dgWv6UiHG)>RVRvzJYoiE4ni%+-l z1;xsTZ>0+nu0#CO1-AOy-45hDMSY!l!llB4d)5n@6a}B$OWE>4Rbnv-NfFR_&3V2A znxz&GmhdD0ibL(G^fi%(B#QCUiI}0E@QB>|hoTN29(Ub;Y5T5;S$rMg)i~V*l#$2p z>_W33g_^YW=@#9+JM+&|-+f1C>p7vh9DO~3q<}ucUMYX@2vzd2Q8oy9hEb$s6x#N$ zt=YB0U+@c$W=Dsyjah?>j{md4A|N^36hJfHrs-VFb=dLQR7OT{Q2H~78%JrCU-#v6 zceNx$aZ%hvx82;Q!~pyiQ79xdH;h5~`UZ}8x7QauYgT=6-Q9tXqu>v5ZvPin^2d#_IvEkk#sv4c zi-l_byHd37+pq2(3s7cwMmNq}!Ff(`1&v{`T*Php z>FUA7)tuf!-{#!FTA|0}8f~WA>&sW4v#l<_>E%@&wbcEV%?EhgRgBWFwz7#t>_5tm z&2^ivBH);qC(@R=3zTw`rXJxY$ZJD>pRLfq9h2P0i>TS5E1)N;ZZ*y@^>#D~t*Wo5 zn*_w}EhkRCs+l)X7@r9;!!{V7W4fR-ATGBMZG)LbhCv(rjPU|b`3OI~IqsUNKzFjp zZ#n~O$aXvd6bBYOOKiJ#LEQ(s%$n#*uBKmgGv{}rRMSbzyI9$t?->AeOnbO}E}Q^* z2Y>=EFK-7?ON)2-?=J+$d8s~bafoQ2JRHRjeBt-~;r8>BG48Fhc2dq(7BT8>-u(#@ zQLmSp$oG`5mOuEk=NVw{Jk@I&$3i_UYaT<&VU&Xd#j*; zQ%K?Db9?l`3*RZROG25bb1`B7PKuvdQ5=k4!r*`vyR=j(sUgkg>)1b6+6Arnz$aw{ zS`2fDygmfhb83(x%d|`JN8c<1%SUt0(`GKNcR?Qy$k)wh=KV6a&XNb^yqktG%3r$h zKj{nsztpJRnNG=Fwugb3mVWDaxr={FYOR+sOZQJy)LmQF6&_}ZZJwQLqBdqLLgvn$>j~n?g(%C|FjDwgwyTGd(~ts(M2{mO zCq-%LUqM5@tEUlyixCC>Y(v&dEJq?E5ot7SnB7eY7y_K9S)_;JLL)hI^Ch*d`to+~ 
zvIutDj)&NKMl@5tk7m&pZK7)qac{Ft<*ku3@0@jb3doL4Pi#?TGZztcqe95&q8AY0 ztA+_;ZKMF_AKMiyx3sa!=}^D0iGdX(bigm?v-e$rp>o6Q=n(n1%bFG z8Kn+gPn}7zxtW>5B%OX#rJ2l@m=@fIv9gG%qs-+v_e3(jv9AnY_oOu^gn)H=72;!I(W3G8|`>M%}&8JP3uxB^Z51xYq5i~Wq`i5!5B z-!dn}$^7i=+7W?&_E66HNvB5ksvlraKCsQV@1rGE(8qo!_f0rqwG_j-9g!e_OReOS z9n0CjhQ$W1gy34F%)VI^rkb4&gO0%g|AUZd^l%}>&NsabvNV23q^Kda!3k-=V!)vV z!Oys2As=k5!#vEN1GCnqp%%Bgxg`fHe?K_Pdx?w}2ziZaV`U&i$-{HJ*^Ad&gViW~ zH2LY}+7R|4tQ_%y)$sWBX1a&AA2mhDj^9&eikFTX%Bg55zFM5)}_-VY&KX?Wyj74*%y9Ym`_-<_-b`Vk6!e zIlR$TocHSC&81TD!2%}`z_Q~HbKj4}#n^fb%*#&9sV27Ooz$8zp)FHyGN)G?th<6# z;N4q%Yr&s1He1%Z7wtSUV7Qc4$)^mO?ml@FT{hGC1;O^x3SHj=F=o1pY88L?)Mk{0 z)EQ2TQ^6>Rw5o-Q(60xS{BvYrcrwvvo(!Swgy`GgafH+LRoeT^^pg|8(-e*6%i%;% z+$<`WY|{xr&k4wZc_$WDw$RsbYzg7$cA0s!L<9^K!2x1(|Ch<>C*6a#mto5V0~RaQ z`R*zVCG6lsB2FFhn=z-L51jTYdE~%wY#a+{5h^hpzt9jR2zNZymlAcAJnz2lmXuf7 z_{FrEr?tsa*)+8NFbl*-PBZk~f{(8cn20TbJSv`01|UP>1`Cjha>z_(o@szjcmt}Z zNx3x@)Y0p%YRlpg=?@Ktga7YR3%GBWAXz9j%L;vT-Gr3d@ z>pb#9ndypF|LBdJNnA{e0}46La05<=Km_DzM0^W<|GOvjaoUl^FoQ=Ud@gM``QG#x za0yw{x=YHAPByTD)?ZJ}mEDs5QEVa5d)gp4d|rL6;<#mk3Xv?%+lk4Ag$Pw`RmCjN zNJ<5KI=eGMm4e~U)sYwP^=90~Snw?OZC7s|diA>BYVP9RS1-{odnO;p@0$%Q4Z0Ru z%HEh?Dq0~;`SPz560>jRptDXc;HW%_*G|1Z30ls4mYlv2Q6-M$6eT0W?E*- zTG%50J*FmvpiDw_<&7PXP-dJsm>BIIHDHReMlFA8=a{0u3whig)@L$#&7&91iE-#s zWqp$Swej!&l`hw^>I~=P$4?q4CyIwO*5b;ez^Ur?YzjeOoT&&q)3j36v^=53mE-B= z|CX2!e_n`=N=US%qn<;#YuYqtWDU01?Aj;NHQhLI8g2+gfhuhx490>F56kX9!br%C zgvb-)VJ!+kA%`a?!>4^y6MpIaPXXcKWLZ@a+gm^Wdssl)-QH59*r;Fs`vJWU`v3pW cVR=+6gwt%BI#WCKzxQb6rB$SABuqp9AC + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/auto-analyst-frontend/public/icons/templates/preprocessing_agent.svg b/auto-analyst-frontend/public/icons/templates/preprocessing_agent.svg new file mode 100644 index 00000000..26c18c46 --- /dev/null +++ b/auto-analyst-frontend/public/icons/templates/preprocessing_agent.svg @@ -0,0 +1 @@ + diff 
--git a/auto-analyst-frontend/public/icons/templates/sk_learn_agent.svg b/auto-analyst-frontend/public/icons/templates/sk_learn_agent.svg new file mode 100644 index 00000000..5a32f797 --- /dev/null +++ b/auto-analyst-frontend/public/icons/templates/sk_learn_agent.svg @@ -0,0 +1,111 @@ + + + +image/svg+xml + + + + + + + + + + + + + + +scikit + + + + + + + \ No newline at end of file From 188898195d4539128437aa3143471335c0f58cd1 Mon Sep 17 00:00:00 2001 From: Ashad Qureshi Date: Tue, 17 Jun 2025 21:32:52 +0500 Subject: [PATCH 6/7] Bug Fixes v1 --- auto-analyst-backend/Dockerfile | 2 + auto-analyst-backend/chat_database.db | 4 +- auto-analyst-backend/entrypoint.sh | 56 +++ .../manage_sqlite_templates.py | 291 ++++++++++++++ .../scripts/populate_agent_templates.py | 360 +++++++++++++++--- .../custom-templates/TemplatesModal.tsx | 142 ++++--- .../custom-templates/useTemplates.ts | 57 ++- 7 files changed, 806 insertions(+), 106 deletions(-) create mode 100644 auto-analyst-backend/manage_sqlite_templates.py diff --git a/auto-analyst-backend/Dockerfile b/auto-analyst-backend/Dockerfile index d373f797..403ae0fe 100644 --- a/auto-analyst-backend/Dockerfile +++ b/auto-analyst-backend/Dockerfile @@ -14,6 +14,8 @@ COPY --chown=user . 
/app # Make entrypoint script executable USER root RUN chmod +x /app/entrypoint.sh +# Make populate script executable +RUN chmod +x /app/scripts/populate_agent_templates.py USER user # Use the entrypoint script instead of directly running uvicorn diff --git a/auto-analyst-backend/chat_database.db b/auto-analyst-backend/chat_database.db index 1197c58b..e99a3734 100644 --- a/auto-analyst-backend/chat_database.db +++ b/auto-analyst-backend/chat_database.db @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2d3913c49d36bcbb74c9eb4a9277bb20be677bc89d47c2abf16e9a64e6282c2 -size 761856 +oid sha256:061a038faca46d439d7178af5bc140ee7a523d4ef00cf57ca75d2d9e708fcdb2 +size 69632 diff --git a/auto-analyst-backend/entrypoint.sh b/auto-analyst-backend/entrypoint.sh index 8e82044f..2ce6c9ac 100644 --- a/auto-analyst-backend/entrypoint.sh +++ b/auto-analyst-backend/entrypoint.sh @@ -70,6 +70,59 @@ except Exception as e: # Don't exit on database connectivity issues - let app try to start } +# Function to populate agents and templates for development (SQLite only) +populate_agents_templates() { + echo "🔧 Checking if agents/templates need to be populated..." 
+ python -c " +try: + from src.db.init_db import DATABASE_URL + from src.db.schemas.models import AgentTemplate + from src.db.init_db import session_factory + + # Check database type + if DATABASE_URL.startswith('sqlite'): + print('🔍 SQLite database detected - checking template population') + + session = session_factory() + try: + template_count = session.query(AgentTemplate).count() + + if template_count == 0: + print('📋 No templates found - populating agents and templates...') + session.close() + exit(1) # Signal that population is needed + else: + print(f'✅ Found {template_count} templates - population not needed') + session.close() + exit(0) # Signal that population is not needed + except Exception as e: + print(f'⚠️ Error checking templates: {e}') + print('📋 Will attempt to populate anyway') + session.close() + exit(1) # Signal that population is needed + else: + print('🔍 PostgreSQL/RDS detected - skipping auto-population') + exit(0) # Signal that population is not needed + +except Exception as e: + print(f'❌ Error during template check: {e}') + exit(0) # Don't fail startup, just skip population +" + + # Check if population is needed (exit code 1 means yes) + if [ $? -eq 1 ]; then + echo "🚀 Running agent/template population for SQLite..." + python scripts/populate_agent_templates.py auto + + if [ $? -eq 0 ]; then + echo "✅ Agent/template population completed successfully" + else + echo "⚠️ Agent/template population had issues, but continuing..." + echo "📋 You may need to populate templates manually" + fi + fi +} + # Main startup sequence echo "🔧 Initializing production environment..." @@ -82,6 +135,9 @@ init_production_database # Test database connectivity (non-failing) verify_database_connectivity +# Populate agents and templates for development (SQLite only) +populate_agents_templates + echo "🎯 Starting FastAPI application..." 
echo "🌐 Application will be available on port 7860" diff --git a/auto-analyst-backend/manage_sqlite_templates.py b/auto-analyst-backend/manage_sqlite_templates.py new file mode 100644 index 00000000..742cb16e --- /dev/null +++ b/auto-analyst-backend/manage_sqlite_templates.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python3 +""" +Script to populate agent templates. +These templates are available to all users but usable only by paid users. +""" + +import sys +import os +from datetime import datetime, UTC + +# Handle path for SQLite database access +script_dir = os.path.dirname(os.path.abspath(__file__)) +backend_dir = os.path.dirname(script_dir) +original_cwd = os.getcwd() + +# Add the backend directory to the Python path +sys.path.append(backend_dir) + +# Change to backend directory for SQLite database access +os.chdir(backend_dir) + +try: + from src.db.init_db import session_factory, DATABASE_URL + from src.db.schemas.models import AgentTemplate + from sqlalchemy.exc import IntegrityError + + # Check if we're using SQLite + is_sqlite = DATABASE_URL.startswith('sqlite') + print(f"🔍 Database type: {'SQLite' if is_sqlite else 'PostgreSQL'}") + print(f"📁 Working directory: {os.getcwd()}") + +except ImportError as e: + print(f"❌ Error importing database modules: {str(e)}") + os.chdir(original_cwd) + sys.exit(1) + +# Template agent definitions +AGENT_TEMPLATES = { + "Visualization": [ + { + "template_name": "matplotlib_agent", + "display_name": "Matplotlib Visualization Agent", + "description": "Creates static publication-quality plots using matplotlib and seaborn", + "icon_url": "/icons/templates/matplotlib.svg", + "prompt_template": """ +You are a matplotlib/seaborn visualization expert. Your task is to create high-quality static visualizations using matplotlib and seaborn libraries. 
+ +IMPORTANT Instructions: +- You must only use matplotlib, seaborn, and numpy/polars for visualizations +- Always use plt.style.use('seaborn-v0_8') or a clean style for better aesthetics +- Include proper titles, axis labels, and legends +- Use appropriate color palettes and consider accessibility +- Sample data if len(df) > 50000 using: df = df.sample(50000, random_state=42) +- Save figures with plt.tight_layout() and high DPI: plt.savefig('plot.png', dpi=300, bbox_inches='tight') +- Always end with plt.show() + +Focus on creating publication-ready static visualizations that are informative and aesthetically pleasing. +""" + }, + { + "template_name": "seaborn_agent", + "display_name": "Seaborn Statistical Plots Agent", + "description": "Creates statistical visualizations and data exploration plots using seaborn", + "icon_url": "/icons/templates/seaborn.svg", + "prompt_template": """ +You are a seaborn statistical visualization expert. Your task is to create statistical plots and exploratory data visualizations. + +IMPORTANT Instructions: +- Focus on seaborn for statistical plotting (distributions, relationships, categorical data) +- Use matplotlib as the backend for customization +- Create informative statistical plots: histograms, box plots, violin plots, pair plots, heatmaps +- Apply proper statistical annotations and significance testing where relevant +- Use seaborn's built-in themes and color palettes for professional appearance +- Include statistical summaries and insights in plot annotations +- Handle categorical and numerical data appropriately +- Always include proper legends, titles, and axis labels + +Focus on revealing statistical patterns and relationships in data through visualization. 
+""" + }, + ], + "Data Manipulation": [ + { + "template_name": "polars_agent", + "display_name": "Polars Data Processing Agent", + "description": "High-performance data manipulation and analysis using Polars", + "icon_url": "/icons/templates/polars.svg", + "prompt_template": """ +You are a Polars data processing expert. Perform high-performance data manipulation and analysis using Polars. + +IMPORTANT Instructions: +- Use Polars for fast, memory-efficient data processing +- Leverage lazy evaluation with pl.scan_csv() and .lazy() for large datasets +- Implement efficient data transformations using Polars expressions +- Use Polars-specific methods for groupby, aggregations, and window functions +- Handle various data types and perform type conversions appropriately +- Optimize queries for performance using lazy evaluation and query optimization +- Implement complex data reshaping (pivots, melts, joins) +- Use Polars datetime functionality for time-based operations +- Convert to pandas only when necessary for visualization or other libraries +- Focus on performance and memory efficiency + +Focus on leveraging Polars' speed and efficiency for data processing tasks. +""" + }, + { + "template_name": "data_cleaning_agent", + "display_name": "Data Cleaning Specialist Agent", + "description": "Specialized in comprehensive data cleaning and quality assessment", + "icon_url": "/icons/templates/data-cleaning.png", + "prompt_template": """ +You are a data cleaning specialist. Perform comprehensive data quality assessment and cleaning. 
+ +IMPORTANT Instructions: +- Detect and handle missing values, duplicates, and outliers +- Identify data type inconsistencies and fix them +- Perform data validation and quality checks +- Handle inconsistent formatting (dates, strings, numbers) +- Detect and fix encoding issues +- Create data quality reports with statistics and visualizations +- Implement robust cleaning pipelines +- Flag potential data quality issues for manual review +- Use appropriate imputation strategies based on data characteristics +- Document all cleaning steps and transformations applied + +Focus on delivering high-quality, analysis-ready datasets with comprehensive documentation. +""" + }, + { + "template_name": "feature_engineering_agent", + "display_name": "Feature Engineering Agent", + "description": "Creates and transforms features for machine learning models", + "icon_url": "/icons/templates/feature-engineering.png", + "prompt_template": """ +You are a feature engineering expert. Create, transform, and select features for machine learning. + +IMPORTANT Instructions: +- Create meaningful features from existing data (polynomial, interaction, binning) +- Encode categorical variables appropriately (one-hot, label, target encoding) +- Scale and normalize numerical features +- Handle datetime features (extract components, create time-based features) +- Perform feature selection using statistical tests and model-based methods +- Create domain-specific features based on data context +- Handle high-cardinality categorical features +- Use cross-validation for feature selection to avoid overfitting +- Visualize feature distributions and relationships +- Document feature creation rationale and transformations + +Focus on creating predictive features that improve model performance while avoiding data leakage. 
+""" + } + ] +} + +def populate_templates(): + """Populate the database with agent templates.""" + session = session_factory() + + try: + # Track statistics + created_count = 0 + skipped_count = 0 + + for category, templates in AGENT_TEMPLATES.items(): + print(f"\n--- Processing {category} Templates ---") + + for template_data in templates: + template_name = template_data["template_name"] + + # Check if template already exists + existing = session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() + + if existing: + print(f"⏭️ Skipping {template_name} (already exists)") + skipped_count += 1 + continue + + # Create new template + template = AgentTemplate( + template_name=template_name, + display_name=template_data["display_name"], + description=template_data["description"], + icon_url=template_data["icon_url"], + prompt_template=template_data["prompt_template"], + category=category, + is_premium_only=True, # All templates require premium + is_active=True, + created_at=datetime.now(UTC), + updated_at=datetime.now(UTC) + ) + + session.add(template) + print(f"✅ Created template: {template_name}") + created_count += 1 + + # Commit all changes + session.commit() + + print(f"\n--- Summary ---") + print(f"Created: {created_count} templates") + print(f"Skipped: {skipped_count} templates") + print(f"Total templates in database: {created_count + skipped_count}") + + except Exception as e: + session.rollback() + print(f"❌ Error populating templates: {str(e)}") + raise + finally: + session.close() + # Restore original working directory + os.chdir(original_cwd) + +def list_templates(): + """List all existing templates.""" + session = session_factory() + + try: + templates = session.query(AgentTemplate).order_by(AgentTemplate.category, AgentTemplate.template_name).all() + + if not templates: + print("No templates found in database.") + return + + print(f"\n--- Existing Templates ({len(templates)} total) ---") + + current_category = None + for 
template in templates: + if template.category != current_category: + current_category = template.category + print(f"\n{current_category}:") + + status = "🔒 Premium" if template.is_premium_only else "🆓 Free" + active = "✅ Active" if template.is_active else "❌ Inactive" + print(f" • {template.template_name} ({template.display_name}) - {status} - {active}") + print(f" {template.description}") + + except Exception as e: + print(f"❌ Error listing templates: {str(e)}") + finally: + session.close() + # Restore original working directory + os.chdir(original_cwd) + +def remove_all_templates(): + """Remove all templates (for testing).""" + session = session_factory() + + try: + deleted_count = session.query(AgentTemplate).delete() + + session.commit() + print(f"🗑️ Removed {deleted_count} templates") + + except Exception as e: + session.rollback() + print(f"❌ Error removing templates: {str(e)}") + finally: + session.close() + # Restore original working directory + os.chdir(original_cwd) + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Manage agent templates") + parser.add_argument("action", choices=["populate", "list", "remove-all"], + help="Action to perform") + + args = parser.parse_args() + + try: + if args.action == "populate": + print("🚀 Populating agent templates...") + populate_templates() + elif args.action == "list": + list_templates() + elif args.action == "remove-all": + confirm = input("⚠️ Are you sure you want to remove ALL templates? 
(yes/no): ") + if confirm.lower() == "yes": + remove_all_templates() + else: + print("Operation cancelled.") + # Restore directory even if cancelled + os.chdir(original_cwd) + except Exception as e: + print(f"❌ Script failed: {e}") + # Ensure directory is restored on any error + os.chdir(original_cwd) + sys.exit(1) \ No newline at end of file diff --git a/auto-analyst-backend/scripts/populate_agent_templates.py b/auto-analyst-backend/scripts/populate_agent_templates.py index 97894af3..d1d7b87d 100644 --- a/auto-analyst-backend/scripts/populate_agent_templates.py +++ b/auto-analyst-backend/scripts/populate_agent_templates.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 """ -Script to populate agent templates. -These templates are available to all users but usable only by paid users. +Enhanced Script to populate agent templates for development. +Includes both default agents (free) and premium templates. +Automatically detects database type and populates accordingly. """ import sys @@ -11,13 +12,200 @@ # Add the project root to the Python path sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from src.db.init_db import session_factory +from src.db.init_db import session_factory, DATABASE_URL from src.db.schemas.models import AgentTemplate from sqlalchemy.exc import IntegrityError -# Template agent definitions -AGENT_TEMPLATES = { - "Visualization": [ +def get_database_type(): + """Detect database type from DATABASE_URL""" + if DATABASE_URL.startswith('postgresql'): + return "postgresql" + elif DATABASE_URL.startswith('sqlite'): + return "sqlite" + else: + return "unknown" + +# Default agents (free for all users) +DEFAULT_AGENTS = { + "Data Manipulation": [ + { + "template_name": "preprocessing_agent", + "display_name": "Data Preprocessing Agent", + "description": "Cleans and prepares a DataFrame using Pandas and NumPy—handles missing values, detects column types, and converts date strings to datetime.", + "icon_url": 
"/icons/templates/pandas.svg", + "prompt_template": """You are a AI data-preprocessing agent. Generate clean and efficient Python code using NumPy and Pandas to perform introductory data preprocessing on a pre-loaded DataFrame df, based on the user's analysis goals. +Preprocessing Requirements: +1. Identify Column Types +- Separate columns into numeric and categorical using: + categorical_columns = df.select_dtypes(include=[object, 'category']).columns.tolist() + numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist() +2. Handle Missing Values +- Numeric columns: Impute missing values using the mean of each column +- Categorical columns: Impute missing values using the mode of each column +3. Convert Date Strings to Datetime +- For any column suspected to represent dates (in string format), convert it to datetime using: + def safe_to_datetime(date): + try: + return pd.to_datetime(date, errors='coerce', cache=False) + except (ValueError, TypeError): + return pd.NaT + df['datetime_column'] = df['datetime_column'].apply(safe_to_datetime) +- Replace 'datetime_column' with the actual column names containing date-like strings +Important Notes: +- Do NOT create a correlation matrix — correlation analysis is outside the scope of preprocessing +- Do NOT generate any plots or visualizations +Output Instructions: +1. Include the full preprocessing Python code +2. Provide a brief bullet-point summary of the steps performed. 
Example: +• Identified 5 numeric and 4 categorical columns +• Filled missing numeric values with column means +• Filled missing categorical values with column modes +• Converted 1 date column to datetime format + Respond in the user's language for all summary and reasoning but keep the code in english""" + } + ], + "Data Modelling": [ + { + "template_name": "statistical_analytics_agent", + "display_name": "Statistical Analytics Agent", + "description": "Performs statistical analysis (e.g., regression, seasonal decomposition) using statsmodels, with proper handling of categorical data and missing values.", + "icon_url": "/icons/templates/statsmodels.svg", + "prompt_template": """You are a statistical analytics agent. Your task is to take a dataset and a user-defined goal and output Python code that performs the appropriate statistical analysis to achieve that goal. Follow these guidelines: +IMPORTANT: You may be provided with previous interaction history. The section marked "### Current Query:" contains the user's current request. Any text in "### Previous Interaction History:" is for context only and is NOT part of the current request. +Data Handling: +Always handle strings as categorical variables in a regression using statsmodels C(string_column). +Do not change the index of the DataFrame. +Convert X and y into float when fitting a model. +Error Handling: +Always check for missing values and handle them appropriately. +Ensure that categorical variables are correctly processed. +Provide clear error messages if the model fitting fails. +Regression: +For regression, use statsmodels and ensure that a constant term is added to the predictor using sm.add_constant(X). +Handle categorical variables using C(column_name) in the model formula. +Fit the model with model = sm.OLS(y.astype(float), X.astype(float)).fit(). +Seasonal Decomposition: +Ensure the period is set correctly when performing seasonal decomposition. 
+Verify the number of observations works for the decomposition. +Output: +Ensure the code is executable and as intended. +Also choose the correct type of model for the problem +Avoid adding data visualization code. +Use code like this to prevent failing: +import pandas as pd +import numpy as np +import statsmodels.api as sm +def statistical_model(X, y, goal, period=None): + try: + # Check for missing values and handle them + X = X.dropna() + y = y.loc[X.index].dropna() + # Ensure X and y are aligned + X = X.loc[y.index] + # Convert categorical variables + for col in X.select_dtypes(include=['object', 'category']).columns: + X[col] = X[col].astype('category') + # Add a constant term to the predictor + X = sm.add_constant(X) + # Fit the model + if goal == 'regression': + # Handle categorical variables in the model formula + formula = 'y ~ ' + ' + '.join([f'C({col})' if X[col].dtype.name == 'category' else col for col in X.columns]) + model = sm.OLS(y.astype(float), X.astype(float)).fit() + return model.summary() + elif goal == 'seasonal_decompose': + if period is None: + raise ValueError("Period must be specified for seasonal decomposition") + decomposition = sm.tsa.seasonal_decompose(y, period=period) + return decomposition + else: + raise ValueError("Unknown goal specified. Please provide a valid goal.") + except Exception as e: + return f"An error occurred: {e}" +# Example usage: +result = statistical_analysis(X, y, goal='regression') +print(result) +If visualizing use plotly +Provide a concise bullet-point summary of the statistical analysis performed. 
+ +Example Summary: +• Applied linear regression with OLS to predict house prices based on 5 features +• Model achieved R-squared of 0.78 +• Significant predictors include square footage (p<0.001) and number of bathrooms (p<0.01) +• Detected strong seasonal pattern with 12-month periodicity +• Forecast shows 15% growth trend over next quarter +Respond in the user's language for all summary and reasoning but keep the code in english""" + }, + { + "template_name": "sk_learn_agent", + "display_name": "Machine Learning Agent", + "description": "Trains and evaluates machine learning models using scikit-learn, including classification, regression, and clustering with feature importance insights.", + "icon_url": "/icons/templates/scikit-learn.svg", + "prompt_template": """You are a machine learning agent. +Your task is to take a dataset and a user-defined goal, and output Python code that performs the appropriate machine learning analysis to achieve that goal. +You should use the scikit-learn library. +IMPORTANT: You may be provided with previous interaction history. The section marked "### Current Query:" contains the user's current request. Any text in "### Previous Interaction History:" is for context only and is NOT part of the current request. +Make sure your output is as intended! +Provide a concise bullet-point summary of the machine learning operations performed. 
+ +Example Summary: +• Trained a Random Forest classifier on customer churn data with 80/20 train-test split +• Model achieved 92% accuracy and 88% F1-score +• Feature importance analysis revealed that contract length and monthly charges are the strongest predictors of churn +• Implemented K-means clustering (k=4) on customer shopping behaviors +• Identified distinct segments: high-value frequent shoppers (22%), occasional big spenders (35%), budget-conscious regulars (28%), and rare visitors (15%) +Respond in the user's language for all summary and reasoning but keep the code in english""" + } + ], + "Data Visualization": [ + { + "template_name": "data_viz_agent", + "display_name": "Data Visualization Agent", + "description": "Generates interactive visualizations with Plotly, selecting the best chart type to reveal trends, comparisons, and insights based on the analysis goal.", + "icon_url": "/icons/templates/plotly.svg", + "prompt_template": """You are an AI agent responsible for generating interactive data visualizations using Plotly. +IMPORTANT Instructions: +- The section marked "### Current Query:" contains the user's request. Any text in "### Previous Interaction History:" is for context only and should NOT be treated as part of the current request. +- You must only use the tools provided to you. This agent handles visualization only. +- If len(df) > 50000, always sample the dataset before visualization using: +if len(df) > 50000: + df = df.sample(50000, random_state=1) +- Each visualization must be generated as a **separate figure** using go.Figure(). +Do NOT use subplots under any circumstances. +- Each figure must be returned individually using: +fig.to_html(full_html=False) +- Use update_layout with xaxis and yaxis **only once per figure**. 
+- Enhance readability and clarity by: +• Using low opacity (0.4-0.7) where appropriate +• Applying visually distinct colors for different elements or categories +- Make sure the visual **answers the user's specific goal**: +• Identify what insight or comparison the user is trying to achieve +• Choose the visualization type and features (e.g., color, size, grouping) to emphasize that goal +• For example, if the user asks for "trends in revenue," use a time series line chart; if they ask for "top-performing categories," use a bar chart sorted by value +• Prioritize highlighting patterns, outliers, or comparisons relevant to the question +- Never include the dataset or styling index in the output. +- If there are no relevant columns for the requested visualization, respond with: +"No relevant columns found to generate this visualization." +- Use only one number format consistently: either 'K', 'M', or comma-separated values like 1,000/1,000,000. Do not mix formats. +- Only include trendlines in scatter plots if the user explicitly asks for them. +- Output only the code and a concise bullet-point summary of what the visualization reveals. +- Always end each visualization with: +fig.to_html(full_html=False) +Respond in the user's language for all summary and reasoning but keep the code in english +Example Summary: +• Created an interactive scatter plot of sales vs. 
marketing spend with color-coded product categories +• Included a trend line showing positive correlation (r=0.72) +• Highlighted outliers where high marketing spend resulted in low sales +• Generated a time series chart of monthly revenue from 2020-2023 +• Added annotations for key business events +• Visualization reveals 35% YoY growth with seasonal peaks in Q4""" + } + ] +} + +# Premium template agent definitions +PREMIUM_TEMPLATES = { + "Data Visualization": [ { "template_name": "matplotlib_agent", "display_name": "Matplotlib Visualization Agent", @@ -27,7 +215,7 @@ You are a matplotlib/seaborn visualization expert. Your task is to create high-quality static visualizations using matplotlib and seaborn libraries. IMPORTANT Instructions: -- You must only use matplotlib, seaborn, and numpy/polars for visualizations +- You must only use matplotlib, seaborn, and numpy/pandas for visualizations - Always use plt.style.use('seaborn-v0_8') or a clean style for better aesthetics - Include proper titles, axis labels, and legends - Use appropriate color palettes and consider accessibility @@ -133,56 +321,106 @@ ] } -def populate_templates(): - """Populate the database with agent templates.""" +def populate_agents_and_templates(include_defaults=True, include_premiums=True): + """Populate the database with default agents and premium templates.""" session = session_factory() + db_type = get_database_type() try: # Track statistics - created_count = 0 + default_created = 0 + premium_created = 0 skipped_count = 0 - for category, templates in AGENT_TEMPLATES.items(): - print(f"\n--- Processing {category} Templates ---") - - for template_data in templates: - template_name = template_data["template_name"] - - # Check if template already exists - existing = session.query(AgentTemplate).filter( - AgentTemplate.template_name == template_name - ).first() - - if existing: - print(f"⏭️ Skipping {template_name} (already exists)") - skipped_count += 1 - continue + print(f"🔍 Detected 
{db_type.upper()} database") + print(f"📋 Database URL: {DATABASE_URL}") + + # Populate default agents (free) + if include_defaults: + print(f"\n🆓 --- Processing Default Agents (Free) ---") + for category, agents in DEFAULT_AGENTS.items(): + print(f"\n📁 {category}:") - # Create new template - template = AgentTemplate( - template_name=template_name, - display_name=template_data["display_name"], - description=template_data["description"], - icon_url=template_data["icon_url"], - prompt_template=template_data["prompt_template"], - category=category, - is_premium_only=True, # All templates require premium - is_active=True, - created_at=datetime.now(UTC), - updated_at=datetime.now(UTC) - ) + for agent_data in agents: + template_name = agent_data["template_name"] + + # Check if agent already exists + existing = session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() + + if existing: + print(f"⏭️ Skipping {template_name} (already exists)") + skipped_count += 1 + continue + + # Create new default agent + template = AgentTemplate( + template_name=template_name, + display_name=agent_data["display_name"], + description=agent_data["description"], + icon_url=agent_data["icon_url"], + prompt_template=agent_data["prompt_template"], + category=category, + is_premium_only=False, # Default agents are free + is_active=True, + created_at=datetime.now(UTC), + updated_at=datetime.now(UTC) + ) + + session.add(template) + print(f"✅ Created default agent: {template_name}") + default_created += 1 + + # Populate premium templates (paid) + if include_premiums: + print(f"\n🔒 --- Processing Premium Templates (Paid) ---") + for category, templates in PREMIUM_TEMPLATES.items(): + print(f"\n📁 {category}:") - session.add(template) - print(f"✅ Created template: {template_name}") - created_count += 1 + for template_data in templates: + template_name = template_data["template_name"] + + # Check if template already exists + existing = 
session.query(AgentTemplate).filter( + AgentTemplate.template_name == template_name + ).first() + + if existing: + print(f"⏭️ Skipping {template_name} (already exists)") + skipped_count += 1 + continue + + # Create new premium template + template = AgentTemplate( + template_name=template_name, + display_name=template_data["display_name"], + description=template_data["description"], + icon_url=template_data["icon_url"], + prompt_template=template_data["prompt_template"], + category=category, + is_premium_only=True, # Premium templates require subscription + is_active=True, + created_at=datetime.now(UTC), + updated_at=datetime.now(UTC) + ) + + session.add(template) + print(f"✅ Created premium template: {template_name}") + premium_created += 1 # Commit all changes session.commit() - print(f"\n--- Summary ---") - print(f"Created: {created_count} templates") - print(f"Skipped: {skipped_count} templates") - print(f"Total templates in database: {created_count + skipped_count}") + print(f"\n📊 --- Summary ---") + print(f"🆓 Default agents created: {default_created}") + print(f"🔒 Premium templates created: {premium_created}") + print(f"⏭️ Skipped (already exist): {skipped_count}") + print(f"📈 Total new templates: {default_created + premium_created}") + + # Show total count in database + total_count = session.query(AgentTemplate).count() + print(f"🗄️ Total templates in database: {total_count}") except Exception as e: session.rollback() @@ -191,6 +429,10 @@ def populate_templates(): finally: session.close() +def populate_templates(): + """Legacy function for backward compatibility - only premium templates.""" + populate_agents_and_templates(include_defaults=False, include_premiums=True) + def list_templates(): """List all existing templates.""" session = session_factory() @@ -236,18 +478,42 @@ def remove_all_templates(): finally: session.close() +def auto_populate_for_database(): + """Automatically populate based on database type.""" + db_type = get_database_type() + + if 
db_type == "sqlite": + print("🔍 SQLite detected - populating both default agents and premium templates") + populate_agents_and_templates(include_defaults=True, include_premiums=True) + elif db_type == "postgresql": + print("🔍 PostgreSQL detected - populating only premium templates") + populate_agents_and_templates(include_defaults=False, include_premiums=True) + else: + print(f"⚠️ Unknown database type: {db_type}") + print("Populating both default agents and premium templates") + populate_agents_and_templates(include_defaults=True, include_premiums=True) + if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Manage agent templates") - parser.add_argument("action", choices=["populate", "list", "remove-all"], + parser.add_argument("action", choices=["populate", "populate-all", "populate-defaults", "auto", "list", "remove-all"], help="Action to perform") args = parser.parse_args() if args.action == "populate": - print("🚀 Populating agent templates...") + print("🚀 Populating premium templates only...") populate_templates() + elif args.action == "populate-all": + print("🚀 Populating both default agents and premium templates...") + populate_agents_and_templates(include_defaults=True, include_premiums=True) + elif args.action == "populate-defaults": + print("🚀 Populating default agents only...") + populate_agents_and_templates(include_defaults=True, include_premiums=False) + elif args.action == "auto": + print("🚀 Auto-populating based on database type...") + auto_populate_for_database() elif args.action == "list": list_templates() elif args.action == "remove-all": diff --git a/auto-analyst-frontend/components/custom-templates/TemplatesModal.tsx b/auto-analyst-frontend/components/custom-templates/TemplatesModal.tsx index 86610d56..89fab668 100644 --- a/auto-analyst-frontend/components/custom-templates/TemplatesModal.tsx +++ b/auto-analyst-frontend/components/custom-templates/TemplatesModal.tsx @@ -64,21 +64,39 @@ export default function 
TemplatesModal({ const loadTemplatesForFreeUsers = async () => { setLoading(true) try { + console.log('Loading templates for free users...', { API_URL }) + // Fetch all templates (no user-specific data needed) - const response = await fetch(`${API_URL}/templates/`) + const response = await fetch(`${API_URL}/templates/`).catch(err => { + console.error('Free user templates fetch error:', err) + throw new Error(`Templates endpoint failed: ${err.message}`) + }) - if (response.ok) { - const templatesData = await response.json() - setTemplates(templatesData) - setPreferences([]) // No preferences for free users + console.log('Free user templates response:', { status: response.status }) + + if (!response.ok) { + const errorText = await response.text() + console.error('Free user templates response error:', { status: response.status, errorText }) + throw new Error(`Failed to load templates: ${response.status} ${response.statusText} - ${errorText}`) } + const templatesData = await response.json().catch(err => { + console.error('Free user templates JSON parse error:', err) + throw new Error(`Failed to parse templates response: ${err.message}`) + }) + + console.log('Free user templates data parsed successfully:', { + templatesCount: templatesData.length + }) + + setTemplates(templatesData) + setPreferences([]) // No preferences for free users setChanges({}) } catch (error) { console.error('Error loading templates:', error) toast({ title: "Error", - description: "Failed to load agents", + description: error instanceof Error ? 
error.message : "Failed to load agents", variant: "destructive", }) } finally { @@ -90,59 +108,93 @@ export default function TemplatesModal({ const loadData = async () => { setLoading(true) try { + console.log('Loading templates data for modal...', { API_URL, userId }) + // Fetch global template data with global usage counts const [templatesResponse, preferencesResponse] = await Promise.all([ - fetch(`${API_URL}/templates/`), // Global templates with global usage counts - fetch(`${API_URL}/templates/user/${userId}`) // User preferences with per-user usage + fetch(`${API_URL}/templates/`).catch(err => { + console.error('Templates fetch error:', err) + throw new Error(`Templates endpoint failed: ${err.message}`) + }), // Global templates with global usage counts + fetch(`${API_URL}/templates/user/${userId}`).catch(err => { + console.error('Preferences fetch error:', err) + throw new Error(`Preferences endpoint failed: ${err.message}`) + }) // User preferences with per-user usage ]) - if (templatesResponse.ok) { - // Global templates with global usage counts - const globalTemplatesData = await templatesResponse.json() - - // Convert to TemplateAgent format with global usage counts - const templatesData = globalTemplatesData.map((item: any) => ({ - template_id: item.template_id, - template_name: item.template_name, - display_name: item.display_name, - description: item.description, - prompt_template: item.prompt_template, - template_category: item.template_category, - icon_url: item.icon_url, - is_premium_only: item.is_premium_only, - is_active: item.is_active, - usage_count: item.usage_count, // Global usage count from /templates/ endpoint - created_at: item.created_at - })) - setTemplates(templatesData) + console.log('Modal responses received:', { + templatesStatus: templatesResponse.status, + preferencesStatus: preferencesResponse.status + }) + + // Check templates response + if (!templatesResponse.ok) { + const errorText = await templatesResponse.text() + 
console.error('Templates response error:', { status: templatesResponse.status, errorText }) + throw new Error(`Failed to load templates: ${templatesResponse.status} ${templatesResponse.statusText} - ${errorText}`) } - if (preferencesResponse.ok) { - // User preferences (enabled/disabled status and per-user usage) - const userPreferencesData = await preferencesResponse.json() - - const preferencesData = userPreferencesData.map((item: any) => ({ - template_id: item.template_id, - template_name: item.template_name, - display_name: item.display_name, - description: item.description, - template_category: item.template_category, - icon_url: item.icon_url, - is_premium_only: item.is_premium_only, - is_enabled: item.is_enabled, - usage_count: item.usage_count, // Keep user-specific usage for preferences if needed - last_used_at: item.last_used_at - })) - setPreferences(preferencesData) + // Check preferences response + if (!preferencesResponse.ok) { + const errorText = await preferencesResponse.text() + console.error('Preferences response error:', { status: preferencesResponse.status, errorText }) + throw new Error(`Failed to load preferences: ${preferencesResponse.status} ${preferencesResponse.statusText} - ${errorText}`) } + // Parse templates response + const globalTemplatesData = await templatesResponse.json().catch(err => { + console.error('Templates JSON parse error:', err) + throw new Error(`Failed to parse templates response: ${err.message}`) + }) + + // Convert to TemplateAgent format with global usage counts + const templatesData = globalTemplatesData.map((item: any) => ({ + template_id: item.template_id, + template_name: item.template_name, + display_name: item.display_name, + description: item.description, + prompt_template: item.prompt_template, + template_category: item.template_category, + icon_url: item.icon_url, + is_premium_only: item.is_premium_only, + is_active: item.is_active, + usage_count: item.usage_count, // Global usage count from /templates/ 
endpoint + created_at: item.created_at + })) + setTemplates(templatesData) + + // Parse preferences response + const userPreferencesData = await preferencesResponse.json().catch(err => { + console.error('Preferences JSON parse error:', err) + throw new Error(`Failed to parse preferences response: ${err.message}`) + }) + + const preferencesData = userPreferencesData.map((item: any) => ({ + template_id: item.template_id, + template_name: item.template_name, + display_name: item.display_name, + description: item.description, + template_category: item.template_category, + icon_url: item.icon_url, + is_premium_only: item.is_premium_only, + is_enabled: item.is_enabled, + usage_count: item.usage_count, // Keep user-specific usage for preferences if needed + last_used_at: item.last_used_at + })) + setPreferences(preferencesData) + + console.log('Modal data parsed successfully:', { + templatesCount: templatesData.length, + preferencesCount: preferencesData.length + }) + // Reset changes when loading data setChanges({}) } catch (error) { console.error('Error loading data:', error) toast({ title: "Error", - description: "Failed to load agents", + description: error instanceof Error ? 
error.message : "Failed to load agents", variant: "destructive", }) } finally { diff --git a/auto-analyst-frontend/components/custom-templates/useTemplates.ts b/auto-analyst-frontend/components/custom-templates/useTemplates.ts index c0ee739a..f958d116 100644 --- a/auto-analyst-frontend/components/custom-templates/useTemplates.ts +++ b/auto-analyst-frontend/components/custom-templates/useTemplates.ts @@ -34,24 +34,57 @@ export function useTemplates({ userId, enabled = true }: UseTemplatesProps): Use setError(null) try { + console.log('Loading templates data...', { API_URL, userId }) + const [templatesResponse, preferencesResponse] = await Promise.all([ - fetch(`${API_URL}/templates`), - fetch(`${API_URL}/templates/user/${userId}`) + fetch(`${API_URL}/templates/`).catch(err => { + console.error('Templates fetch error:', err) + throw new Error(`Templates endpoint failed: ${err.message}`) + }), + fetch(`${API_URL}/templates/user/${userId}`).catch(err => { + console.error('Preferences fetch error:', err) + throw new Error(`Preferences endpoint failed: ${err.message}`) + }) ]) - if (templatesResponse.ok) { - const templatesData = await templatesResponse.json() - setTemplates(templatesData) - } else { - throw new Error('Failed to load templates') + console.log('Responses received:', { + templatesStatus: templatesResponse.status, + preferencesStatus: preferencesResponse.status + }) + + // Check templates response + if (!templatesResponse.ok) { + const errorText = await templatesResponse.text() + console.error('Templates response error:', { status: templatesResponse.status, errorText }) + throw new Error(`Failed to load templates: ${templatesResponse.status} ${templatesResponse.statusText} - ${errorText}`) } - if (preferencesResponse.ok) { - const preferencesData = await preferencesResponse.json() - setPreferences(preferencesData) - } else { - throw new Error('Failed to load preferences') + // Check preferences response + if (!preferencesResponse.ok) { + const errorText = 
await preferencesResponse.text() + console.error('Preferences response error:', { status: preferencesResponse.status, errorText }) + throw new Error(`Failed to load preferences: ${preferencesResponse.status} ${preferencesResponse.statusText} - ${errorText}`) } + + // Parse responses + const templatesData = await templatesResponse.json().catch(err => { + console.error('Templates JSON parse error:', err) + throw new Error(`Failed to parse templates response: ${err.message}`) + }) + + const preferencesData = await preferencesResponse.json().catch(err => { + console.error('Preferences JSON parse error:', err) + throw new Error(`Failed to parse preferences response: ${err.message}`) + }) + + console.log('Data parsed successfully:', { + templatesCount: templatesData.length, + preferencesCount: preferencesData.length + }) + + setTemplates(templatesData) + setPreferences(preferencesData) + } catch (err) { const errorMessage = err instanceof Error ? err.message : 'Failed to load data' setError(errorMessage) From bf547ba0b1f4b1dc998a2d5c5ddf8e19f90b1476 Mon Sep 17 00:00:00 2001 From: Ashad Qureshi Date: Tue, 17 Jun 2025 21:35:33 +0500 Subject: [PATCH 7/7] remove extra files --- auto-analyst-backend/heatmap_correlation.png | 3 - auto-analyst-backend/load_default_agents.py | 42 --- .../manage_sqlite_templates.py | 291 ------------------ 3 files changed, 336 deletions(-) delete mode 100644 auto-analyst-backend/heatmap_correlation.png delete mode 100644 auto-analyst-backend/load_default_agents.py delete mode 100644 auto-analyst-backend/manage_sqlite_templates.py diff --git a/auto-analyst-backend/heatmap_correlation.png b/auto-analyst-backend/heatmap_correlation.png deleted file mode 100644 index 851174bf..00000000 --- a/auto-analyst-backend/heatmap_correlation.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f478326d2aaeebf15aff548357f32202f7ac95c24c76e91d52fa999cb63783d0 -size 408092 diff --git 
a/auto-analyst-backend/load_default_agents.py b/auto-analyst-backend/load_default_agents.py deleted file mode 100644 index be0a9bc2..00000000 --- a/auto-analyst-backend/load_default_agents.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 -""" -Script to load default agents into the AgentTemplate table. -Run this script to populate the database with the default agents. -""" - -import sys -import os -from pathlib import Path - -# Add the src directory to the path -src_path = Path(__file__).parent / "src" -sys.path.append(str(src_path)) - -def main(): - try: - from src.db.init_default_agents import initialize_default_agents - - # Initialize default agents with force update enabled - success = initialize_default_agents(force_update=True) - - if success: - print("✅ Successfully loaded default agents into the database!") - print("The following agents are now available:") - print(" • Data Preprocessing Agent (preprocessing_agent)") - print(" • Statistical Analytics Agent (statistical_analytics_agent)") - print(" • Machine Learning Agent (sk_learn_agent)") - print(" • Data Visualization Agent (data_viz_agent)") - else: - print("❌ Failed to load default agents") - sys.exit(1) - - except ImportError as e: - print(f"❌ Import error: {e}") - print("Make sure you're running this script from the auto-analyst-backend directory") - sys.exit(1) - except Exception as e: - print(f"❌ Error: {e}") - sys.exit(1) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/auto-analyst-backend/manage_sqlite_templates.py b/auto-analyst-backend/manage_sqlite_templates.py deleted file mode 100644 index 742cb16e..00000000 --- a/auto-analyst-backend/manage_sqlite_templates.py +++ /dev/null @@ -1,291 +0,0 @@ -#!/usr/bin/env python3 -""" -Script to populate agent templates. -These templates are available to all users but usable only by paid users. 
-""" - -import sys -import os -from datetime import datetime, UTC - -# Handle path for SQLite database access -script_dir = os.path.dirname(os.path.abspath(__file__)) -backend_dir = os.path.dirname(script_dir) -original_cwd = os.getcwd() - -# Add the backend directory to the Python path -sys.path.append(backend_dir) - -# Change to backend directory for SQLite database access -os.chdir(backend_dir) - -try: - from src.db.init_db import session_factory, DATABASE_URL - from src.db.schemas.models import AgentTemplate - from sqlalchemy.exc import IntegrityError - - # Check if we're using SQLite - is_sqlite = DATABASE_URL.startswith('sqlite') - print(f"🔍 Database type: {'SQLite' if is_sqlite else 'PostgreSQL'}") - print(f"📁 Working directory: {os.getcwd()}") - -except ImportError as e: - print(f"❌ Error importing database modules: {str(e)}") - os.chdir(original_cwd) - sys.exit(1) - -# Template agent definitions -AGENT_TEMPLATES = { - "Visualization": [ - { - "template_name": "matplotlib_agent", - "display_name": "Matplotlib Visualization Agent", - "description": "Creates static publication-quality plots using matplotlib and seaborn", - "icon_url": "/icons/templates/matplotlib.svg", - "prompt_template": """ -You are a matplotlib/seaborn visualization expert. Your task is to create high-quality static visualizations using matplotlib and seaborn libraries. - -IMPORTANT Instructions: -- You must only use matplotlib, seaborn, and numpy/polars for visualizations -- Always use plt.style.use('seaborn-v0_8') or a clean style for better aesthetics -- Include proper titles, axis labels, and legends -- Use appropriate color palettes and consider accessibility -- Sample data if len(df) > 50000 using: df = df.sample(50000, random_state=42) -- Save figures with plt.tight_layout() and high DPI: plt.savefig('plot.png', dpi=300, bbox_inches='tight') -- Always end with plt.show() - -Focus on creating publication-ready static visualizations that are informative and aesthetically pleasing. 
-""" - }, - { - "template_name": "seaborn_agent", - "display_name": "Seaborn Statistical Plots Agent", - "description": "Creates statistical visualizations and data exploration plots using seaborn", - "icon_url": "/icons/templates/seaborn.svg", - "prompt_template": """ -You are a seaborn statistical visualization expert. Your task is to create statistical plots and exploratory data visualizations. - -IMPORTANT Instructions: -- Focus on seaborn for statistical plotting (distributions, relationships, categorical data) -- Use matplotlib as the backend for customization -- Create informative statistical plots: histograms, box plots, violin plots, pair plots, heatmaps -- Apply proper statistical annotations and significance testing where relevant -- Use seaborn's built-in themes and color palettes for professional appearance -- Include statistical summaries and insights in plot annotations -- Handle categorical and numerical data appropriately -- Always include proper legends, titles, and axis labels - -Focus on revealing statistical patterns and relationships in data through visualization. -""" - }, - ], - "Data Manipulation": [ - { - "template_name": "polars_agent", - "display_name": "Polars Data Processing Agent", - "description": "High-performance data manipulation and analysis using Polars", - "icon_url": "/icons/templates/polars.svg", - "prompt_template": """ -You are a Polars data processing expert. Perform high-performance data manipulation and analysis using Polars. 
- -IMPORTANT Instructions: -- Use Polars for fast, memory-efficient data processing -- Leverage lazy evaluation with pl.scan_csv() and .lazy() for large datasets -- Implement efficient data transformations using Polars expressions -- Use Polars-specific methods for groupby, aggregations, and window functions -- Handle various data types and perform type conversions appropriately -- Optimize queries for performance using lazy evaluation and query optimization -- Implement complex data reshaping (pivots, melts, joins) -- Use Polars datetime functionality for time-based operations -- Convert to pandas only when necessary for visualization or other libraries -- Focus on performance and memory efficiency - -Focus on leveraging Polars' speed and efficiency for data processing tasks. -""" - }, - { - "template_name": "data_cleaning_agent", - "display_name": "Data Cleaning Specialist Agent", - "description": "Specialized in comprehensive data cleaning and quality assessment", - "icon_url": "/icons/templates/data-cleaning.png", - "prompt_template": """ -You are a data cleaning specialist. Perform comprehensive data quality assessment and cleaning. - -IMPORTANT Instructions: -- Detect and handle missing values, duplicates, and outliers -- Identify data type inconsistencies and fix them -- Perform data validation and quality checks -- Handle inconsistent formatting (dates, strings, numbers) -- Detect and fix encoding issues -- Create data quality reports with statistics and visualizations -- Implement robust cleaning pipelines -- Flag potential data quality issues for manual review -- Use appropriate imputation strategies based on data characteristics -- Document all cleaning steps and transformations applied - -Focus on delivering high-quality, analysis-ready datasets with comprehensive documentation. 
-""" - }, - { - "template_name": "feature_engineering_agent", - "display_name": "Feature Engineering Agent", - "description": "Creates and transforms features for machine learning models", - "icon_url": "/icons/templates/feature-engineering.png", - "prompt_template": """ -You are a feature engineering expert. Create, transform, and select features for machine learning. - -IMPORTANT Instructions: -- Create meaningful features from existing data (polynomial, interaction, binning) -- Encode categorical variables appropriately (one-hot, label, target encoding) -- Scale and normalize numerical features -- Handle datetime features (extract components, create time-based features) -- Perform feature selection using statistical tests and model-based methods -- Create domain-specific features based on data context -- Handle high-cardinality categorical features -- Use cross-validation for feature selection to avoid overfitting -- Visualize feature distributions and relationships -- Document feature creation rationale and transformations - -Focus on creating predictive features that improve model performance while avoiding data leakage. 
-""" - } - ] -} - -def populate_templates(): - """Populate the database with agent templates.""" - session = session_factory() - - try: - # Track statistics - created_count = 0 - skipped_count = 0 - - for category, templates in AGENT_TEMPLATES.items(): - print(f"\n--- Processing {category} Templates ---") - - for template_data in templates: - template_name = template_data["template_name"] - - # Check if template already exists - existing = session.query(AgentTemplate).filter( - AgentTemplate.template_name == template_name - ).first() - - if existing: - print(f"⏭️ Skipping {template_name} (already exists)") - skipped_count += 1 - continue - - # Create new template - template = AgentTemplate( - template_name=template_name, - display_name=template_data["display_name"], - description=template_data["description"], - icon_url=template_data["icon_url"], - prompt_template=template_data["prompt_template"], - category=category, - is_premium_only=True, # All templates require premium - is_active=True, - created_at=datetime.now(UTC), - updated_at=datetime.now(UTC) - ) - - session.add(template) - print(f"✅ Created template: {template_name}") - created_count += 1 - - # Commit all changes - session.commit() - - print(f"\n--- Summary ---") - print(f"Created: {created_count} templates") - print(f"Skipped: {skipped_count} templates") - print(f"Total templates in database: {created_count + skipped_count}") - - except Exception as e: - session.rollback() - print(f"❌ Error populating templates: {str(e)}") - raise - finally: - session.close() - # Restore original working directory - os.chdir(original_cwd) - -def list_templates(): - """List all existing templates.""" - session = session_factory() - - try: - templates = session.query(AgentTemplate).order_by(AgentTemplate.category, AgentTemplate.template_name).all() - - if not templates: - print("No templates found in database.") - return - - print(f"\n--- Existing Templates ({len(templates)} total) ---") - - current_category = None - for 
template in templates: - if template.category != current_category: - current_category = template.category - print(f"\n{current_category}:") - - status = "🔒 Premium" if template.is_premium_only else "🆓 Free" - active = "✅ Active" if template.is_active else "❌ Inactive" - print(f" • {template.template_name} ({template.display_name}) - {status} - {active}") - print(f" {template.description}") - - except Exception as e: - print(f"❌ Error listing templates: {str(e)}") - finally: - session.close() - # Restore original working directory - os.chdir(original_cwd) - -def remove_all_templates(): - """Remove all templates (for testing).""" - session = session_factory() - - try: - deleted_count = session.query(AgentTemplate).delete() - - session.commit() - print(f"🗑️ Removed {deleted_count} templates") - - except Exception as e: - session.rollback() - print(f"❌ Error removing templates: {str(e)}") - finally: - session.close() - # Restore original working directory - os.chdir(original_cwd) - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser(description="Manage agent templates") - parser.add_argument("action", choices=["populate", "list", "remove-all"], - help="Action to perform") - - args = parser.parse_args() - - try: - if args.action == "populate": - print("🚀 Populating agent templates...") - populate_templates() - elif args.action == "list": - list_templates() - elif args.action == "remove-all": - confirm = input("⚠️ Are you sure you want to remove ALL templates? (yes/no): ") - if confirm.lower() == "yes": - remove_all_templates() - else: - print("Operation cancelled.") - # Restore directory even if cancelled - os.chdir(original_cwd) - except Exception as e: - print(f"❌ Script failed: {e}") - # Ensure directory is restored on any error - os.chdir(original_cwd) - sys.exit(1) \ No newline at end of file