Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 103 additions & 53 deletions auto-analyst-backend/app.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions auto-analyst-backend/chat_database.db
Git LFS file not shown
3 changes: 3 additions & 0 deletions auto-analyst-backend/cleaned_property_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus_semi-furnished,normalized_price,normalized_area
13300000,7420,4,2,3,1,1,0,0,1,2,0,True,0.7071067811865476,-0.7071067811865476
12250000,8960,4,4,4,1,0,1,1,1,3,1,False,-0.7071067811865476,0.7071067811865476
3 changes: 3 additions & 0 deletions auto-analyst-backend/property_price_vs_area.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions auto-analyst-backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ openpyxl==3.1.2
xlrd==2.0.1
openai==1.61.0
pandas==2.2.3
polars==1.30.0
pillow==11.1.0
plotly==5.24.1
psycopg2==2.9.10
Expand Down
191 changes: 53 additions & 138 deletions auto-analyst-backend/scripts/populate_agent_templates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
Script to populate custom agent templates.
Script to populate agent templates.
These templates are available to all users, but only paid users can use them.
"""

Expand All @@ -12,21 +12,22 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from src.db.init_db import session_factory
from src.db.schemas.models import CustomAgent
from src.db.schemas.models import AgentTemplate
from sqlalchemy.exc import IntegrityError

# Template agent definitions
AGENT_TEMPLATES = {
"Visualization": [
{
"agent_name": "matplotlib_agent",
"template_name": "matplotlib_agent",
"display_name": "Matplotlib Visualization Agent",
"description": "Creates static publication-quality plots using matplotlib and seaborn",
"icon_url": "https://cdn.jsdelivr.net/gh/devicons/devicon/icons/matplotlib/matplotlib-original.svg",
"prompt_template": """
You are a matplotlib/seaborn visualization expert. Your task is to create high-quality static visualizations using matplotlib and seaborn libraries.

IMPORTANT Instructions:
- You must only use matplotlib, seaborn, and numpy/pandas for visualizations
- You must only use matplotlib, seaborn, and numpy/polars for visualizations
- Always use plt.style.use('seaborn-v0_8') or a clean style for better aesthetics
- Include proper titles, axis labels, and legends
- Use appropriate color palettes and consider accessibility
Expand All @@ -38,135 +39,56 @@
"""
},
{
"agent_name": "seaborn_agent",
"template_name": "seaborn_agent",
"display_name": "Seaborn Statistical Plots Agent",
"description": "Creates statistical visualizations and plots using seaborn library",
"description": "Creates statistical visualizations and data exploration plots using seaborn",
"icon_url": "https://seaborn.pydata.org/_images/logo-mark-lightbg.svg",
"prompt_template": """
You are a seaborn statistical visualization expert. Create insightful statistical plots using seaborn.
You are a seaborn statistical visualization expert. Your task is to create statistical plots and exploratory data visualizations.

IMPORTANT Instructions:
- Specialize in seaborn's statistical plotting capabilities
- Use seaborn's built-in statistical functions (regplot, distplot, boxplot, violin, etc.)
- Apply appropriate statistical themes and color palettes
- Include confidence intervals and statistical annotations where relevant
- Sample large datasets: if len(df) > 50000: df = df.sample(50000, random_state=42)
- Use plt.figure(figsize=(10, 6)) for appropriate sizing
- Always include proper statistical context in titles and labels
- Focus on seaborn for statistical plotting (distributions, relationships, categorical data)
- Use matplotlib as the backend for customization
- Create informative statistical plots: histograms, box plots, violin plots, pair plots, heatmaps
- Apply proper statistical annotations and significance testing where relevant
- Use seaborn's built-in themes and color palettes for professional appearance
- Include statistical summaries and insights in plot annotations
- Handle categorical and numerical data appropriately
- Always include proper legends, titles, and axis labels

Focus on revealing statistical relationships and distributions in the data.
Focus on revealing statistical patterns and relationships in data through visualization.
"""
},
{
"agent_name": "plotly_advanced_agent",
"display_name": "Advanced Plotly Agent",
"description": "Creates sophisticated interactive visualizations with advanced Plotly features",
"prompt_template": """
You are an advanced Plotly visualization expert. Create sophisticated interactive visualizations with advanced features.

IMPORTANT Instructions:
- Use advanced Plotly features: subplots, animations, 3D plots, statistical charts
- Implement interactive features: hover data, clickable legends, zoom, pan
- Use plotly.graph_objects for fine control and plotly.express for rapid prototyping
- Add annotations, shapes, and custom styling
- Sample data if len(df) > 50000: df = df.sample(50000, random_state=42)
- Use fig.update_layout() for professional styling
- Return fig.to_html(full_html=False) for embedding

Focus on creating publication-quality interactive visualizations with advanced features.
"""
}
],
"Modelling": [
{
"agent_name": "xgboost_agent",
"display_name": "XGBoost Machine Learning Agent",
"description": "Builds and optimizes XGBoost models for classification and regression tasks",
"prompt_template": """
You are an XGBoost machine learning expert. Build, tune, and evaluate XGBoost models.

IMPORTANT Instructions:
- Use XGBoost for both classification and regression tasks
- Implement proper train/validation/test splits
- Perform hyperparameter tuning using GridSearchCV or RandomizedSearchCV
- Handle categorical variables with proper encoding
- Include feature importance analysis and visualization
- Evaluate models with appropriate metrics (accuracy, precision, recall, F1, RMSE, MAE, etc.)
- Use cross-validation for robust model evaluation
- Plot training curves and validation curves
- Provide model interpretation and feature importance insights

Focus on building production-ready XGBoost models with proper evaluation and interpretation.
"""
},
{
"agent_name": "neural_network_agent",
"display_name": "Neural Network Agent",
"description": "Builds and trains neural networks using TensorFlow/Keras",
"prompt_template": """
You are a neural network expert using TensorFlow/Keras. Build and train neural networks for various tasks.

IMPORTANT Instructions:
- Design appropriate network architectures for the task (classification, regression, etc.)
- Implement proper data preprocessing and normalization
- Use appropriate activation functions, optimizers, and loss functions
- Implement callbacks: EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
- Plot training history (loss and metrics over epochs)
- Evaluate model performance with appropriate metrics
- Include model summary and architecture visualization
- Handle overfitting with dropout, regularization, or data augmentation
- Use train/validation/test splits properly

Focus on building effective neural networks with proper training procedures and evaluation.
"""
},
{
"agent_name": "time_series_agent",
"display_name": "Time Series Forecasting Agent",
"description": "Specialized in time series analysis and forecasting using ARIMA, Prophet, LSTM",
"prompt_template": """
You are a time series forecasting expert. Analyze temporal data and create forecasting models.

IMPORTANT Instructions:
- Perform exploratory time series analysis (trend, seasonality, stationarity)
- Use appropriate models: ARIMA, SARIMA, Prophet, LSTM, or ensemble methods
- Test for stationarity using ADF test and apply differencing if needed
- Decompose time series into trend, seasonal, and residual components
- Create forecasts with confidence intervals
- Evaluate forecasts using MAE, RMSE, MAPE metrics
- Plot actual vs predicted values and residuals
- Handle missing values and outliers appropriately
- Consider multiple seasonalities and external factors

Focus on accurate time series forecasting with proper validation and uncertainty quantification.
"""
}
],
"Data Manipulation": [
{
"agent_name": "pandas_expert_agent",
"display_name": "Pandas Data Expert Agent",
"description": "Advanced pandas operations for complex data manipulation and analysis",
"template_name": "polars_agent",
"display_name": "Polars Data Processing Agent",
"description": "High-performance data manipulation and analysis using Polars",
"icon_url": "https://raw.githubusercontent.com/pola-rs/polars-static/master/logos/polars-logo-dark.svg",
"prompt_template": """
You are a pandas expert specializing in advanced data manipulation and analysis.
You are a Polars data processing expert. Perform high-performance data manipulation and analysis using Polars.

IMPORTANT Instructions:
- Use advanced pandas operations: groupby, pivot, merge, concat, apply, transform
- Implement efficient data cleaning and preprocessing workflows
- Handle missing data with multiple strategies (imputation, dropping, flagging)
- Perform advanced aggregations and window functions
- Use vectorized operations for performance
- Handle large datasets efficiently with chunking if needed
- Create custom functions for complex transformations
- Use proper indexing and data types for optimization
- Include data quality checks and validation
- Use Polars for fast, memory-efficient data processing
- Leverage lazy evaluation with pl.scan_csv() and .lazy() for large datasets
- Implement efficient data transformations using Polars expressions
- Use Polars-specific methods for groupby, aggregations, and window functions
- Handle various data types and perform type conversions appropriately
- Optimize queries for performance using lazy evaluation and query optimization
- Implement complex data reshaping (pivots, melts, joins)
- Use Polars datetime functionality for time-based operations
- Convert to pandas only when necessary for visualization or other libraries
- Focus on performance and memory efficiency

Focus on efficient and robust data manipulation that prepares data for analysis or modeling.
Focus on leveraging Polars' speed and efficiency for data processing tasks.
"""
},
{
"agent_name": "data_cleaning_agent",
"template_name": "data_cleaning_agent",
"display_name": "Data Cleaning Specialist Agent",
"description": "Specialized in comprehensive data cleaning and quality assessment",
"icon_url": "https://cdn-icons-png.flaticon.com/512/2103/2103633.png",
"prompt_template": """
You are a data cleaning specialist. Perform comprehensive data quality assessment and cleaning.

Expand All @@ -186,9 +108,10 @@
"""
},
{
"agent_name": "feature_engineering_agent",
"template_name": "feature_engineering_agent",
"display_name": "Feature Engineering Agent",
"description": "Creates and transforms features for machine learning models",
"icon_url": "https://cdn-icons-png.flaticon.com/512/2103/2103658.png",
"prompt_template": """
You are a feature engineering expert. Create, transform, and select features for machine learning.

Expand Down Expand Up @@ -223,37 +146,34 @@ def populate_templates():
print(f"\n--- Processing {category} Templates ---")

for template_data in templates:
agent_name = template_data["agent_name"]
template_name = template_data["template_name"]

# Check if template already exists
existing = session.query(CustomAgent).filter(
CustomAgent.agent_name == agent_name,
CustomAgent.is_template == True
existing = session.query(AgentTemplate).filter(
AgentTemplate.template_name == template_name
).first()

if existing:
print(f"⏭️ Skipping {agent_name} (already exists)")
print(f"⏭️ Skipping {template_name} (already exists)")
skipped_count += 1
continue

# Create new template
template = CustomAgent(
user_id=None, # Templates don't belong to specific users
agent_name=agent_name,
template = AgentTemplate(
template_name=template_name,
display_name=template_data["display_name"],
description=template_data["description"],
icon_url=template_data["icon_url"],
prompt_template=template_data["prompt_template"],
is_template=True,
template_category=category,
category=category,
is_premium_only=True, # All templates require premium
is_active=True,
usage_count=0,
created_at=datetime.now(UTC),
updated_at=datetime.now(UTC)
)

session.add(template)
print(f"✅ Created template: {agent_name}")
print(f"✅ Created template: {template_name}")
created_count += 1

# Commit all changes
Expand All @@ -276,9 +196,7 @@ def list_templates():
session = session_factory()

try:
templates = session.query(CustomAgent).filter(
CustomAgent.is_template == True
).order_by(CustomAgent.template_category, CustomAgent.agent_name).all()
templates = session.query(AgentTemplate).order_by(AgentTemplate.category, AgentTemplate.template_name).all()

if not templates:
print("No templates found in database.")
Expand All @@ -288,15 +206,14 @@ def list_templates():

current_category = None
for template in templates:
if template.template_category != current_category:
current_category = template.template_category
if template.category != current_category:
current_category = template.category
print(f"\n{current_category}:")

status = "🔒 Premium" if template.is_premium_only else "🆓 Free"
active = "✅ Active" if template.is_active else "❌ Inactive"
print(f" • {template.agent_name} ({template.display_name}) - {status} - {active}")
print(f" • {template.template_name} ({template.display_name}) - {status} - {active}")
print(f" {template.description}")
print(f" Usage: {template.usage_count} times")

except Exception as e:
print(f"❌ Error listing templates: {str(e)}")
Expand All @@ -308,9 +225,7 @@ def remove_all_templates():
session = session_factory()

try:
deleted_count = session.query(CustomAgent).filter(
CustomAgent.is_template == True
).delete()
deleted_count = session.query(AgentTemplate).delete()

session.commit()
print(f"🗑️ Removed {deleted_count} templates")
Expand All @@ -324,7 +239,7 @@ def remove_all_templates():
if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser(description="Manage custom agent templates")
parser = argparse.ArgumentParser(description="Manage agent templates")
parser.add_argument("action", choices=["populate", "list", "remove-all"],
help="Action to perform")

Expand Down
Loading
Loading