diff --git a/config_error_handling_rules.yaml.txt b/config_error_handling_rules.yaml.txt
new file mode 100644
index 00000000..6cf80d65
--- /dev/null
+++ b/config_error_handling_rules.yaml.txt
@@ -0,0 +1,114 @@
+# Error Handling Rules Configuration
+
+# Error Category Definitions
+error_categories:
+ critical:
+ errors:
+ - "DatabaseConnectionError"
+ - "OutOfMemoryError"
+ - "SystemError"
+ - "FatalError"
+ - "SecurityError"
+ action: "alert_and_retry"
+
+ recoverable:
+ errors:
+ - "TimeoutError"
+ - "ConnectionError"
+ - "RateLimitError"
+ - "TemporaryError"
+ - "NetworkError"
+ - "ServiceUnavailableError"
+ action: "auto_retry"
+
+ ignorable:
+ errors:
+ - "ClientCancellationError"
+ - "NotFoundError"
+ - "ValidationError"
+ action: "log_only"
+
+ warning:
+ errors:
+ - "DeprecationWarning"
+ - "ConfigurationWarning"
+ action: "log_only"
+
+# Recovery Strategy Definitions
+auto_fix_strategies:
+ - error_type: "DatabaseConnectionError"
+ strategy: "reconnect_with_backoff"
+ max_attempts: 5
+ base_delay: 2
+
+ - error_type: "RateLimitError"
+ strategy: "rate_limit_backoff"
+ max_attempts: 3
+ base_delay: 60
+
+ - error_type: "TimeoutError"
+ strategy: "exponential_backoff"
+ max_attempts: 5
+ base_delay: 2
+
+ - error_type: "ConnectionError"
+ strategy: "reconnect_with_backoff"
+ max_attempts: 5
+ base_delay: 2
+
+ - error_type: "OutOfMemoryError"
+ strategy: "restart_with_increased_memory"
+ max_attempts: 1
+ base_delay: 10
+
+ - error_type: "SecurityError"
+ strategy: "refresh_credentials"
+ max_attempts: 3
+ base_delay: 5
+
+# Pattern-based Error Classification
+error_patterns:
+ - pattern: "connection.*refused|timeout|timed out"
+ category: "recoverable"
+ strategy: "exponential_backoff"
+ severity: 6
+
+ - pattern: "out of memory|oom|memory error"
+ category: "critical"
+ strategy: "restart_with_increased_memory"
+ severity: 10
+
+ - pattern: "rate limit|too many requests|429"
+ category: "recoverable"
+ strategy: "rate_limit_backoff"
+ severity: 4
+
+ - pattern: "database.*connection|db error|sql"
+ category: "critical"
+ strategy: "reconnect_with_backoff"
+ severity: 8
+
+ - pattern: "unauthorized|authentication|forbidden"
+ category: "critical"
+ strategy: "refresh_credentials"
+ severity: 9
+
+ - pattern: "not found|404"
+ category: "warning"
+ strategy: "log_and_continue"
+ severity: 3
+
+# Alert Configuration
+alerts:
+ # Enable email alerts
+ email_enabled: false
+ email_recipients:
+ - "devops@example.com"
+
+ # Enable Slack alerts
+ slack_enabled: false
+ slack_webhook: "${SLACK_WEBHOOK_URL}"
+
+ # Enable PagerDuty alerts
+ pagerduty_enabled: false
+ pagerduty_key: "${PAGERDUTY_KEY}"
\ No newline at end of file
diff --git a/config_stress_test_config.yaml (1).txt b/config_stress_test_config.yaml (1).txt
new file mode 100644
index 00000000..41963a7b
--- /dev/null
+++ b/config_stress_test_config.yaml (1).txt
@@ -0,0 +1,143 @@
+# Intelligent Stress Testing System Configuration
+
+# Stress Test Configuration
+stress_test:
+ # Test duration in seconds (0 = infinite)
+ duration: 3600
+
+ # Maximum concurrent users/connections
+ max_concurrent_users: 1000
+
+ # Ramp-up time in seconds
+ ramp_up_time: 300
+
+ # Base URL for testing
+ base_url: "http://localhost:8000"
+
+ # Test interval between cycles (seconds)
+ test_interval: 1
+
+ # Endpoints to test
+ endpoints:
+ - path: "/api/transactions"
+ method: "POST"
+ weight: 0.6
+ - path: "/api/accounts"
+ method: "GET"
+ weight: 0.3
+ - path: "/api/status"
+ method: "GET"
+ weight: 0.1
+
+# Error Handling Configuration
+error_handling:
+ # Maximum retry attempts
+ max_retries: 5
+
+ # Base retry delay in seconds
+ retry_delay: 2
+
+ # Use exponential backoff
+ exponential_backoff: true
+
+ # Enable automatic recovery
+ auto_recovery: true
+
+ # Error category definitions
+ error_categories:
+ critical:
+ - "DatabaseConnectionError"
+ - "OutOfMemoryError"
+ - "SystemError"
+ recoverable:
+ - "TimeoutError"
+ - "ConnectionError"
+ - "RateLimitError"
+ - "TemporaryError"
+ ignorable:
+ - "ClientCancellationError"
+ - "NotFoundError"
+
+ # Auto-fix strategies
+ auto_fix_strategies:
+ - error_type: "DatabaseConnectionError"
+ strategy: "reconnect_with_backoff"
+ - error_type: "RateLimitError"
+ strategy: "rate_limit_backoff"
+ - error_type: "TimeoutError"
+ strategy: "exponential_backoff"
+ - error_type: "ConnectionError"
+ strategy: "reconnect_with_backoff"
+
+# Monitoring Configuration
+monitoring:
+ # Metrics collection interval (seconds)
+ metrics_interval: 10
+
+ # Metrics aggregation interval (seconds)
+ aggregation_interval: 60
+
+ # Dashboard port
+ dashboard_port: 8080
+
+ # Enable alerts
+ enable_alerts: true
+
+ # Alert thresholds
+ alert_thresholds:
+ error_rate: 10.0 # percentage
+ stress_load: 90.0 # percentage
+
+ # Storage path for metrics
+ storage_path: "data/metrics.json"
+
+# Network Simulation (optional)
+network:
+ # Enable network simulation
+ enabled: false
+
+ # Simulated latency in milliseconds
+ latency_ms: 0
+
+ # Packet loss percentage
+ packet_loss: 0.0
+
+ # Bandwidth limit in Mbps
+ bandwidth_limit_mbps: 0
+
+# Pull Request Automation
+pr_automation:
+ # GitHub repository owner
+ repo_owner: "PiCoreTeam"
+
+ # GitHub repository name
+ repo_name: "pi-node-docker"
+
+ # Base branch for PRs
+ base_branch: "master"
+
+ # GitHub token (use environment variable)
+ github_token: "${GITHUB_TOKEN}"
+
+ # Auto-merge approved PRs
+ auto_merge: true
+
+ # Merge method (merge, squash, rebase)
+ merge_method: "squash"
+
+# Logging Configuration
+logging:
+ # Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+ level: "INFO"
+
+ # Log file path
+ file: "logs/stress_testing.log"
+
+ # Log format
+ format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+ # Maximum log file size in bytes
+ max_bytes: 10485760 # 10MB
+
+ # Number of backup log files
+ backup_count: 5
\ No newline at end of file
diff --git a/config_stress_test_config.yaml (2).txt b/config_stress_test_config.yaml (2).txt
new file mode 100644
index 00000000..41963a7b
--- /dev/null
+++ b/config_stress_test_config.yaml (2).txt
@@ -0,0 +1,143 @@
+# Intelligent Stress Testing System Configuration
+
+# Stress Test Configuration
+stress_test:
+ # Test duration in seconds (0 = infinite)
+ duration: 3600
+
+ # Maximum concurrent users/connections
+ max_concurrent_users: 1000
+
+ # Ramp-up time in seconds
+ ramp_up_time: 300
+
+ # Base URL for testing
+ base_url: "http://localhost:8000"
+
+ # Test interval between cycles (seconds)
+ test_interval: 1
+
+ # Endpoints to test
+ endpoints:
+ - path: "/api/transactions"
+ method: "POST"
+ weight: 0.6
+ - path: "/api/accounts"
+ method: "GET"
+ weight: 0.3
+ - path: "/api/status"
+ method: "GET"
+ weight: 0.1
+
+# Error Handling Configuration
+error_handling:
+ # Maximum retry attempts
+ max_retries: 5
+
+ # Base retry delay in seconds
+ retry_delay: 2
+
+ # Use exponential backoff
+ exponential_backoff: true
+
+ # Enable automatic recovery
+ auto_recovery: true
+
+ # Error category definitions
+ error_categories:
+ critical:
+ - "DatabaseConnectionError"
+ - "OutOfMemoryError"
+ - "SystemError"
+ recoverable:
+ - "TimeoutError"
+ - "ConnectionError"
+ - "RateLimitError"
+ - "TemporaryError"
+ ignorable:
+ - "ClientCancellationError"
+ - "NotFoundError"
+
+ # Auto-fix strategies
+ auto_fix_strategies:
+ - error_type: "DatabaseConnectionError"
+ strategy: "reconnect_with_backoff"
+ - error_type: "RateLimitError"
+ strategy: "rate_limit_backoff"
+ - error_type: "TimeoutError"
+ strategy: "exponential_backoff"
+ - error_type: "ConnectionError"
+ strategy: "reconnect_with_backoff"
+
+# Monitoring Configuration
+monitoring:
+ # Metrics collection interval (seconds)
+ metrics_interval: 10
+
+ # Metrics aggregation interval (seconds)
+ aggregation_interval: 60
+
+ # Dashboard port
+ dashboard_port: 8080
+
+ # Enable alerts
+ enable_alerts: true
+
+ # Alert thresholds
+ alert_thresholds:
+ error_rate: 10.0 # percentage
+ stress_load: 90.0 # percentage
+
+ # Storage path for metrics
+ storage_path: "data/metrics.json"
+
+# Network Simulation (optional)
+network:
+ # Enable network simulation
+ enabled: false
+
+ # Simulated latency in milliseconds
+ latency_ms: 0
+
+ # Packet loss percentage
+ packet_loss: 0.0
+
+ # Bandwidth limit in Mbps
+ bandwidth_limit_mbps: 0
+
+# Pull Request Automation
+pr_automation:
+ # GitHub repository owner
+ repo_owner: "PiCoreTeam"
+
+ # GitHub repository name
+ repo_name: "pi-node-docker"
+
+ # Base branch for PRs
+ base_branch: "master"
+
+ # GitHub token (use environment variable)
+ github_token: "${GITHUB_TOKEN}"
+
+ # Auto-merge approved PRs
+ auto_merge: true
+
+ # Merge method (merge, squash, rebase)
+ merge_method: "squash"
+
+# Logging Configuration
+logging:
+ # Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+ level: "INFO"
+
+ # Log file path
+ file: "logs/stress_testing.log"
+
+ # Log format
+ format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+ # Maximum log file size in bytes
+ max_bytes: 10485760 # 10MB
+
+ # Number of backup log files
+ backup_count: 5
\ No newline at end of file
diff --git a/config_stress_test_config.yaml.txt b/config_stress_test_config.yaml.txt
new file mode 100644
index 00000000..41963a7b
--- /dev/null
+++ b/config_stress_test_config.yaml.txt
@@ -0,0 +1,143 @@
+# Intelligent Stress Testing System Configuration
+
+# Stress Test Configuration
+stress_test:
+ # Test duration in seconds (0 = infinite)
+ duration: 3600
+
+ # Maximum concurrent users/connections
+ max_concurrent_users: 1000
+
+ # Ramp-up time in seconds
+ ramp_up_time: 300
+
+ # Base URL for testing
+ base_url: "http://localhost:8000"
+
+ # Test interval between cycles (seconds)
+ test_interval: 1
+
+ # Endpoints to test
+ endpoints:
+ - path: "/api/transactions"
+ method: "POST"
+ weight: 0.6
+ - path: "/api/accounts"
+ method: "GET"
+ weight: 0.3
+ - path: "/api/status"
+ method: "GET"
+ weight: 0.1
+
+# Error Handling Configuration
+error_handling:
+ # Maximum retry attempts
+ max_retries: 5
+
+ # Base retry delay in seconds
+ retry_delay: 2
+
+ # Use exponential backoff
+ exponential_backoff: true
+
+ # Enable automatic recovery
+ auto_recovery: true
+
+ # Error category definitions
+ error_categories:
+ critical:
+ - "DatabaseConnectionError"
+ - "OutOfMemoryError"
+ - "SystemError"
+ recoverable:
+ - "TimeoutError"
+ - "ConnectionError"
+ - "RateLimitError"
+ - "TemporaryError"
+ ignorable:
+ - "ClientCancellationError"
+ - "NotFoundError"
+
+ # Auto-fix strategies
+ auto_fix_strategies:
+ - error_type: "DatabaseConnectionError"
+ strategy: "reconnect_with_backoff"
+ - error_type: "RateLimitError"
+ strategy: "rate_limit_backoff"
+ - error_type: "TimeoutError"
+ strategy: "exponential_backoff"
+ - error_type: "ConnectionError"
+ strategy: "reconnect_with_backoff"
+
+# Monitoring Configuration
+monitoring:
+ # Metrics collection interval (seconds)
+ metrics_interval: 10
+
+ # Metrics aggregation interval (seconds)
+ aggregation_interval: 60
+
+ # Dashboard port
+ dashboard_port: 8080
+
+ # Enable alerts
+ enable_alerts: true
+
+ # Alert thresholds
+ alert_thresholds:
+ error_rate: 10.0 # percentage
+ stress_load: 90.0 # percentage
+
+ # Storage path for metrics
+ storage_path: "data/metrics.json"
+
+# Network Simulation (optional)
+network:
+ # Enable network simulation
+ enabled: false
+
+ # Simulated latency in milliseconds
+ latency_ms: 0
+
+ # Packet loss percentage
+ packet_loss: 0.0
+
+ # Bandwidth limit in Mbps
+ bandwidth_limit_mbps: 0
+
+# Pull Request Automation
+pr_automation:
+ # GitHub repository owner
+ repo_owner: "PiCoreTeam"
+
+ # GitHub repository name
+ repo_name: "pi-node-docker"
+
+ # Base branch for PRs
+ base_branch: "master"
+
+ # GitHub token (use environment variable)
+ github_token: "${GITHUB_TOKEN}"
+
+ # Auto-merge approved PRs
+ auto_merge: true
+
+ # Merge method (merge, squash, rebase)
+ merge_method: "squash"
+
+# Logging Configuration
+logging:
+ # Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+ level: "INFO"
+
+ # Log file path
+ file: "logs/stress_testing.log"
+
+ # Log format
+ format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+ # Maximum log file size in bytes
+ max_bytes: 10485760 # 10MB
+
+ # Number of backup log files
+ backup_count: 5
\ No newline at end of file
diff --git a/src_common_constants.py b/src_common_constants.py
new file mode 100644
index 00000000..db4bb57d
--- /dev/null
+++ b/src_common_constants.py
@@ -0,0 +1,74 @@
+"""
+System Constants
+Centralized constants used across the system
+"""
+
+# Version
+VERSION = "1.0.0"
+SYSTEM_NAME = "Intelligent Stress Testing System"
+
+# Error Categories
+ERROR_CATEGORY_CRITICAL = "critical"
+ERROR_CATEGORY_RECOVERABLE = "recoverable"
+ERROR_CATEGORY_IGNORABLE = "ignorable"
+ERROR_CATEGORY_WARNING = "warning"
+
+# Error Actions
+ERROR_ACTION_ALERT_AND_RETRY = "alert_and_retry"
+ERROR_ACTION_AUTO_RETRY = "auto_retry"
+ERROR_ACTION_LOG_ONLY = "log_only"
+ERROR_ACTION_ESCALATE = "escalate"
+
+# Recovery Strategies
+STRATEGY_GENERIC_RETRY = "generic_retry"
+STRATEGY_EXPONENTIAL_BACKOFF = "exponential_backoff"
+STRATEGY_RECONNECT = "reconnect_with_backoff"
+STRATEGY_RATE_LIMIT = "rate_limit_backoff"
+STRATEGY_RESTART = "restart_with_increased_memory"
+STRATEGY_REFRESH_CREDENTIALS = "refresh_credentials"
+STRATEGY_LOG_CONTINUE = "log_and_continue"
+
+# Default Configuration Values
+DEFAULT_MAX_RETRIES = 5
+DEFAULT_RETRY_DELAY = 2.0
+DEFAULT_MAX_CONCURRENT = 100
+DEFAULT_TEST_DURATION = 3600
+DEFAULT_METRICS_INTERVAL = 10
+DEFAULT_DASHBOARD_PORT = 8080
+
+# HTTP Status Codes
+HTTP_OK = 200
+HTTP_CREATED = 201
+HTTP_BAD_REQUEST = 400
+HTTP_UNAUTHORIZED = 401
+HTTP_FORBIDDEN = 403
+HTTP_NOT_FOUND = 404
+HTTP_RATE_LIMIT = 429
+HTTP_SERVER_ERROR = 500
+
+# Metrics Thresholds
+ERROR_RATE_WARNING_THRESHOLD = 5.0 # percentage
+ERROR_RATE_CRITICAL_THRESHOLD = 10.0 # percentage
+LOAD_WARNING_THRESHOLD = 70.0 # percentage
+LOAD_CRITICAL_THRESHOLD = 90.0 # percentage
+
+# Time Constants
+SECOND = 1
+MINUTE = 60
+HOUR = 3600
+DAY = 86400
+
+# Storage Paths
+DEFAULT_LOGS_DIR = "logs"
+DEFAULT_DATA_DIR = "data"
+DEFAULT_CONFIG_DIR = "config"
+
+# GitHub PR Settings
+PR_LABEL_AUTOMATED = "automated"
+PR_LABEL_STRESS_TEST = "stress-test"
+PR_LABEL_ERROR_FIX = "error-fix"
+
+# Dashboard Settings
+DASHBOARD_UPDATE_INTERVAL = 2 # seconds
+METRICS_HISTORY_LIMIT = 1000
+METRICS_DISPLAY_LIMIT = 100
\ No newline at end of file
diff --git a/src_common_logger.py b/src_common_logger.py
new file mode 100644
index 00000000..52f1785c
--- /dev/null
+++ b/src_common_logger.py
@@ -0,0 +1,75 @@
+"""
+Logging Configuration
+Centralized logging setup for all components
+"""
+
import logging
import logging.handlers
import sys
from pathlib import Path
from typing import Any, Dict, Optional
+
+
def setup_logging(config: Optional[Dict[str, Any]] = None) -> logging.Logger:
    """
    Setup centralized logging configuration.

    Installs a console handler (stdout) and a size-rotating file handler
    on the root logger, replacing any handlers already present.

    Args:
        config: Logging configuration dictionary with optional keys
            'level', 'file', 'format', 'max_bytes' and 'backup_count'.
            Missing keys fall back to sensible defaults.

    Returns:
        The application logger ('stress_testing_system').

    Raises:
        AttributeError: If 'level' is not a valid logging level name.
    """
    if config is None:
        config = {}

    # Resolve configuration values (with defaults).
    log_level = config.get('level', 'INFO')
    log_file = config.get('file', 'logs/stress_testing.log')
    log_format = config.get(
        'format',
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    max_bytes = config.get('max_bytes', 10 * 1024 * 1024)  # 10MB
    backup_count = config.get('backup_count', 5)

    # Resolve the numeric level once instead of repeating the getattr()
    # lookup for every handler.
    numeric_level = getattr(logging, str(log_level).upper())

    # Create logs directory if it doesn't exist.
    log_path = Path(log_file)
    log_path.parent.mkdir(parents=True, exist_ok=True)

    # Create formatter shared by both handlers.
    formatter = logging.Formatter(log_format)

    # Configure the root logger so module loggers inherit the handlers.
    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Remove existing handlers to avoid duplicate output on repeated setup.
    root_logger.handlers = []

    # Console handler.
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(numeric_level)
    console_handler.setFormatter(formatter)
    root_logger.addHandler(console_handler)

    # File handler with size-based rotation.
    file_handler = logging.handlers.RotatingFileHandler(
        log_file,
        maxBytes=max_bytes,
        backupCount=backup_count
    )
    file_handler.setLevel(numeric_level)
    file_handler.setFormatter(formatter)
    root_logger.addHandler(file_handler)

    # Create application logger (child of root, inherits handlers).
    logger = logging.getLogger('stress_testing_system')
    logger.info("Logging system initialized")

    return logger
+
+
def get_logger(name: str) -> logging.Logger:
    """Return a child logger of the application logger for *name*."""
    qualified = 'stress_testing_system.' + name
    return logging.getLogger(qualified)
\ No newline at end of file
diff --git a/src_common_utils.py b/src_common_utils.py
new file mode 100644
index 00000000..3ff19f74
--- /dev/null
+++ b/src_common_utils.py
@@ -0,0 +1,137 @@
+"""
+Common Utility Functions
+Shared utility functions used across the system
+"""
+
+import yaml
+import json
+import os
+from pathlib import Path
+from typing import Dict, Any
+import hashlib
+import time
+
+
def load_config(config_path: str) -> Dict[str, Any]:
    """
    Load a configuration file (YAML or JSON) and expand environment
    variable references via substitute_env_vars().

    Args:
        config_path: Path to configuration file.

    Returns:
        Configuration dictionary.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the extension is not .yaml/.yml/.json.
    """
    config_file = Path(config_path)
    if not config_file.exists():
        raise FileNotFoundError(f"Configuration file not found: {config_path}")

    with open(config_file, 'r') as handle:
        if config_path.endswith(('.yaml', '.yml')):
            raw = yaml.safe_load(handle)
        elif config_path.endswith('.json'):
            raw = json.load(handle)
        else:
            raise ValueError(f"Unsupported configuration file format: {config_path}")

    # Expand ${VAR}-style references before handing the config out.
    return substitute_env_vars(raw)
+
+
def substitute_env_vars(data: Any) -> Any:
    """
    Recursively substitute environment variables in configuration.

    Supports whole-string references of the form ``${VAR_NAME}`` or
    ``${VAR_NAME:default_value}``. An unset variable without a default
    is left as the original ``${VAR_NAME}`` text. Dicts and lists are
    walked recursively; all other values pass through unchanged.
    """
    if isinstance(data, str):
        looks_like_reference = data.startswith('${') and data.endswith('}')
        if not looks_like_reference:
            return data
        expression = data[2:-1]
        if ':' not in expression:
            # No default given: fall back to the raw "${VAR}" text.
            return os.getenv(expression, data)
        name, default = expression.split(':', 1)
        return os.getenv(name, default)
    if isinstance(data, dict):
        return {key: substitute_env_vars(value) for key, value in data.items()}
    if isinstance(data, list):
        return [substitute_env_vars(entry) for entry in data]
    return data
+
+
def save_json(data: Dict[str, Any], filepath: str, pretty: bool = True):
    """Write *data* to *filepath* as JSON, creating parent directories.

    Args:
        data: JSON-serializable dictionary.
        filepath: Destination file path.
        pretty: If True (default), indent the output by 2 spaces.
    """
    target = Path(filepath)
    target.parent.mkdir(parents=True, exist_ok=True)

    indent = 2 if pretty else None
    with open(target, 'w') as handle:
        json.dump(data, handle, indent=indent)
+
+
def load_json(filepath: str) -> Dict[str, Any]:
    """Read *filepath* and return its parsed JSON contents."""
    with open(filepath, 'r') as handle:
        content = handle.read()
    return json.loads(content)
+
+
def calculate_hash(data: str, algorithm: str = 'sha256') -> str:
    """Return the hex digest of *data* (UTF-8 encoded) under *algorithm*.

    Raises:
        ValueError: If *algorithm* is not supported by hashlib.
    """
    digest = hashlib.new(algorithm, data.encode('utf-8'))
    return digest.hexdigest()
+
+
def format_bytes(bytes_value: int) -> str:
    """Format a byte count as a human-readable string (e.g. '1.50 KB').

    Values of 1024**5 and above are reported in petabytes.
    """
    size = float(bytes_value)
    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        if size < 1024.0:
            return f"{size:.2f} {unit}"
        size /= 1024.0
    return f"{size:.2f} PB"
+
+
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as a human-readable string.

    Uses seconds below a minute, then minutes, hours, and days.
    """
    # (upper bound in seconds, divisor, suffix) per output unit.
    for limit, divisor, suffix in ((60, 1, 's'), (3600, 60, 'm'), (86400, 3600, 'h')):
        if seconds < limit:
            return f"{seconds / divisor:.2f}{suffix}"
    return f"{seconds / 86400:.2f}d"
+
+
def ensure_directory(path: str):
    """Create *path* (and any missing parents) if it does not exist."""
    target = Path(path)
    target.mkdir(parents=True, exist_ok=True)
+
+
def timestamp_to_iso(timestamp: float) -> str:
    """Convert a Unix timestamp to an ISO-8601 string.

    NOTE(review): produces a naive datetime in the local timezone —
    confirm whether UTC is expected by consumers.
    """
    from datetime import datetime
    moment = datetime.fromtimestamp(timestamp)
    return moment.isoformat()
+
+
class Timer:
    """Context manager that measures and prints the wall-clock duration
    of the enclosed block."""

    def __init__(self, name: str = "Operation"):
        # Label used in the printed report.
        self.name = name
        self.start_time = None  # set on __enter__
        self.duration = None    # set on __exit__

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, *args):
        elapsed = time.time() - self.start_time
        self.duration = elapsed
        print(f"{self.name} took {format_duration(elapsed)}")
\ No newline at end of file
diff --git a/src_dashboard_backend_api.py b/src_dashboard_backend_api.py
new file mode 100644
index 00000000..ad95931f
--- /dev/null
+++ b/src_dashboard_backend_api.py
@@ -0,0 +1,113 @@
+"""
+Dashboard API Server
+Provides RESTful API for dashboard frontend
+"""
+
+import asyncio
+import logging
+from typing import Dict, Any
+from aiohttp import web
+
+
+
class DashboardAPI:
    """RESTful API server for dashboard.

    Wraps an aiohttp web application exposing current metrics, metrics
    history, a health endpoint, and a minimal HTML landing page.
    """

    def __init__(self, metrics_collector, config: Dict[str, Any]):
        # metrics_collector: object exposing get_current_metrics() and
        # get_metrics_history(limit) — presumably MetricsCollector; confirm.
        self.metrics_collector = metrics_collector
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.app = web.Application()
        self._setup_routes()

    def _setup_routes(self):
        """Setup API routes"""
        self.app.router.add_get('/api/metrics/current', self.get_current_metrics)
        self.app.router.add_get('/api/metrics/history', self.get_metrics_history)
        self.app.router.add_get('/api/health', self.health_check)
        self.app.router.add_get('/', self.serve_dashboard)

        # Enable CORS for every route registered above.
        self.app.middlewares.append(self._cors_middleware)

    @web.middleware
    async def _cors_middleware(self, request, handler):
        """CORS middleware with basic preflight handling and restricted origins."""
        origin = request.headers.get('Origin')

        # Allow overriding CORS origins via config; default to localhost-only.
        allowed_origins = self.config.get(
            'allowed_origins',
            ['http://localhost', 'http://127.0.0.1']
        )

        # Handle preflight OPTIONS requests directly to avoid 405s on handlers.
        if request.method == 'OPTIONS':
            response = web.Response(status=204)
        else:
            response = await handler(request)

        # Set CORS headers only for allowed origins.
        if origin and origin in allowed_origins:
            response.headers['Access-Control-Allow-Origin'] = origin
            # Ensure caches vary on Origin when CORS is applied.
            existing_vary = response.headers.get('Vary')
            if existing_vary:
                if 'Origin' not in existing_vary:
                    response.headers['Vary'] = existing_vary + ', Origin'
            else:
                response.headers['Vary'] = 'Origin'

        # NOTE(review): the three headers below are set even when the
        # origin is not allow-listed — harmless without Allow-Origin, but
        # confirm this is intended.
        response.headers['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS'
        response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
        response.headers['Access-Control-Max-Age'] = '3600'
        return response

    async def get_current_metrics(self, request):
        """Get current metrics"""
        try:
            metrics = self.metrics_collector.get_current_metrics()
            return web.json_response(metrics)
        except Exception as e:
            self.logger.error(f"Error getting current metrics: {e}")
            return web.json_response({'error': str(e)}, status=500)

    async def get_metrics_history(self, request):
        """Get metrics history (up to ?limit= entries, default 100)."""
        try:
            limit = int(request.query.get('limit', 100))
            history = self.metrics_collector.get_metrics_history(limit)
            return web.json_response({'history': history})
        except Exception as e:
            self.logger.error(f"Error getting metrics history: {e}")
            return web.json_response({'error': str(e)}, status=500)

    async def health_check(self, request):
        """Health check endpoint"""
        return web.json_response({'status': 'healthy', 'timestamp': asyncio.get_event_loop().time()})

    async def serve_dashboard(self, request):
        """Serve dashboard HTML"""
        # NOTE(review): this inline page appears truncated/garbled in the
        # source under review — verify against the original file.
        html = """



 Stress Testing Dashboard



 Stress Testing Dashboard
 API is running. Use /api/metrics/current to get metrics.


        """
        return web.Response(text=html, content_type='text/html')

    async def start(self):
        """Start the API server (binds 0.0.0.0 on config 'port', default 8080)."""
        port = self.config.get('port', 8080)
        runner = web.AppRunner(self.app)
        await runner.setup()
        site = web.TCPSite(runner, '0.0.0.0', port)
        await site.start()
        self.logger.info(f"Dashboard API started on port {port}")
\ No newline at end of file
diff --git a/src_dashboard_backend_metrics_collector.py b/src_dashboard_backend_metrics_collector.py
new file mode 100644
index 00000000..8b192a8a
--- /dev/null
+++ b/src_dashboard_backend_metrics_collector.py
@@ -0,0 +1,142 @@
+"""
+Metrics Collection System
+Collects, aggregates, and stores system metrics
+"""
+
+
+import logging
+import time
+from typing import Dict, Any, List
+from dataclasses import dataclass, asdict
+from datetime import datetime
+import json
+
+
@dataclass
class SystemMetrics:
    """Container for system metrics (one snapshot in time)."""
    timestamp: float                    # Unix time of the snapshot
    error_rate: float = 0.0             # failed/total requests, percent
    stress_load: float = 0.0            # current load, percent
    bypassed_errors: int = 0            # errors automatically bypassed
    fixed_errors: int = 0               # errors automatically fixed
    manual_intervention_count: int = 0  # errors requiring a human
    success_rate: float = 100.0         # successful/total requests, percent
    uptime: float = 0.0                 # seconds since collector start
    active_recovery_processes: int = 0  # recoveries currently in flight
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
+
+
class MetricsCollector:
    """Collects and manages system metrics.

    Keeps one live SystemMetrics snapshot plus a bounded in-memory
    history, and persists aggregated snapshots to ``storage_path``.
    """

    # Upper bound on snapshots retained in metrics_history.
    HISTORY_LIMIT = 1000

    def __init__(self, config: Dict[str, Any]):
        """
        Args:
            config: Collector settings; honours 'storage_path'
                (default: 'data/metrics.json').
        """
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.metrics = SystemMetrics(timestamp=time.time())
        self.metrics_history: List[SystemMetrics] = []
        self.start_time = time.time()
        self.storage_path = config.get('storage_path', 'data/metrics.json')

    async def record_test_results(self, results):
        """Accumulate request totals from one stress-test cycle.

        Args:
            results: Object exposing ``total_requests``,
                ``successful_requests`` and ``failed_requests`` counters.
        """
        self.metrics.total_requests += results.total_requests
        self.metrics.successful_requests += results.successful_requests
        self.metrics.failed_requests += results.failed_requests

        # Recompute derived percentages; guard against division by zero.
        if self.metrics.total_requests > 0:
            self.metrics.error_rate = (
                self.metrics.failed_requests / self.metrics.total_requests
            ) * 100
            self.metrics.success_rate = (
                self.metrics.successful_requests / self.metrics.total_requests
            ) * 100

        self.logger.debug(f"Recorded test results: {results.total_requests} requests")

    async def record_error(self, classification):
        """Log a classified error.

        NOTE(review): currently log-only; the per-category counters are
        updated via the increment_* methods instead.
        """
        self.logger.debug(f"Recorded error: {classification.error_type}")

    async def record_critical_error(self, exception: Exception):
        """Log an unrecoverable error at CRITICAL level."""
        self.logger.critical(f"Critical error recorded: {exception}")

    async def increment_active_recovery(self):
        """Increment active recovery process count"""
        self.metrics.active_recovery_processes += 1

    async def decrement_active_recovery(self):
        """Decrement active recovery process count (never below zero)."""
        self.metrics.active_recovery_processes = max(
            0,
            self.metrics.active_recovery_processes - 1
        )

    async def increment_fixed_errors(self):
        """Increment fixed errors count"""
        self.metrics.fixed_errors += 1
        self.logger.info(f"Fixed errors count: {self.metrics.fixed_errors}")

    async def increment_bypassed_errors(self):
        """Increment bypassed errors count"""
        self.metrics.bypassed_errors += 1
        self.logger.info(f"Bypassed errors count: {self.metrics.bypassed_errors}")

    async def increment_manual_intervention_count(self):
        """Increment manual intervention count"""
        self.metrics.manual_intervention_count += 1
        self.logger.warning(
            f"Manual intervention count: {self.metrics.manual_intervention_count}"
        )

    async def record_system_metrics(self, metrics: Dict[str, Any]):
        """Record system-level metrics (load percentage and uptime)."""
        self.metrics.stress_load = metrics.get('load', 0.0)
        self.metrics.uptime = time.time() - self.start_time

    async def aggregate_and_store(self):
        """Snapshot the live metrics into history and persist to disk."""
        # Update timestamp on the live snapshot.
        self.metrics.timestamp = time.time()

        # Append a copy so later mutations don't rewrite history.
        self.metrics_history.append(SystemMetrics(**asdict(self.metrics)))

        # Bound the in-memory history.
        if len(self.metrics_history) > self.HISTORY_LIMIT:
            self.metrics_history = self.metrics_history[-self.HISTORY_LIMIT:]

        # Store to file
        await self._save_metrics()

        self.logger.debug("Metrics aggregated and stored")

    async def _save_metrics(self):
        """Persist the current snapshot and history to ``storage_path``.

        Fix: this was previously a logging-only placeholder even though
        'storage_path' was read from the configuration and documented as
        the metrics storage location.
        """
        try:
            from pathlib import Path

            target = Path(self.storage_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            payload = {
                'current': asdict(self.metrics),
                'history': [asdict(m) for m in self.metrics_history],
            }
            with open(target, 'w') as handle:
                json.dump(payload, handle, indent=2)

            self.logger.debug(f"Metrics saved: {len(self.metrics_history)} entries")

        except Exception as e:
            # Best-effort persistence: a failed save must not crash the
            # collector loop.
            self.logger.error(f"Error saving metrics: {e}")

    def get_current_metrics(self) -> Dict[str, Any]:
        """Get current metrics as dictionary"""
        return asdict(self.metrics)

    def get_metrics_history(self, limit: int = 100) -> List[Dict[str, Any]]:
        """Return the most recent *limit* snapshots as dictionaries."""
        return [asdict(m) for m in self.metrics_history[-limit:]]

    async def close(self):
        """Flush metrics to disk and shut the collector down."""
        await self._save_metrics()
        self.logger.info("Metrics collector closed")
\ No newline at end of file
diff --git a/src_dashboard_frontend_index.html b/src_dashboard_frontend_index.html
new file mode 100644
index 00000000..6f1d77c1
--- /dev/null
+++ b/src_dashboard_frontend_index.html
@@ -0,0 +1,156 @@
+
+
+
+
+
+ Intelligent Stress Testing Dashboard
+
+
+
+
+
+
+
+
+
+
+
+
+
⚠️
+
+
Error Rate
+
0.00%
+
of total requests
+
+
+
+
+
+
📊
+
+
Stress Load
+
0.00%
+
current capacity
+
+
+
+
+
+
⏭️
+
+
Bypassed Errors
+
0
+
auto-handled
+
+
+
+
+
+
✅
+
+
Fixed Errors
+
0
+
automatically resolved
+
+
+
+
+
+
👤
+
+
Manual Intervention
+
0
+
requires attention
+
+
+
+
+
+
🎯
+
+
Success Rate
+
100.00%
+
overall performance
+
+
+
+
+
+
⏱️
+
+
System Uptime
+
0h 0m
+
continuous operation
+
+
+
+
+
+
🔧
+
+
Active Recovery
+
0
+
processes running
+
+
+
+
+
+
+
+
Error Rate Trend
+
+
+
+
Request Statistics
+
+
+
+
+
+
+
📈 Detailed Statistics
+
+
+
+ | Metric |
+ Current Value |
+ Total |
+
+
+
+
+ | Total Requests |
+ 0 |
+ - |
+
+
+ | Successful Requests |
+ 0 |
+ 0% |
+
+
+ | Failed Requests |
+ 0 |
+ 0% |
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src_dashboard_frontend_styles.css b/src_dashboard_frontend_styles.css
new file mode 100644
index 00000000..7756c79c
--- /dev/null
+++ b/src_dashboard_frontend_styles.css
@@ -0,0 +1,243 @@
+/* Global Styles */
+* {
+ margin: 0;
+ padding: 0;
+ box-sizing: border-box;
+}
+
+body {
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+ color: #333;
+ line-height: 1.6;
+ min-height: 100vh;
+}
+
+.container {
+ max-width: 1400px;
+ margin: 0 auto;
+ padding: 20px;
+}
+
+/* Header */
+.header {
+ background: white;
+ padding: 30px;
+ border-radius: 15px;
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
+ margin-bottom: 30px;
+ display: flex;
+ justify-content: space-between;
+ align-items: center;
+}
+
+.header h1 {
+ font-size: 2.5rem;
+ color: #667eea;
+ text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.1);
+}
+
+.status-indicator {
+ display: flex;
+ align-items: center;
+ gap: 10px;
+ font-size: 1.1rem;
+}
+
+.status-dot {
+ width: 15px;
+ height: 15px;
+ border-radius: 50%;
+ background-color: #ffc107;
+ animation: pulse 2s infinite;
+}
+
+.status-dot.online {
+ background-color: #4caf50;
+}
+
+.status-dot.offline {
+ background-color: #f44336;
+ animation: none;
+}
+
+@keyframes pulse {
+ 0%, 100% {
+ opacity: 1;
+ }
+ 50% {
+ opacity: 0.5;
+ }
+}
+
+/* Metrics Grid */
+.metrics-grid {
+ display: grid;
+ grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+ gap: 20px;
+ margin-bottom: 30px;
+}
+
+.metric-card {
+ background: white;
+ padding: 25px;
+ border-radius: 15px;
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
+ display: flex;
+ align-items: center;
+ gap: 20px;
+ transition: transform 0.3s ease, box-shadow 0.3s ease;
+}
+
+.metric-card:hover {
+ transform: translateY(-5px);
+ box-shadow: 0 10px 25px rgba(0, 0, 0, 0.15);
+}
+
+.metric-card.success {
+ border-left: 5px solid #4caf50;
+}
+
+.metric-card.warning {
+ border-left: 5px solid #ff9800;
+}
+
+.metric-card.error {
+ border-left: 5px solid #f44336;
+}
+
+.metric-icon {
+ font-size: 3rem;
+}
+
+.metric-content {
+ flex: 1;
+}
+
+.metric-content h3 {
+ font-size: 0.9rem;
+ color: #666;
+ text-transform: uppercase;
+ letter-spacing: 1px;
+ margin-bottom: 10px;
+}
+
+.metric-value {
+ font-size: 2.5rem;
+ font-weight: bold;
+ color: #667eea;
+ margin-bottom: 5px;
+}
+
+.metric-label {
+ font-size: 0.85rem;
+ color: #999;
+}
+
+/* Charts Section */
+.charts-section {
+ display: grid;
+ grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
+ gap: 20px;
+ margin-bottom: 30px;
+}
+
+.chart-card {
+ background: white;
+ padding: 25px;
+ border-radius: 15px;
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
+}
+
+.chart-card h3 {
+ margin-bottom: 20px;
+ color: #667eea;
+ font-size: 1.3rem;
+}
+
+/* Statistics Table */
+.stats-section {
+ background: white;
+ padding: 30px;
+ border-radius: 15px;
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
+ margin-bottom: 30px;
+}
+
+.stats-section h2 {
+ margin-bottom: 20px;
+ color: #667eea;
+}
+
+.stats-table {
+ width: 100%;
+ border-collapse: collapse;
+}
+
+.stats-table th,
+.stats-table td {
+ padding: 15px;
+ text-align: left;
+ border-bottom: 1px solid #eee;
+}
+
+.stats-table th {
+ background-color: #f8f9fa;
+ font-weight: 600;
+ color: #667eea;
+}
+
+.stats-table tr:hover {
+ background-color: #f8f9fa;
+}
+
+/* Footer */
+.footer {
+ background: white;
+ padding: 20px;
+ border-radius: 15px;
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
+ text-align: center;
+ color: #666;
+}
+
+.footer p {
+ margin: 5px 0;
+}
+
+/* Responsive Design */
+@media (max-width: 768px) {
+ .header {
+ flex-direction: column;
+ gap: 20px;
+ text-align: center;
+ }
+
+ .header h1 {
+ font-size: 1.8rem;
+ }
+
+ .metrics-grid {
+ grid-template-columns: 1fr;
+ }
+
+ .charts-section {
+ grid-template-columns: 1fr;
+ }
+}
+
+/* Loading Animation */
+@keyframes shimmer {
+ 0% {
+ background-position: -1000px 0;
+ }
+ 100% {
+ background-position: 1000px 0;
+ }
+}
+
+.loading {
+ background: linear-gradient(to right, #f6f7f8 0%, #edeef1 20%, #f6f7f8 40%, #f6f7f8 100%);
+ background-size: 1000px 100%;
+ animation: shimmer 2s infinite linear;
+}
\ No newline at end of file
diff --git a/src_error_handler_auto_recovery.py b/src_error_handler_auto_recovery.py
new file mode 100644
index 00000000..6345bab8
--- /dev/null
+++ b/src_error_handler_auto_recovery.py
@@ -0,0 +1,269 @@
+"""
+Automatic Error Recovery System
+Handles error recovery with retry logic and multiple strategies
+"""
+
+import asyncio
+import logging
+import time
+from typing import Optional, Dict, Any, Callable
+from dataclasses import dataclass
+from error_handler.error_classifier import ErrorClassification, ErrorAction
+from error_handler.retry_manager import RetryManager
+
+
@dataclass
class RecoveryResult:
    """Result of a recovery attempt, as reported by a recovery strategy."""
    success: bool                        # True when the strategy considers the error recovered
    attempts: int                        # number of attempts the strategy made
    duration: float                      # seconds the strategy spent (its sleeps/backoffs)
    strategy_used: str                   # name of the strategy that produced this result
    error_message: Optional[str] = None  # failure detail; only set when success is False
+
+
class AutoRecoveryManager:
    """Manages automatic error recovery with multiple strategies.

    Strategy parameters (max attempts / base delay) are read from the
    'auto_fix_strategies' section of the config when present, falling back
    to the previous hard-coded defaults — the old code ignored that config
    section entirely. Behaviour is unchanged when the config omits it.
    """

    def __init__(self, config: Dict[str, Any]):
        """Create a manager driven by the error-handling config mapping."""
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.retry_manager = RetryManager(config)
        self.recovery_strategies = self._initialize_strategies()
        # Count of recover() calls currently in flight; consulted by stop().
        # Plain int bookkeeping: all mutation happens on one event loop.
        self.active_recoveries = 0

    def _initialize_strategies(self) -> Dict[str, Callable]:
        """Map strategy names (as used by classifications) to coroutines."""
        return {
            'generic_retry': self._strategy_generic_retry,
            'exponential_backoff': self._strategy_exponential_backoff,
            'reconnect_with_backoff': self._strategy_reconnect_with_backoff,
            'rate_limit_backoff': self._strategy_rate_limit_backoff,
            'restart_with_increased_memory': self._strategy_restart_component,
            'refresh_credentials': self._strategy_refresh_credentials,
            'log_and_continue': self._strategy_log_and_continue,
        }

    def _strategy_params(self, strategy_name: str, default_attempts: int,
                         default_delay: float) -> tuple:
        """Return (max_attempts, base_delay) for a strategy.

        Looks the strategy up in the config's 'auto_fix_strategies' list
        (entries shaped like {'strategy': ..., 'max_attempts': ...,
        'base_delay': ...}); strategies without a config entry get the
        supplied defaults, which match the previously hard-coded values.
        """
        for entry in self.config.get('auto_fix_strategies', []) or []:
            if entry.get('strategy') == strategy_name:
                return (entry.get('max_attempts', default_attempts),
                        entry.get('base_delay', default_delay))
        return default_attempts, default_delay

    @staticmethod
    def _backoff_total(base_delay: float, attempts: int) -> float:
        """Total sleep time of `attempts` exponential-backoff waits."""
        return sum(base_delay * (2 ** i) for i in range(attempts))

    async def recover(
        self,
        error: Exception,
        classification: "ErrorClassification"
    ) -> bool:
        """
        Attempt to recover from an error using the classified strategy.

        Unknown strategy names fall back to the generic retry strategy.
        Returns True if recovery was successful.
        """
        self.active_recoveries += 1
        start_time = time.time()

        try:
            self.logger.info(
                f"Starting recovery for {classification.error_type} "
                f"using strategy: {classification.recommended_strategy}"
            )

            # Get the recovery strategy (generic retry as fallback)
            strategy = self.recovery_strategies.get(
                classification.recommended_strategy,
                self._strategy_generic_retry
            )

            # Execute recovery with retry logic
            result = await self.retry_manager.execute_with_retry(
                strategy,
                error,
                classification
            )

            duration = time.time() - start_time

            if result.success:
                self.logger.info(
                    f"✅ Recovery successful after {result.attempts} attempts "
                    f"in {duration:.2f}s"
                )
            else:
                self.logger.warning(
                    f"❌ Recovery failed after {result.attempts} attempts "
                    f"in {duration:.2f}s: {result.error_message}"
                )

            return result.success

        except Exception as e:
            self.logger.error(f"Error during recovery process: {e}", exc_info=True)
            return False
        finally:
            self.active_recoveries -= 1

    # Recovery Strategies

    async def _strategy_generic_retry(
        self,
        error: Exception,
        classification: "ErrorClassification"
    ) -> "RecoveryResult":
        """Generic fallback: brief pause, then report success."""
        self.logger.info("Executing generic retry strategy")

        # Simple retry with small delay
        await asyncio.sleep(1)

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=1.0,
            strategy_used='generic_retry'
        )

    async def _strategy_exponential_backoff(
        self,
        error: Exception,
        classification: "ErrorClassification"
    ) -> "RecoveryResult":
        """Exponential backoff retry strategy (delay doubles per attempt)."""
        max_attempts = self.config.get('max_retries', 5)
        base_delay = self.config.get('retry_delay', 2)

        for attempt in range(max_attempts):
            delay = base_delay * (2 ** attempt)
            self.logger.info(f"Attempt {attempt + 1}/{max_attempts}, waiting {delay}s...")

            await asyncio.sleep(delay)

            # Simulate retry success (in real implementation, retry the actual operation)
            if attempt >= 2:  # Success after 3 attempts
                return RecoveryResult(
                    success=True,
                    attempts=attempt + 1,
                    duration=self._backoff_total(base_delay, attempt + 1),
                    strategy_used='exponential_backoff'
                )

        return RecoveryResult(
            success=False,
            attempts=max_attempts,
            duration=self._backoff_total(base_delay, max_attempts),
            strategy_used='exponential_backoff',
            error_message="Max retries exceeded"
        )

    async def _strategy_reconnect_with_backoff(
        self,
        error: Exception,
        classification: "ErrorClassification"
    ) -> "RecoveryResult":
        """Reconnection strategy with exponential backoff."""
        self.logger.info("Executing reconnect with backoff strategy")

        # Previously hard-coded 5/2; now configurable with the same defaults.
        max_attempts, base_delay = self._strategy_params('reconnect_with_backoff', 5, 2)

        for attempt in range(max_attempts):
            try:
                delay = base_delay * (2 ** attempt)
                self.logger.info(f"Reconnection attempt {attempt + 1}/{max_attempts}")

                await asyncio.sleep(delay)

                # Simulate reconnection logic
                # In real implementation, attempt to reconnect to the service

                if attempt >= 1:  # Success after 2 attempts
                    return RecoveryResult(
                        success=True,
                        attempts=attempt + 1,
                        duration=self._backoff_total(base_delay, attempt + 1),
                        strategy_used='reconnect_with_backoff'
                    )

            except Exception as e:
                self.logger.warning(f"Reconnection attempt {attempt + 1} failed: {e}")
                continue

        return RecoveryResult(
            success=False,
            attempts=max_attempts,
            duration=self._backoff_total(base_delay, max_attempts),
            strategy_used='reconnect_with_backoff',
            error_message="Failed to reconnect"
        )

    async def _strategy_rate_limit_backoff(
        self,
        error: Exception,
        classification: "ErrorClassification"
    ) -> "RecoveryResult":
        """Rate limit backoff strategy: one long, flat wait."""
        self.logger.info("Executing rate limit backoff strategy")

        # Wait longer for rate limits; default matches the previous
        # hard-coded 60s when the config does not override it.
        _, delay = self._strategy_params('rate_limit_backoff', 1, 60)
        await asyncio.sleep(delay)

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=float(delay),
            strategy_used='rate_limit_backoff'
        )

    async def _strategy_restart_component(
        self,
        error: Exception,
        classification: "ErrorClassification"
    ) -> "RecoveryResult":
        """Restart component strategy (for critical errors)."""
        self.logger.warning("Executing component restart strategy")

        # Simulate component restart; delay configurable (default 5s).
        _, delay = self._strategy_params('restart_with_increased_memory', 1, 5)
        await asyncio.sleep(delay)

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=float(delay),
            strategy_used='restart_with_increased_memory'
        )

    async def _strategy_refresh_credentials(
        self,
        error: Exception,
        classification: "ErrorClassification"
    ) -> "RecoveryResult":
        """Refresh authentication credentials."""
        self.logger.info("Executing credential refresh strategy")

        # Simulate credential refresh; delay configurable (default 2s).
        _, delay = self._strategy_params('refresh_credentials', 1, 2)
        await asyncio.sleep(delay)

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=float(delay),
            strategy_used='refresh_credentials'
        )

    async def _strategy_log_and_continue(
        self,
        error: Exception,
        classification: "ErrorClassification"
    ) -> "RecoveryResult":
        """Simply log the error and continue (no waiting, always succeeds)."""
        self.logger.info(f"Logging error and continuing: {error}")

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=0.0,
            strategy_used='log_and_continue'
        )

    async def stop(self):
        """Stop the recovery manager, draining in-flight recoveries first."""
        while self.active_recoveries > 0:
            self.logger.info(f"Waiting for {self.active_recoveries} active recoveries to complete...")
            await asyncio.sleep(1)

        self.logger.info("Auto recovery manager stopped")
\ No newline at end of file
diff --git a/src_error_handler_error_classifier.py b/src_error_handler_error_classifier.py
new file mode 100644
index 00000000..735b5c7c
--- /dev/null
+++ b/src_error_handler_error_classifier.py
@@ -0,0 +1,229 @@
+"""
+Error Classification System
+Automatically categorizes errors and determines handling strategy
+"""
+
+import logging
+import re
+from dataclasses import dataclass
+from enum import Enum
+from typing import Dict, Any, List
+
+
class ErrorCategory(Enum):
    """Error severity categories (values mirror the 'error_categories' config keys)."""
    CRITICAL = "critical"        # severe; rules built from this bucket flag manual intervention
    RECOVERABLE = "recoverable"  # transient; safe to retry automatically
    IGNORABLE = "ignorable"      # expected/benign; log only
    WARNING = "warning"          # noteworthy but low severity
+
+
class ErrorAction(Enum):
    """Actions to take for different error types."""
    ALERT_AND_RETRY = "alert_and_retry"  # raise visibility, then attempt recovery
    AUTO_RETRY = "auto_retry"            # retry silently with backoff
    LOG_ONLY = "log_only"                # record and move on
    ESCALATE = "escalate"                # hand off instead of auto-fixing
+
+
@dataclass
class ErrorClassification:
    """Result of error classification."""
    category: ErrorCategory             # coarse severity bucket
    action: ErrorAction                 # how the system should respond
    error_type: str                     # exception class name (type(e).__name__)
    severity_score: int  # 1-10
    is_critical: bool                   # shorthand for severity_score >= 8
    requires_manual_intervention: bool  # True when automation should stand down
    recommended_strategy: str           # key into AutoRecoveryManager's strategy table
    metadata: Dict[str, Any]            # free-form context (error message, timestamp, ...)
+
+
class ErrorClassifier:
    """Intelligent error classification system.

    Maps exceptions to an ErrorClassification via, in order: explicit
    per-type rules loaded from config, regex heuristics over the error
    message, and finally a conservative default for unknown errors.
    """

    def __init__(self, config: Dict[str, Any]):
        """Load per-error-type rules from the error-handling config."""
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.error_rules = self._load_error_rules()
        self.pattern_cache = {}  # reserved for compiled-pattern caching

    def _load_error_rules(self) -> Dict[str, Dict]:
        """Build an error-type-name -> handling-rule table from config.

        Each 'error_categories' entry may be a mapping with an 'errors'
        list (the documented config layout) or a bare list of type names;
        both are accepted. BUG FIX: the old code iterated the category
        mapping itself, which registered the mapping keys ('errors',
        'action') as error types and ignored the real lists — and it never
        loaded the 'warning' category at all.
        """
        category_specs = (
            # (config key, category, action, severity, manual intervention)
            ('critical', ErrorCategory.CRITICAL, ErrorAction.ALERT_AND_RETRY, 9, True),
            ('recoverable', ErrorCategory.RECOVERABLE, ErrorAction.AUTO_RETRY, 5, False),
            ('ignorable', ErrorCategory.IGNORABLE, ErrorAction.LOG_ONLY, 2, False),
            ('warning', ErrorCategory.WARNING, ErrorAction.LOG_ONLY, 3, False),
        )

        categories = self.config.get('error_categories', {}) or {}
        rules: Dict[str, Dict] = {}

        for key, category, action, severity, manual in category_specs:
            spec = categories.get(key, [])
            # Accept {'errors': [...], ...} mappings or plain lists.
            error_types = spec.get('errors', []) if isinstance(spec, dict) else spec
            for error_type in error_types or []:
                rules[error_type] = {
                    'category': category,
                    'action': action,
                    'severity': severity,
                    'manual_intervention': manual,
                }

        return rules

    def classify(self, error: Exception) -> "ErrorClassification":
        """Classify an error and determine handling strategy."""
        error_type = type(error).__name__
        error_message = str(error)

        # 1. Explicit rule for this exception type.
        if error_type in self.error_rules:
            rule = self.error_rules[error_type]
            return self._create_classification(error_type, error_message, rule)

        # 2. Message-pattern heuristics.
        classification = self._classify_by_pattern(error_type, error_message)
        if classification:
            return classification

        # 3. Conservative default for unknown errors.
        return self._create_default_classification(error_type, error_message)

    def classify_exception(self, exception: Exception) -> "ErrorClassification":
        """Classify an exception (alias for classify)."""
        return self.classify(exception)

    def _classify_by_pattern(self, error_type: str, error_message: str) -> "Optional[ErrorClassification]":
        """Classify error based on message patterns.

        Returns None when no pattern matches. Manual intervention is
        flagged for severity >= 8.
        """
        patterns = {
            r'connection.*refused|timeout|timed out': {
                'category': ErrorCategory.RECOVERABLE,
                'action': ErrorAction.AUTO_RETRY,
                'severity': 6,
                'strategy': 'exponential_backoff'
            },
            r'out of memory|oom|memory error': {
                'category': ErrorCategory.CRITICAL,
                'action': ErrorAction.ALERT_AND_RETRY,
                'severity': 10,
                'strategy': 'restart_with_increased_memory'
            },
            r'rate limit|too many requests|429': {
                'category': ErrorCategory.RECOVERABLE,
                'action': ErrorAction.AUTO_RETRY,
                'severity': 4,
                'strategy': 'rate_limit_backoff'
            },
            r'database.*connection|db error|sql': {
                'category': ErrorCategory.CRITICAL,
                'action': ErrorAction.ALERT_AND_RETRY,
                'severity': 8,
                'strategy': 'reconnect_with_backoff'
            },
            r'unauthorized|authentication|forbidden': {
                'category': ErrorCategory.CRITICAL,
                'action': ErrorAction.ESCALATE,
                'severity': 9,
                'strategy': 'refresh_credentials'
            },
            r'not found|404': {
                'category': ErrorCategory.WARNING,
                'action': ErrorAction.LOG_ONLY,
                'severity': 3,
                'strategy': 'log_and_continue'
            }
        }

        for pattern, rule in patterns.items():
            if re.search(pattern, error_message, re.IGNORECASE):
                return self._create_classification(
                    error_type,
                    error_message,
                    {
                        'category': rule['category'],
                        'action': rule['action'],
                        'severity': rule['severity'],
                        'manual_intervention': rule['severity'] >= 8
                    },
                    recommended_strategy=rule['strategy']
                )

        return None

    def _create_classification(
        self,
        error_type: str,
        error_message: str,
        rule: Dict,
        recommended_strategy: str = None
    ) -> "ErrorClassification":
        """Create an ErrorClassification, resolving the recovery strategy.

        Strategy resolution order: explicit argument, then the matching
        'auto_fix_strategies' config entry for this error type, then
        'generic_retry'.
        """
        if not recommended_strategy:
            for strategy_config in self.config.get('auto_fix_strategies', []):
                if strategy_config.get('error_type') == error_type:
                    recommended_strategy = strategy_config.get('strategy', 'generic_retry')
                    break

        if not recommended_strategy:
            recommended_strategy = 'generic_retry'

        return ErrorClassification(
            category=rule['category'],
            action=rule['action'],
            error_type=error_type,
            severity_score=rule['severity'],
            is_critical=rule['severity'] >= 8,
            requires_manual_intervention=rule.get('manual_intervention', False),
            recommended_strategy=recommended_strategy,
            metadata={
                'error_message': error_message,
                'timestamp': self._now_string()
            }
        )

    @staticmethod
    def _now_string() -> str:
        """Current local time, same layout as logging's default asctime
        ("YYYY-MM-DD HH:MM:SS,mmm"). Replaces the old hack of formatting a
        dummy LogRecord through logging.Formatter().formatTime()."""
        import time
        now = time.time()
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(now)) + ',%03d' % int(now % 1 * 1000)

    def _create_default_classification(
        self,
        error_type: str,
        error_message: str
    ) -> "ErrorClassification":
        """Create default classification for unknown errors (mid severity,
        auto-retry with the generic strategy)."""
        self.logger.warning(f"Unknown error type: {error_type}, using default classification")

        return ErrorClassification(
            category=ErrorCategory.WARNING,
            action=ErrorAction.AUTO_RETRY,
            error_type=error_type,
            severity_score=5,
            is_critical=False,
            requires_manual_intervention=False,
            recommended_strategy='generic_retry',
            metadata={
                'error_message': error_message,
                'is_unknown': True
            }
        )

    def get_statistics(self) -> Dict[str, int]:
        """Get classification statistics (rule counts by category)."""
        return {
            'total_rules': len(self.error_rules),
            'critical_rules': sum(1 for r in self.error_rules.values()
                                  if r['category'] == ErrorCategory.CRITICAL),
            'recoverable_rules': sum(1 for r in self.error_rules.values()
                                     if r['category'] == ErrorCategory.RECOVERABLE),
        }
\ No newline at end of file
diff --git a/src_error_handler_retry_manager.py b/src_error_handler_retry_manager.py
new file mode 100644
index 00000000..6ef52535
--- /dev/null
+++ b/src_error_handler_retry_manager.py
@@ -0,0 +1,80 @@
+"""
+Retry Management System
+Handles retry logic with configurable policies
+"""
+
+import asyncio
+import logging
+from typing import Callable, Any, Dict
+from dataclasses import dataclass
+
+
@dataclass
class RetryPolicy:
    """Configuration for retry behavior."""
    max_retries: int = 5              # total number of attempts made by execute_with_retry
    base_delay: float = 2.0           # initial wait between attempts, seconds
    max_delay: float = 60.0           # cap applied to the exponential backoff delay
    exponential_backoff: bool = True  # double the delay each attempt when True
    jitter: bool = True               # randomize each delay to 0.5x-1.5x when True
+
+
class RetryManager:
    """Manages retry logic for failed operations."""

    def __init__(self, config: Dict[str, Any]):
        """Build a RetryPolicy from flat config keys, with sane defaults."""
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.policy = RetryPolicy(
            max_retries=config.get('max_retries', 5),
            base_delay=config.get('retry_delay', 2.0),
            exponential_backoff=config.get('exponential_backoff', True)
        )

    async def execute_with_retry(
        self,
        func: Callable,
        *args,
        **kwargs
    ) -> Any:
        """
        Execute an async callable with retry logic.

        Returns the first successful result; re-raises the last exception
        once every attempt has failed. BUG FIX: a non-positive max_retries
        now still performs one attempt instead of silently returning None
        without ever calling `func`.
        """
        # Always attempt at least once, even with a misconfigured policy.
        attempts = max(1, self.policy.max_retries)
        last_exception = None

        for attempt in range(attempts):
            try:
                return await func(*args, **kwargs)

            except Exception as e:
                last_exception = e
                self.logger.warning(
                    f"Attempt {attempt + 1}/{attempts} failed: {e}"
                )

                # Back off between attempts, but not after the final failure.
                if attempt < attempts - 1:
                    delay = self._calculate_delay(attempt)
                    await asyncio.sleep(delay)

        # All attempts exhausted; the loop only falls through on failure.
        raise last_exception

    def _calculate_delay(self, attempt: int) -> float:
        """Delay before the retry that follows 0-based attempt `attempt`:
        base_delay, optionally doubled per attempt and capped at max_delay,
        optionally jittered to 0.5x-1.5x."""
        if self.policy.exponential_backoff:
            delay = min(
                self.policy.base_delay * (2 ** attempt),
                self.policy.max_delay
            )
        else:
            delay = self.policy.base_delay

        # Jitter de-synchronizes concurrent retriers.
        if self.policy.jitter:
            import random
            delay *= (0.5 + random.random())

        return delay
\ No newline at end of file
diff --git a/src_pr_automation_pr_manager.py b/src_pr_automation_pr_manager.py
new file mode 100644
index 00000000..422f33e1
--- /dev/null
+++ b/src_pr_automation_pr_manager.py
@@ -0,0 +1,221 @@
+"""
+Pull Request Management System
+Automatically creates, reviews, and merges pull requests
+"""
+
+import asyncio
+import logging
+from typing import Dict, Any, Optional, List
+from datetime import datetime
+import aiohttp
+
+
class PRManager:
    """Manages GitHub pull requests automatically via the REST v3 API."""

    def __init__(self, config: Dict[str, Any]):
        """Read repository coordinates and credentials from config."""
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.github_token = config.get('github_token', '')
        self.repo_owner = config.get('repo_owner', '')
        self.repo_name = config.get('repo_name', '')
        self.base_branch = config.get('base_branch', 'master')
        self.api_base = 'https://api.github.com'

    def _headers(self) -> Dict[str, str]:
        """Standard GitHub REST v3 headers with token auth (was duplicated
        in every request method)."""
        return {
            'Authorization': f'token {self.github_token}',
            'Accept': 'application/vnd.github.v3+json'
        }

    async def create_pr(
        self,
        title: str,
        body: str,
        head_branch: str,
        labels: List[str] = None
    ) -> Optional[Dict]:
        """Create a new pull request from head_branch into the base branch.

        Returns the PR payload on success, None on failure. Labels, if
        given, are attached best-effort after creation.
        """
        self.logger.info(f"Creating PR: {title}")

        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/pulls"
        data = {
            'title': title,
            'body': body,
            'head': head_branch,
            'base': self.base_branch
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(url, json=data, headers=self._headers()) as response:
                    if response.status == 201:  # 201 Created
                        pr_data = await response.json()
                        pr_number = pr_data['number']

                        self.logger.info(f"✅ PR #{pr_number} created successfully")

                        # Add labels if provided
                        if labels:
                            await self._add_labels(pr_number, labels)

                        return pr_data
                    else:
                        error_text = await response.text()
                        self.logger.error(f"Failed to create PR: {error_text}")
                        return None

        except Exception as e:
            self.logger.error(f"Error creating PR: {e}", exc_info=True)
            return None

    async def _add_labels(self, pr_number: int, labels: List[str]):
        """Attach labels to a PR (labels live on the issues endpoint)."""
        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/issues/{pr_number}/labels"
        data = {'labels': labels}

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(url, json=data, headers=self._headers()) as response:
                    if response.status == 200:
                        self.logger.info(f"Labels added to PR #{pr_number}")
                    else:
                        self.logger.warning(f"Failed to add labels to PR #{pr_number}")

        except Exception as e:
            self.logger.error(f"Error adding labels: {e}")

    async def review_pr(self, pr_number: int) -> bool:
        """Automatically review a pull request.

        Returns True only when the automated checks passed AND the review
        was submitted successfully. BUG FIX: this used to return the HTTP
        success of the review submission, so a REQUEST_CHANGES review that
        posted successfully still reported True — letting auto_process_pr()
        merge a PR that had failed its checks.
        """
        self.logger.info(f"Reviewing PR #{pr_number}")

        # Get PR details
        pr_data = await self._get_pr(pr_number)
        if not pr_data:
            return False

        # Run automated checks
        checks_passed = await self._run_automated_checks(pr_data)

        # Submit review; success requires passing checks AND a posted review.
        submitted = await self._submit_review(pr_number, checks_passed)
        return checks_passed and submitted

    async def _get_pr(self, pr_number: int) -> Optional[Dict]:
        """Fetch pull request details; None on any failure."""
        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/pulls/{pr_number}"

        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=self._headers()) as response:
                    if response.status == 200:
                        return await response.json()
                    return None

        except Exception as e:
            self.logger.error(f"Error getting PR: {e}")
            return None

    async def _run_automated_checks(self, pr_data: Dict) -> bool:
        """Run automated checks on a PR; True when all pass.

        Currently a placeholder that always passes. In real implementation:
        check code quality, run tests, check for conflicts, validate commit
        messages, check file changes.
        """
        self.logger.info("Running automated checks...")

        # Simulate check duration
        await asyncio.sleep(2)

        return True

    async def _submit_review(self, pr_number: int, approved: bool) -> bool:
        """Post an APPROVE or REQUEST_CHANGES review.

        Returns True when the API accepted the review (HTTP 200) — i.e.
        submission success, NOT the review outcome.
        """
        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/pulls/{pr_number}/reviews"

        event = 'APPROVE' if approved else 'REQUEST_CHANGES'
        body = 'Automated review: All checks passed ✅' if approved else 'Automated review: Issues found ❌'
        data = {
            'event': event,
            'body': body
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(url, json=data, headers=self._headers()) as response:
                    if response.status == 200:
                        self.logger.info(f"Review submitted for PR #{pr_number}: {event}")
                        return True
                    return False

        except Exception as e:
            self.logger.error(f"Error submitting review: {e}")
            return False

    async def merge_pr(self, pr_number: int, merge_method: str = 'squash') -> bool:
        """Merge a pull request using the given merge method; True on success."""
        self.logger.info(f"Merging PR #{pr_number} using {merge_method}")

        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/pulls/{pr_number}/merge"
        data = {
            'merge_method': merge_method
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.put(url, json=data, headers=self._headers()) as response:
                    if response.status == 200:
                        self.logger.info(f"✅ PR #{pr_number} merged successfully")
                        return True
                    else:
                        error_text = await response.text()
                        self.logger.error(f"Failed to merge PR: {error_text}")
                        return False

        except Exception as e:
            self.logger.error(f"Error merging PR: {e}", exc_info=True)
            return False

    async def auto_process_pr(self, pr_number: int) -> bool:
        """Review, then merge a PR if — and only if — the review passed."""
        self.logger.info(f"Auto-processing PR #{pr_number}")

        # Review PR
        review_passed = await self.review_pr(pr_number)

        if not review_passed:
            self.logger.warning(f"PR #{pr_number} did not pass review")
            return False

        # Wait a bit before merging
        await asyncio.sleep(5)

        # Merge PR
        merge_success = await self.merge_pr(pr_number)

        return merge_success
\ No newline at end of file
diff --git a/src_stress_tester_main.py b/src_stress_tester_main.py
new file mode 100644
index 00000000..5fb0ba39
--- /dev/null
+++ b/src_stress_tester_main.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python3
+"""
+Intelligent Stress Testing System - Main Entry Point
+Handles continuous stress testing with automatic error recovery
+"""
+
+import asyncio
+import logging
+import signal
+import sys
+from pathlib import Path
+from typing import Optional
+
+import yaml
+from stress_engine import StressTestEngine
+from network_simulator import NetworkSimulator
+from error_handler.error_classifier import ErrorClassifier
+from error_handler.auto_recovery import AutoRecoveryManager
+from dashboard.backend.metrics_collector import MetricsCollector
+from common.logger import setup_logging
+from common.utils import load_config
+
+
class StressTestingSystem:
    """Main orchestrator for the intelligent stress testing system.

    Wires together the stress engine, network monitoring, error
    classification/recovery and metrics collection, and runs them as
    concurrent asyncio tasks until a shutdown signal flips `running`.
    """

    def __init__(self, config_path: str):
        """Load config from `config_path` and construct all components."""
        self.config = load_config(config_path)
        self.logger = setup_logging(self.config.get('logging', {}))

        # Initialize components
        self.stress_engine = StressTestEngine(self.config['stress_test'])
        self.network_simulator = NetworkSimulator(self.config.get('network', {}))
        self.error_classifier = ErrorClassifier(self.config['error_handling'])
        self.recovery_manager = AutoRecoveryManager(self.config['error_handling'])
        self.metrics_collector = MetricsCollector(self.config['monitoring'])

        self.running = False
        self._setup_signal_handlers()

    def _setup_signal_handlers(self):
        """Setup graceful shutdown on SIGINT/SIGTERM."""
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

    def _signal_handler(self, signum, frame):
        """Handle shutdown signals gracefully: flip the run flag so each
        loop exits at its next iteration."""
        self.logger.info(f"Received signal {signum}, initiating graceful shutdown...")
        self.running = False

    async def start(self):
        """Start the stress testing system and block until shutdown."""
        self.logger.info("🚀 Starting Intelligent Stress Testing System...")
        self.running = True

        # Start all components
        tasks = [
            asyncio.create_task(self._run_stress_tests()),
            asyncio.create_task(self._monitor_system()),
            asyncio.create_task(self._collect_metrics()),
        ]

        try:
            await asyncio.gather(*tasks)
        except Exception as e:
            self.logger.error(f"Critical error in main loop: {e}", exc_info=True)
        finally:
            await self.shutdown()

    async def _run_stress_tests(self):
        """Main stress testing loop with error recovery."""
        self.logger.info("📊 Starting stress test execution...")

        while self.running:
            try:
                # Execute stress test
                results = await self.stress_engine.run_test()

                # Collect metrics
                await self.metrics_collector.record_test_results(results)

                # Check for errors
                if results.has_errors():
                    await self._handle_errors(results.errors)

            except Exception as e:
                self.logger.error(f"Error in stress test loop: {e}")
                # Classify and handle error in background
                await self._handle_exception(e)
                # BUG FIX: this branch used to `continue`, which skipped the
                # pacing sleep below and busy-looped while run_test() kept
                # failing. The inter-test sleep now always runs.

            await asyncio.sleep(self.config['stress_test'].get('test_interval', 1))

    async def _handle_errors(self, errors: list):
        """Classify each error from a test run and dispatch handling."""
        for error in errors:
            # Classify error
            classification = self.error_classifier.classify(error)

            self.logger.info(f"Error classified as: {classification.category}")

            # Record error metrics
            await self.metrics_collector.record_error(classification)

            # Handle based on classification
            if classification.requires_manual_intervention:
                self.logger.warning(f"⚠️ Manual intervention required: {error}")
                await self.metrics_collector.increment_manual_intervention_count()
            else:
                # Start automatic recovery in background
                asyncio.create_task(
                    self._auto_recover(error, classification)
                )

    async def _handle_exception(self, exception: Exception):
        """Handle unexpected exceptions raised by the test loop itself."""
        classification = self.error_classifier.classify_exception(exception)

        if classification.is_critical:
            self.logger.critical(f"🚨 Critical error: {exception}")
            await self.metrics_collector.record_critical_error(exception)
        else:
            # Attempt automatic recovery in background
            asyncio.create_task(
                self._auto_recover(exception, classification)
            )

    async def _auto_recover(self, error, classification):
        """Attempt automatic error recovery in background, keeping the
        recovery metrics (active/fixed/bypassed) in sync."""
        self.logger.info(f"🔧 Starting automatic recovery for: {error}")

        try:
            # Increment active recovery processes
            await self.metrics_collector.increment_active_recovery()

            # Attempt recovery with retry logic
            success = await self.recovery_manager.recover(error, classification)

            if success:
                self.logger.info(f"✅ Successfully recovered from: {error}")
                await self.metrics_collector.increment_fixed_errors()
            else:
                self.logger.warning(f"❌ Failed to recover from: {error}")
                await self.metrics_collector.increment_bypassed_errors()

        except Exception as e:
            self.logger.error(f"Error during recovery: {e}")
        finally:
            await self.metrics_collector.decrement_active_recovery()

    async def _monitor_system(self):
        """Periodically sample network stats and record them."""
        self.logger.info("🔍 Starting system monitoring...")

        while self.running:
            try:
                # Collect system metrics
                metrics = await self.network_simulator.get_network_stats()
                await self.metrics_collector.record_system_metrics(metrics)

            except Exception as e:
                self.logger.error(f"Error in monitoring: {e}")

            await asyncio.sleep(self.config['monitoring'].get('metrics_interval', 10))

    async def _collect_metrics(self):
        """Periodically aggregate and store collected metrics."""
        self.logger.info("📈 Starting metrics collection...")

        while self.running:
            try:
                await self.metrics_collector.aggregate_and_store()
            except Exception as e:
                self.logger.error(f"Error collecting metrics: {e}")

            await asyncio.sleep(self.config['monitoring'].get('aggregation_interval', 60))

    async def shutdown(self):
        """Graceful shutdown of all components."""
        self.logger.info("🛑 Shutting down Intelligent Stress Testing System...")

        # Stop all components
        await self.stress_engine.stop()
        await self.recovery_manager.stop()
        await self.metrics_collector.close()

        self.logger.info("✅ Shutdown complete")
+
+
async def main():
    """CLI entry point: build the system from a config path and run it.

    The config path is taken from the first CLI argument when present,
    otherwise 'config/stress_test_config.yaml'.
    """
    config_path = 'config/stress_test_config.yaml'
    if len(sys.argv) > 1:
        config_path = sys.argv[1]

    system = StressTestingSystem(config_path)
    await system.start()


if __name__ == '__main__':
    asyncio.run(main())
\ No newline at end of file
diff --git a/src_stress_tester_network_simulator.py b/src_stress_tester_network_simulator.py
new file mode 100644
index 00000000..55b0cab3
--- /dev/null
+++ b/src_stress_tester_network_simulator.py
@@ -0,0 +1,138 @@
+"""
+Network Simulation and Monitoring
+Simulates network conditions and monitors network statistics
+"""
+
+import asyncio
+import logging
+import psutil
+import time
+from typing import Dict, Any
+from dataclasses import dataclass
+
+
@dataclass
class NetworkStats:
    """Network statistics container.

    A point-in-time snapshot of host-wide network counters (field names
    mirror psutil's net_io_counters() result) plus the capture time.
    """
    bytes_sent: int    # cumulative bytes sent
    bytes_recv: int    # cumulative bytes received
    packets_sent: int  # cumulative packets sent
    packets_recv: int  # cumulative packets received
    errin: int         # receive errors
    errout: int        # send errors
    dropin: int        # dropped incoming packets
    dropout: int       # dropped outgoing packets
    timestamp: float   # capture time, seconds since the epoch (time.time())
+
+
class NetworkSimulator:
    """Network simulation and monitoring system.

    Reads host network/system counters via psutil and can optionally
    inject artificial latency and packet loss for stress scenarios.
    """

    def __init__(self, config: Dict[str, Any]):
        """Read simulation settings from config (all optional)."""
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.enabled = config.get('enabled', True)         # master switch for simulation
        self.latency_ms = config.get('latency_ms', 0)      # default injected latency
        self.packet_loss = config.get('packet_loss', 0.0)  # drop probability, percent
        self.bandwidth_limit = config.get('bandwidth_limit_mbps', 0)  # read but not enforced here
        self._last_stats = None  # previous snapshot for load calculation (was created lazily)

    async def get_network_stats(self) -> Dict[str, Any]:
        """Return a snapshot of host-wide network counters plus a load estimate.

        Falls back to a minimal zeroed payload if psutil fails, so the
        monitoring loop never crashes on a stats read.
        """
        try:
            # Get network I/O statistics
            net_io = psutil.net_io_counters()

            stats = NetworkStats(
                bytes_sent=net_io.bytes_sent,
                bytes_recv=net_io.bytes_recv,
                packets_sent=net_io.packets_sent,
                packets_recv=net_io.packets_recv,
                errin=net_io.errin,
                errout=net_io.errout,
                dropin=net_io.dropin,
                dropout=net_io.dropout,
                timestamp=time.time()
            )

            # Calculate load percentage
            load = self._calculate_load(stats)

            return {
                'bytes_sent': stats.bytes_sent,
                'bytes_recv': stats.bytes_recv,
                'packets_sent': stats.packets_sent,
                'packets_recv': stats.packets_recv,
                'errors': stats.errin + stats.errout,
                'drops': stats.dropin + stats.dropout,
                'load': load,
                'timestamp': stats.timestamp
            }

        except Exception as e:
            self.logger.error(f"Error getting network stats: {e}")
            return {
                'load': 0.0,
                'errors': 0,
                'timestamp': time.time()
            }

    def _calculate_load(self, stats: "NetworkStats") -> float:
        """Estimate network load (0-100%) from the packet rate since the
        previous snapshot; 0.0 on the first call or a non-positive interval.

        Simple packet-rate heuristic; a real implementation would be more
        sophisticated.
        """
        load = 0.0
        previous = getattr(self, '_last_stats', None)
        if previous is not None:
            time_delta = stats.timestamp - previous.timestamp
            if time_delta > 0:
                packets_per_sec = (
                    (stats.packets_sent - previous.packets_sent) +
                    (stats.packets_recv - previous.packets_recv)
                ) / time_delta

                # Assume max capacity of 10000 packets/sec
                load = min((packets_per_sec / 10000) * 100, 100)

        self._last_stats = stats
        return load

    async def simulate_latency(self, duration_ms: int = None):
        """Sleep to inject artificial latency.

        Uses `duration_ms` when given, else the configured `latency_ms`.
        No-op when simulation is disabled. BUG FIX: an explicit
        `duration_ms=0` now means "no latency" — the old truthiness check
        made 0 fall back to the configured default.
        """
        if not self.enabled:
            return

        latency = self.latency_ms if duration_ms is None else duration_ms
        if latency > 0:
            await asyncio.sleep(latency / 1000)

    async def simulate_packet_loss(self) -> bool:
        """
        Simulate packet loss.

        Returns True if the packet should be dropped, with probability
        `packet_loss` percent; always False when simulation is disabled.
        """
        if not self.enabled or self.packet_loss <= 0:
            return False

        import random
        return random.random() < (self.packet_loss / 100)

    def get_system_info(self) -> Dict[str, Any]:
        """Return CPU / memory / root-disk usage via psutil; empty dict on
        failure. Note: cpu_percent blocks for its 1s sampling interval."""
        try:
            cpu_percent = psutil.cpu_percent(interval=1)
            memory = psutil.virtual_memory()
            disk = psutil.disk_usage('/')

            return {
                'cpu_percent': cpu_percent,
                'memory_percent': memory.percent,
                'memory_available_gb': memory.available / (1024**3),
                'disk_percent': disk.percent,
                'disk_free_gb': disk.free / (1024**3)
            }
        except Exception as e:
            self.logger.error(f"Error getting system info: {e}")
            return {}
\ No newline at end of file
diff --git a/src_stress_tester_stress_engine.py b/src_stress_tester_stress_engine.py
new file mode 100644
index 00000000..3c2c2b24
--- /dev/null
+++ b/src_stress_tester_stress_engine.py
@@ -0,0 +1,119 @@
+"""
+Stress Testing Engine
+Handles load generation and test execution
+"""
+
import asyncio
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

import aiohttp
+
+
@dataclass
class StressTestResult:
    """Container for stress test results."""
    start_time: float
    end_time: float
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    average_response_time: float = 0.0
    errors: List[Exception] = field(default_factory=list)

    def has_errors(self) -> bool:
        """True when at least one request raised an exception."""
        return bool(self.errors)

    def success_rate(self) -> float:
        """Percentage of requests that succeeded (0.0 for an empty run)."""
        if not self.total_requests:
            return 0.0
        return (self.successful_requests / self.total_requests) * 100
+
+
class StressTestEngine:
    """Executes stress tests with configurable load patterns.

    Config keys read: ``base_url``, ``max_concurrent_users`` (default 100),
    ``endpoints`` (list of ``{'path', 'method'}`` dicts).
    """

    def __init__(self, config: Dict[str, Any]):
        self.config = config
        self.logger = logging.getLogger(__name__)
        # Created lazily on first run_test(); recreated if stop() closed it.
        self.session: Optional[aiohttp.ClientSession] = None
        self.running = False

    async def run_test(self) -> StressTestResult:
        """Execute one stress test cycle and return aggregated results.

        Fires ``max_concurrent_users`` concurrent requests, distributed
        round-robin over the configured endpoints, and waits for all of
        them. Individual request failures are recorded on the result, not
        raised.
        """
        start_time = time.time()
        result = StressTestResult(start_time=start_time, end_time=0)
        self.running = True

        # Recreate the session when missing OR closed: the original
        # `if not self.session` left a closed (truthy) session in place
        # after stop(), so a subsequent run_test() failed on every request.
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession()

        max_concurrent = self.config.get('max_concurrent_users', 100)
        endpoints = self.config.get('endpoints', [])

        # One task per simulated user. _execute_request catches its own
        # exceptions, so gather() should never see one; return_exceptions
        # stays on as a belt-and-braces guard.
        tasks = [
            asyncio.create_task(
                self._execute_request(self._select_endpoint(endpoints, i), result)
            )
            for i in range(max_concurrent)
        ]
        await asyncio.gather(*tasks, return_exceptions=True)

        result.end_time = time.time()
        result.average_response_time = self._calculate_avg_response_time(result)
        self.running = False

        self.logger.info(
            f"Stress test completed: {result.successful_requests}/{result.total_requests} successful "
            f"({result.success_rate():.2f}%)"
        )

        return result

    def _select_endpoint(self, endpoints: List[Dict], index: int) -> Dict:
        """Pick the endpoint for request *index*; GET / when none configured."""
        if not endpoints:
            return {'path': '/', 'method': 'GET'}

        # Simple round-robin for now (can be enhanced with weighted selection)
        return endpoints[index % len(endpoints)]

    async def _execute_request(self, endpoint: Dict, result: StressTestResult):
        """Issue one HTTP request and tally the outcome onto *result*.

        Any exception (timeout, connection error, bad URL) is counted as a
        failure and appended to ``result.errors`` instead of propagating.
        """
        result.total_requests += 1

        try:
            method = endpoint.get('method', 'GET')
            path = endpoint.get('path', '/')
            url = f"{self.config.get('base_url', 'http://localhost')}{path}"

            async with self.session.request(method, url, timeout=aiohttp.ClientTimeout(total=30)) as response:
                await response.read()

                # Only 2xx counts as success; redirects already followed.
                if 200 <= response.status < 300:
                    result.successful_requests += 1
                else:
                    result.failed_requests += 1

        except Exception as e:
            result.failed_requests += 1
            result.errors.append(e)
            self.logger.debug(f"Request failed: {e}")

    def _calculate_avg_response_time(self, result: StressTestResult) -> float:
        """Approximate per-request time as total wall time / request count.

        NOTE(review): with concurrent requests this understates real
        per-request latency; kept as-is for compatibility with existing
        reports that consume this field.
        """
        if result.total_requests == 0:
            return 0.0

        duration = result.end_time - result.start_time
        return duration / result.total_requests

    async def stop(self):
        """Stop the stress test engine and release the HTTP session."""
        self.running = False
        if self.session:
            await self.session.close()
        self.logger.info("Stress test engine stopped")
\ No newline at end of file
diff --git a/stress-testing-system b/stress-testing-system
new file mode 100644
index 00000000..be87331f
--- /dev/null
+++ b/stress-testing-system
@@ -0,0 +1,13 @@
+stress-testing-system/
+├── src/
+│ ├── stress_tester/
+│ ├── error_handler/
+│ ├── pr_automation/
+│ ├── dashboard/
+│ └── common/
+├── tests/
+├── scripts/
+├── docker/
+├── .github/workflows/
+├── config/
+└── docs/