From b465a22f48e625a20bf3cfdccb01bc364b6e1009 Mon Sep 17 00:00:00 2001 From: Ze0ro99 <146000493+Ze0ro99@users.noreply.github.com> Date: Sun, 8 Feb 2026 09:36:30 +0300 Subject: [PATCH 1/4] Create initial directory structure for stress testing Initialize the stress testing system directory structure. --- stress-testing-system | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 stress-testing-system diff --git a/stress-testing-system b/stress-testing-system new file mode 100644 index 00000000..be87331f --- /dev/null +++ b/stress-testing-system @@ -0,0 +1,13 @@ +stress-testing-system/ +├── src/ +│ ├── stress_tester/ +│ ├── error_handler/ +│ ├── pr_automation/ +│ ├── dashboard/ +│ └── common/ +├── tests/ +├── scripts/ +├── docker/ +├── .github/workflows/ +├── config/ +└── docs/ From e51d3641195da62f5442b90b3a740eede1f0f341 Mon Sep 17 00:00:00 2001 From: Ze0ro99 <146000493+Ze0ro99@users.noreply.github.com> Date: Sun, 8 Feb 2026 09:44:41 +0300 Subject: [PATCH 2/4] Add files via upload Error Handler & Recovery System PR Automation Dashboard (Frontend & Backend) Docker Configuration GitHub Actions Workflows Tests Documentation Dashboard Frontend (HTML/JS/CSS) Network Simulator Common utilities (logger, utils, constants) Docker configurations GitHub Actions workflows Configuration files Tests Documentation Scripts --- config_error_handling_rules.yaml.txt | 110 +++++++++ config_stress_test_config.yaml (1).txt | 143 +++++++++++ config_stress_test_config.yaml (2).txt | 143 +++++++++++ config_stress_test_config.yaml.txt | 143 +++++++++++ src_common_constants.py | 74 ++++++ src_common_logger.py | 75 ++++++ src_common_utils.py | 137 +++++++++++ src_dashboard_backend_api.py | 89 +++++++ src_dashboard_backend_metrics_collector.py | 146 +++++++++++ src_dashboard_frontend_index.html | 156 ++++++++++++ src_dashboard_frontend_styles.css | 243 +++++++++++++++++++ src_error_handler_auto_recovery.py | 269 +++++++++++++++++++++ src_error_handler_error_classifier.py | 229 
++++++++++++++++++ src_error_handler_retry_manager.py | 80 ++++++ src_pr_automation_pr_manager.py | 221 +++++++++++++++++ src_stress_tester_main.py | 200 +++++++++++++++ src_stress_tester_network_simulator.py | 138 +++++++++++ src_stress_tester_stress_engine.py | 119 +++++++++ 18 files changed, 2715 insertions(+) create mode 100644 config_error_handling_rules.yaml.txt create mode 100644 config_stress_test_config.yaml (1).txt create mode 100644 config_stress_test_config.yaml (2).txt create mode 100644 config_stress_test_config.yaml.txt create mode 100644 src_common_constants.py create mode 100644 src_common_logger.py create mode 100644 src_common_utils.py create mode 100644 src_dashboard_backend_api.py create mode 100644 src_dashboard_backend_metrics_collector.py create mode 100644 src_dashboard_frontend_index.html create mode 100644 src_dashboard_frontend_styles.css create mode 100644 src_error_handler_auto_recovery.py create mode 100644 src_error_handler_error_classifier.py create mode 100644 src_error_handler_retry_manager.py create mode 100644 src_pr_automation_pr_manager.py create mode 100644 src_stress_tester_main.py create mode 100644 src_stress_tester_network_simulator.py create mode 100644 src_stress_tester_stress_engine.py diff --git a/config_error_handling_rules.yaml.txt b/config_error_handling_rules.yaml.txt new file mode 100644 index 00000000..cefe05f8 --- /dev/null +++ b/config_error_handling_rules.yaml.txt @@ -0,0 +1,110 @@ +# Error Handling Rules Configuration + +# Error Category Definitions +error_categories: + critical: + - "DatabaseConnectionError" + - "OutOfMemoryError" + - "SystemError" + - "FatalError" + - "SecurityError" + action: "alert_and_retry" + + recoverable: + - "TimeoutError" + - "ConnectionError" + - "RateLimitError" + - "TemporaryError" + - "NetworkError" + - "ServiceUnavailableError" + action: "auto_retry" + + ignorable: + - "ClientCancellationError" + - "NotFoundError" + - "ValidationError" + action: "log_only" + + warning: + - 
"DeprecationWarning" + - "ConfigurationWarning" + action: "log_only" + +# Recovery Strategy Definitions +auto_fix_strategies: + - error_type: "DatabaseConnectionError" + strategy: "reconnect_with_backoff" + max_attempts: 5 + base_delay: 2 + + - error_type: "RateLimitError" + strategy: "rate_limit_backoff" + max_attempts: 3 + base_delay: 60 + + - error_type: "TimeoutError" + strategy: "exponential_backoff" + max_attempts: 5 + base_delay: 2 + + - error_type: "ConnectionError" + strategy: "reconnect_with_backoff" + max_attempts: 5 + base_delay: 2 + + - error_type: "OutOfMemoryError" + strategy: "restart_with_increased_memory" + max_attempts: 1 + base_delay: 10 + + - error_type: "SecurityError" + strategy: "refresh_credentials" + max_attempts: 3 + base_delay: 5 + +# Pattern-based Error Classification +error_patterns: + - pattern: "connection.*refused|timeout|timed out" + category: "recoverable" + strategy: "exponential_backoff" + severity: 6 + + - pattern: "out of memory|oom|memory error" + category: "critical" + strategy: "restart_with_increased_memory" + severity: 10 + + - pattern: "rate limit|too many requests|429" + category: "recoverable" + strategy: "rate_limit_backoff" + severity: 4 + + - pattern: "database.*connection|db error|sql" + category: "critical" + strategy: "reconnect_with_backoff" + severity: 8 + + - pattern: "unauthorized|authentication|forbidden" + category: "critical" + strategy: "refresh_credentials" + severity: 9 + + - pattern: "not found|404" + category: "warning" + strategy: "log_and_continue" + severity: 3 + +# Alert Configuration +alerts: + # Enable email alerts + email_enabled: false + email_recipients: + - "devops@example.com" + + # Enable Slack alerts + slack_enabled: false + slack_webhook: "${SLACK_WEBHOOK_URL}" + + # Enable PagerDuty alerts + pagerduty_enabled: false + pagerduty_key: "${PAGERDUTY_KEY}" \ No newline at end of file diff --git a/config_stress_test_config.yaml (1).txt b/config_stress_test_config.yaml (1).txt new file mode 
100644 index 00000000..41963a7b --- /dev/null +++ b/config_stress_test_config.yaml (1).txt @@ -0,0 +1,143 @@ +# Intelligent Stress Testing System Configuration + +# Stress Test Configuration +stress_test: + # Test duration in seconds (0 = infinite) + duration: 3600 + + # Maximum concurrent users/connections + max_concurrent_users: 1000 + + # Ramp-up time in seconds + ramp_up_time: 300 + + # Base URL for testing + base_url: "http://localhost:8000" + + # Test interval between cycles (seconds) + test_interval: 1 + + # Endpoints to test + endpoints: + - path: "/api/transactions" + method: "POST" + weight: 0.6 + - path: "/api/accounts" + method: "GET" + weight: 0.3 + - path: "/api/status" + method: "GET" + weight: 0.1 + +# Error Handling Configuration +error_handling: + # Maximum retry attempts + max_retries: 5 + + # Base retry delay in seconds + retry_delay: 2 + + # Use exponential backoff + exponential_backoff: true + + # Enable automatic recovery + auto_recovery: true + + # Error category definitions + error_categories: + critical: + - "DatabaseConnectionError" + - "OutOfMemoryError" + - "SystemError" + recoverable: + - "TimeoutError" + - "ConnectionError" + - "RateLimitError" + - "TemporaryError" + ignorable: + - "ClientCancellationError" + - "NotFoundError" + + # Auto-fix strategies + auto_fix_strategies: + - error_type: "DatabaseConnectionError" + strategy: "reconnect_with_backoff" + - error_type: "RateLimitError" + strategy: "rate_limit_backoff" + - error_type: "TimeoutError" + strategy: "exponential_backoff" + - error_type: "ConnectionError" + strategy: "reconnect_with_backoff" + +# Monitoring Configuration +monitoring: + # Metrics collection interval (seconds) + metrics_interval: 10 + + # Metrics aggregation interval (seconds) + aggregation_interval: 60 + + # Dashboard port + dashboard_port: 8080 + + # Enable alerts + enable_alerts: true + + # Alert thresholds + alert_thresholds: + error_rate: 10.0 # percentage + stress_load: 90.0 # percentage + + # Storage 
path for metrics + storage_path: "data/metrics.json" + +# Network Simulation (optional) +network: + # Enable network simulation + enabled: false + + # Simulated latency in milliseconds + latency_ms: 0 + + # Packet loss percentage + packet_loss: 0.0 + + # Bandwidth limit in Mbps + bandwidth_limit_mbps: 0 + +# Pull Request Automation +pr_automation: + # GitHub repository owner + repo_owner: "PiCoreTeam" + + # GitHub repository name + repo_name: "pi-node-docker" + + # Base branch for PRs + base_branch: "master" + + # GitHub token (use environment variable) + github_token: "${GITHUB_TOKEN}" + + # Auto-merge approved PRs + auto_merge: true + + # Merge method (merge, squash, rebase) + merge_method: "squash" + +# Logging Configuration +logging: + # Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + level: "INFO" + + # Log file path + file: "logs/stress_testing.log" + + # Log format + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + + # Maximum log file size in bytes + max_bytes: 10485760 # 10MB + + # Number of backup log files + backup_count: 5 \ No newline at end of file diff --git a/config_stress_test_config.yaml (2).txt b/config_stress_test_config.yaml (2).txt new file mode 100644 index 00000000..41963a7b --- /dev/null +++ b/config_stress_test_config.yaml (2).txt @@ -0,0 +1,143 @@ +# Intelligent Stress Testing System Configuration + +# Stress Test Configuration +stress_test: + # Test duration in seconds (0 = infinite) + duration: 3600 + + # Maximum concurrent users/connections + max_concurrent_users: 1000 + + # Ramp-up time in seconds + ramp_up_time: 300 + + # Base URL for testing + base_url: "http://localhost:8000" + + # Test interval between cycles (seconds) + test_interval: 1 + + # Endpoints to test + endpoints: + - path: "/api/transactions" + method: "POST" + weight: 0.6 + - path: "/api/accounts" + method: "GET" + weight: 0.3 + - path: "/api/status" + method: "GET" + weight: 0.1 + +# Error Handling Configuration +error_handling: + # Maximum retry 
attempts + max_retries: 5 + + # Base retry delay in seconds + retry_delay: 2 + + # Use exponential backoff + exponential_backoff: true + + # Enable automatic recovery + auto_recovery: true + + # Error category definitions + error_categories: + critical: + - "DatabaseConnectionError" + - "OutOfMemoryError" + - "SystemError" + recoverable: + - "TimeoutError" + - "ConnectionError" + - "RateLimitError" + - "TemporaryError" + ignorable: + - "ClientCancellationError" + - "NotFoundError" + + # Auto-fix strategies + auto_fix_strategies: + - error_type: "DatabaseConnectionError" + strategy: "reconnect_with_backoff" + - error_type: "RateLimitError" + strategy: "rate_limit_backoff" + - error_type: "TimeoutError" + strategy: "exponential_backoff" + - error_type: "ConnectionError" + strategy: "reconnect_with_backoff" + +# Monitoring Configuration +monitoring: + # Metrics collection interval (seconds) + metrics_interval: 10 + + # Metrics aggregation interval (seconds) + aggregation_interval: 60 + + # Dashboard port + dashboard_port: 8080 + + # Enable alerts + enable_alerts: true + + # Alert thresholds + alert_thresholds: + error_rate: 10.0 # percentage + stress_load: 90.0 # percentage + + # Storage path for metrics + storage_path: "data/metrics.json" + +# Network Simulation (optional) +network: + # Enable network simulation + enabled: false + + # Simulated latency in milliseconds + latency_ms: 0 + + # Packet loss percentage + packet_loss: 0.0 + + # Bandwidth limit in Mbps + bandwidth_limit_mbps: 0 + +# Pull Request Automation +pr_automation: + # GitHub repository owner + repo_owner: "PiCoreTeam" + + # GitHub repository name + repo_name: "pi-node-docker" + + # Base branch for PRs + base_branch: "master" + + # GitHub token (use environment variable) + github_token: "${GITHUB_TOKEN}" + + # Auto-merge approved PRs + auto_merge: true + + # Merge method (merge, squash, rebase) + merge_method: "squash" + +# Logging Configuration +logging: + # Log level (DEBUG, INFO, WARNING, ERROR, 
CRITICAL) + level: "INFO" + + # Log file path + file: "logs/stress_testing.log" + + # Log format + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + + # Maximum log file size in bytes + max_bytes: 10485760 # 10MB + + # Number of backup log files + backup_count: 5 \ No newline at end of file diff --git a/config_stress_test_config.yaml.txt b/config_stress_test_config.yaml.txt new file mode 100644 index 00000000..41963a7b --- /dev/null +++ b/config_stress_test_config.yaml.txt @@ -0,0 +1,143 @@ +# Intelligent Stress Testing System Configuration + +# Stress Test Configuration +stress_test: + # Test duration in seconds (0 = infinite) + duration: 3600 + + # Maximum concurrent users/connections + max_concurrent_users: 1000 + + # Ramp-up time in seconds + ramp_up_time: 300 + + # Base URL for testing + base_url: "http://localhost:8000" + + # Test interval between cycles (seconds) + test_interval: 1 + + # Endpoints to test + endpoints: + - path: "/api/transactions" + method: "POST" + weight: 0.6 + - path: "/api/accounts" + method: "GET" + weight: 0.3 + - path: "/api/status" + method: "GET" + weight: 0.1 + +# Error Handling Configuration +error_handling: + # Maximum retry attempts + max_retries: 5 + + # Base retry delay in seconds + retry_delay: 2 + + # Use exponential backoff + exponential_backoff: true + + # Enable automatic recovery + auto_recovery: true + + # Error category definitions + error_categories: + critical: + - "DatabaseConnectionError" + - "OutOfMemoryError" + - "SystemError" + recoverable: + - "TimeoutError" + - "ConnectionError" + - "RateLimitError" + - "TemporaryError" + ignorable: + - "ClientCancellationError" + - "NotFoundError" + + # Auto-fix strategies + auto_fix_strategies: + - error_type: "DatabaseConnectionError" + strategy: "reconnect_with_backoff" + - error_type: "RateLimitError" + strategy: "rate_limit_backoff" + - error_type: "TimeoutError" + strategy: "exponential_backoff" + - error_type: "ConnectionError" + strategy: 
"reconnect_with_backoff" + +# Monitoring Configuration +monitoring: + # Metrics collection interval (seconds) + metrics_interval: 10 + + # Metrics aggregation interval (seconds) + aggregation_interval: 60 + + # Dashboard port + dashboard_port: 8080 + + # Enable alerts + enable_alerts: true + + # Alert thresholds + alert_thresholds: + error_rate: 10.0 # percentage + stress_load: 90.0 # percentage + + # Storage path for metrics + storage_path: "data/metrics.json" + +# Network Simulation (optional) +network: + # Enable network simulation + enabled: false + + # Simulated latency in milliseconds + latency_ms: 0 + + # Packet loss percentage + packet_loss: 0.0 + + # Bandwidth limit in Mbps + bandwidth_limit_mbps: 0 + +# Pull Request Automation +pr_automation: + # GitHub repository owner + repo_owner: "PiCoreTeam" + + # GitHub repository name + repo_name: "pi-node-docker" + + # Base branch for PRs + base_branch: "master" + + # GitHub token (use environment variable) + github_token: "${GITHUB_TOKEN}" + + # Auto-merge approved PRs + auto_merge: true + + # Merge method (merge, squash, rebase) + merge_method: "squash" + +# Logging Configuration +logging: + # Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + level: "INFO" + + # Log file path + file: "logs/stress_testing.log" + + # Log format + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + + # Maximum log file size in bytes + max_bytes: 10485760 # 10MB + + # Number of backup log files + backup_count: 5 \ No newline at end of file diff --git a/src_common_constants.py b/src_common_constants.py new file mode 100644 index 00000000..db4bb57d --- /dev/null +++ b/src_common_constants.py @@ -0,0 +1,74 @@ +""" +System Constants +Centralized constants used across the system +""" + +# Version +VERSION = "1.0.0" +SYSTEM_NAME = "Intelligent Stress Testing System" + +# Error Categories +ERROR_CATEGORY_CRITICAL = "critical" +ERROR_CATEGORY_RECOVERABLE = "recoverable" +ERROR_CATEGORY_IGNORABLE = "ignorable" 
+ERROR_CATEGORY_WARNING = "warning" + +# Error Actions +ERROR_ACTION_ALERT_AND_RETRY = "alert_and_retry" +ERROR_ACTION_AUTO_RETRY = "auto_retry" +ERROR_ACTION_LOG_ONLY = "log_only" +ERROR_ACTION_ESCALATE = "escalate" + +# Recovery Strategies +STRATEGY_GENERIC_RETRY = "generic_retry" +STRATEGY_EXPONENTIAL_BACKOFF = "exponential_backoff" +STRATEGY_RECONNECT = "reconnect_with_backoff" +STRATEGY_RATE_LIMIT = "rate_limit_backoff" +STRATEGY_RESTART = "restart_with_increased_memory" +STRATEGY_REFRESH_CREDENTIALS = "refresh_credentials" +STRATEGY_LOG_CONTINUE = "log_and_continue" + +# Default Configuration Values +DEFAULT_MAX_RETRIES = 5 +DEFAULT_RETRY_DELAY = 2.0 +DEFAULT_MAX_CONCURRENT = 100 +DEFAULT_TEST_DURATION = 3600 +DEFAULT_METRICS_INTERVAL = 10 +DEFAULT_DASHBOARD_PORT = 8080 + +# HTTP Status Codes +HTTP_OK = 200 +HTTP_CREATED = 201 +HTTP_BAD_REQUEST = 400 +HTTP_UNAUTHORIZED = 401 +HTTP_FORBIDDEN = 403 +HTTP_NOT_FOUND = 404 +HTTP_RATE_LIMIT = 429 +HTTP_SERVER_ERROR = 500 + +# Metrics Thresholds +ERROR_RATE_WARNING_THRESHOLD = 5.0 # percentage +ERROR_RATE_CRITICAL_THRESHOLD = 10.0 # percentage +LOAD_WARNING_THRESHOLD = 70.0 # percentage +LOAD_CRITICAL_THRESHOLD = 90.0 # percentage + +# Time Constants +SECOND = 1 +MINUTE = 60 +HOUR = 3600 +DAY = 86400 + +# Storage Paths +DEFAULT_LOGS_DIR = "logs" +DEFAULT_DATA_DIR = "data" +DEFAULT_CONFIG_DIR = "config" + +# GitHub PR Settings +PR_LABEL_AUTOMATED = "automated" +PR_LABEL_STRESS_TEST = "stress-test" +PR_LABEL_ERROR_FIX = "error-fix" + +# Dashboard Settings +DASHBOARD_UPDATE_INTERVAL = 2 # seconds +METRICS_HISTORY_LIMIT = 1000 +METRICS_DISPLAY_LIMIT = 100 \ No newline at end of file diff --git a/src_common_logger.py b/src_common_logger.py new file mode 100644 index 00000000..52f1785c --- /dev/null +++ b/src_common_logger.py @@ -0,0 +1,75 @@ +""" +Logging Configuration +Centralized logging setup for all components +""" + +import logging +import logging.handlers +import sys +from pathlib import Path +from typing import 
Dict, Any + + +def setup_logging(config: Dict[str, Any] = None) -> logging.Logger: + """ + Setup centralized logging configuration + + Args: + config: Logging configuration dictionary + + Returns: + Configured logger instance + """ + if config is None: + config = {} + + # Get configuration values + log_level = config.get('level', 'INFO') + log_file = config.get('file', 'logs/stress_testing.log') + log_format = config.get( + 'format', + '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + max_bytes = config.get('max_bytes', 10 * 1024 * 1024) # 10MB + backup_count = config.get('backup_count', 5) + + # Create logs directory if it doesn't exist + log_path = Path(log_file) + log_path.parent.mkdir(parents=True, exist_ok=True) + + # Create formatter + formatter = logging.Formatter(log_format) + + # Setup root logger + root_logger = logging.getLogger() + root_logger.setLevel(getattr(logging, log_level.upper())) + + # Remove existing handlers + root_logger.handlers = [] + + # Console handler + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(getattr(logging, log_level.upper())) + console_handler.setFormatter(formatter) + root_logger.addHandler(console_handler) + + # File handler with rotation + file_handler = logging.handlers.RotatingFileHandler( + log_file, + maxBytes=max_bytes, + backupCount=backup_count + ) + file_handler.setLevel(getattr(logging, log_level.upper())) + file_handler.setFormatter(formatter) + root_logger.addHandler(file_handler) + + # Create application logger + logger = logging.getLogger('stress_testing_system') + logger.info("Logging system initialized") + + return logger + + +def get_logger(name: str) -> logging.Logger: + """Get a logger instance for a specific module""" + return logging.getLogger(f'stress_testing_system.{name}') \ No newline at end of file diff --git a/src_common_utils.py b/src_common_utils.py new file mode 100644 index 00000000..e16871d0 --- /dev/null +++ b/src_common_utils.py @@ -0,0 +1,137 @@ 
+""" +Common Utility Functions +Shared utility functions used across the system +""" + +import yaml +import json +import os +from pathlib import Path +from typing import Dict, Any, Optional +import hashlib +import time + + +def load_config(config_path: str) -> Dict[str, Any]: + """ + Load configuration from YAML file + + Args: + config_path: Path to configuration file + + Returns: + Configuration dictionary + """ + config_file = Path(config_path) + + if not config_file.exists(): + raise FileNotFoundError(f"Configuration file not found: {config_path}") + + with open(config_file, 'r') as f: + if config_path.endswith('.yaml') or config_path.endswith('.yml'): + config = yaml.safe_load(f) + elif config_path.endswith('.json'): + config = json.load(f) + else: + raise ValueError(f"Unsupported configuration file format: {config_path}") + + # Environment variable substitution + config = substitute_env_vars(config) + + return config + + +def substitute_env_vars(data: Any) -> Any: + """ + Recursively substitute environment variables in configuration + Format: ${VAR_NAME} or ${VAR_NAME:default_value} + """ + if isinstance(data, dict): + return {k: substitute_env_vars(v) for k, v in data.items()} + elif isinstance(data, list): + return [substitute_env_vars(item) for item in data] + elif isinstance(data, str): + if data.startswith('${') and data.endswith('}'): + var_expr = data[2:-1] + if ':' in var_expr: + var_name, default = var_expr.split(':', 1) + return os.getenv(var_name, default) + else: + return os.getenv(var_expr, data) + return data + + +def save_json(data: Dict[str, Any], filepath: str, pretty: bool = True): + """Save data to JSON file""" + Path(filepath).parent.mkdir(parents=True, exist_ok=True) + + with open(filepath, 'w') as f: + if pretty: + json.dump(data, f, indent=2) + else: + json.dump(data, f) + + +def load_json(filepath: str) -> Dict[str, Any]: + """Load data from JSON file""" + with open(filepath, 'r') as f: + return json.load(f) + + +def 
calculate_hash(data: str, algorithm: str = 'sha256') -> str: + """Calculate hash of string data""" + hasher = hashlib.new(algorithm) + hasher.update(data.encode('utf-8')) + return hasher.hexdigest() + + +def format_bytes(bytes_value: int) -> str: + """Format bytes to human readable format""" + for unit in ['B', 'KB', 'MB', 'GB', 'TB']: + if bytes_value < 1024.0: + return f"{bytes_value:.2f} {unit}" + bytes_value /= 1024.0 + return f"{bytes_value:.2f} PB" + + +def format_duration(seconds: float) -> str: + """Format duration in seconds to human readable format""" + if seconds < 60: + return f"{seconds:.2f}s" + elif seconds < 3600: + minutes = seconds / 60 + return f"{minutes:.2f}m" + elif seconds < 86400: + hours = seconds / 3600 + return f"{hours:.2f}h" + else: + days = seconds / 86400 + return f"{days:.2f}d" + + +def ensure_directory(path: str): + """Ensure directory exists""" + Path(path).mkdir(parents=True, exist_ok=True) + + +def timestamp_to_iso(timestamp: float) -> str: + """Convert Unix timestamp to ISO format string""" + from datetime import datetime + return datetime.fromtimestamp(timestamp).isoformat() + + +class Timer: + """Context manager for timing operations""" + + def __init__(self, name: str = "Operation"): + self.name = name + self.start_time = None + self.duration = None + + def __enter__(self): + self.start_time = time.time() + return self + + def __exit__(self, *args): + self.duration = time.time() - self.start_time + print(f"{self.name} took {format_duration(self.duration)}") \ No newline at end of file diff --git a/src_dashboard_backend_api.py b/src_dashboard_backend_api.py new file mode 100644 index 00000000..63dea1df --- /dev/null +++ b/src_dashboard_backend_api.py @@ -0,0 +1,89 @@ +""" +Dashboard API Server +Provides RESTful API for dashboard frontend +""" + +import asyncio +import logging +from typing import Dict, Any +from aiohttp import web +import json + + +class DashboardAPI: + """RESTful API server for dashboard""" + + def 
__init__(self, metrics_collector, config: Dict[str, Any]): + self.metrics_collector = metrics_collector + self.config = config + self.logger = logging.getLogger(__name__) + self.app = web.Application() + self._setup_routes() + + def _setup_routes(self): + """Setup API routes""" + self.app.router.add_get('/api/metrics/current', self.get_current_metrics) + self.app.router.add_get('/api/metrics/history', self.get_metrics_history) + self.app.router.add_get('/api/health', self.health_check) + self.app.router.add_get('/', self.serve_dashboard) + + # Enable CORS + self.app.middlewares.append(self._cors_middleware) + + @web.middleware + async def _cors_middleware(self, request, handler): + """CORS middleware""" + response = await handler(request) + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type' + return response + + async def get_current_metrics(self, request): + """Get current metrics""" + try: + metrics = self.metrics_collector.get_current_metrics() + return web.json_response(metrics) + except Exception as e: + self.logger.error(f"Error getting current metrics: {e}") + return web.json_response({'error': str(e)}, status=500) + + async def get_metrics_history(self, request): + """Get metrics history""" + try: + limit = int(request.query.get('limit', 100)) + history = self.metrics_collector.get_metrics_history(limit) + return web.json_response({'history': history}) + except Exception as e: + self.logger.error(f"Error getting metrics history: {e}") + return web.json_response({'error': str(e)}, status=500) + + async def health_check(self, request): + """Health check endpoint""" + return web.json_response({'status': 'healthy', 'timestamp': asyncio.get_event_loop().time()}) + + async def serve_dashboard(self, request): + """Serve dashboard HTML""" + html = """ + + + + Stress Testing Dashboard + + + +

Stress Testing Dashboard

+

API is running. Use /api/metrics/current to get metrics.

+ + + """ + return web.Response(text=html, content_type='text/html') + + async def start(self): + """Start the API server""" + port = self.config.get('port', 8080) + runner = web.AppRunner(self.app) + await runner.setup() + site = web.TCPSite(runner, '0.0.0.0', port) + await site.start() + self.logger.info(f"Dashboard API started on port {port}") \ No newline at end of file diff --git a/src_dashboard_backend_metrics_collector.py b/src_dashboard_backend_metrics_collector.py new file mode 100644 index 00000000..ce38e72c --- /dev/null +++ b/src_dashboard_backend_metrics_collector.py @@ -0,0 +1,146 @@ +""" +Metrics Collection System +Collects, aggregates, and stores system metrics +""" + +import asyncio +import logging +import time +from typing import Dict, Any, List +from dataclasses import dataclass, asdict +from datetime import datetime +import json + + +@dataclass +class SystemMetrics: + """Container for system metrics""" + timestamp: float + error_rate: float = 0.0 + stress_load: float = 0.0 + bypassed_errors: int = 0 + fixed_errors: int = 0 + manual_intervention_count: int = 0 + success_rate: float = 100.0 + uptime: float = 0.0 + active_recovery_processes: int = 0 + total_requests: int = 0 + successful_requests: int = 0 + failed_requests: int = 0 + + +class MetricsCollector: + """Collects and manages system metrics""" + + def __init__(self, config: Dict[str, Any]): + self.config = config + self.logger = logging.getLogger(__name__) + self.metrics = SystemMetrics(timestamp=time.time()) + self.metrics_history: List[SystemMetrics] = [] + self.start_time = time.time() + self.storage_path = config.get('storage_path', 'data/metrics.json') + + async def record_test_results(self, results): + """Record stress test results""" + self.metrics.total_requests += results.total_requests + self.metrics.successful_requests += results.successful_requests + self.metrics.failed_requests += results.failed_requests + + # Calculate error rate + if self.metrics.total_requests > 0: + 
self.metrics.error_rate = ( + self.metrics.failed_requests / self.metrics.total_requests + ) * 100 + + self.metrics.success_rate = ( + self.metrics.successful_requests / self.metrics.total_requests + ) * 100 + + self.logger.debug(f"Recorded test results: {results.total_requests} requests") + + async def record_error(self, classification): + """Record an error classification""" + self.metrics.failed_requests += 1 + self.logger.debug(f"Recorded error: {classification.error_type}") + + async def record_critical_error(self, exception: Exception): + """Record a critical error""" + self.logger.critical(f"Critical error recorded: {exception}") + + async def increment_active_recovery(self): + """Increment active recovery process count""" + self.metrics.active_recovery_processes += 1 + + async def decrement_active_recovery(self): + """Decrement active recovery process count""" + self.metrics.active_recovery_processes = max( + 0, + self.metrics.active_recovery_processes - 1 + ) + + async def increment_fixed_errors(self): + """Increment fixed errors count""" + self.metrics.fixed_errors += 1 + self.logger.info(f"Fixed errors count: {self.metrics.fixed_errors}") + + async def increment_bypassed_errors(self): + """Increment bypassed errors count""" + self.metrics.bypassed_errors += 1 + self.logger.info(f"Bypassed errors count: {self.metrics.bypassed_errors}") + + async def increment_manual_intervention_count(self): + """Increment manual intervention count""" + self.metrics.manual_intervention_count += 1 + self.logger.warning( + f"Manual intervention count: {self.metrics.manual_intervention_count}" + ) + + async def record_system_metrics(self, metrics: Dict[str, Any]): + """Record system-level metrics""" + self.metrics.stress_load = metrics.get('load', 0.0) + self.metrics.uptime = time.time() - self.start_time + + async def aggregate_and_store(self): + """Aggregate and store metrics""" + # Update timestamp + self.metrics.timestamp = time.time() + + # Add to history + 
self.metrics_history.append(SystemMetrics(**asdict(self.metrics))) + + # Keep only last 1000 entries + if len(self.metrics_history) > 1000: + self.metrics_history = self.metrics_history[-1000:] + + # Store to file + await self._save_metrics() + + self.logger.debug("Metrics aggregated and stored") + + async def _save_metrics(self): + """Save metrics to storage""" + try: + data = { + 'current': asdict(self.metrics), + 'history': [asdict(m) for m in self.metrics_history[-100:]] + } + + # In real implementation, save to database or file + # For now, just log + self.logger.debug(f"Metrics saved: {len(self.metrics_history)} entries") + + except Exception as e: + self.logger.error(f"Error saving metrics: {e}") + + def get_current_metrics(self) -> Dict[str, Any]: + """Get current metrics as dictionary""" + return asdict(self.metrics) + + def get_metrics_history(self, limit: int = 100) -> List[Dict[str, Any]]: + """Get metrics history""" + return [asdict(m) for m in self.metrics_history[-limit:]] + + async def close(self): + """Close metrics collector""" + await self._save_metrics() + self.logger.info("Metrics collector closed") \ No newline at end of file diff --git a/src_dashboard_frontend_index.html b/src_dashboard_frontend_index.html new file mode 100644 index 00000000..6f1d77c1 --- /dev/null +++ b/src_dashboard_frontend_index.html @@ -0,0 +1,156 @@ + + + + + + Intelligent Stress Testing Dashboard + + + + +
+ +
+

🚀 Intelligent Stress Testing Dashboard

+
+ + Connecting... +
+
+ + +
+ +
+
⚠️
+
+

Error Rate

+
0.00%
+
of total requests
+
+
+ + +
+
📊
+
+

Stress Load

+
0.00%
+
current capacity
+
+
+ + +
+
⏭️
+
+

Bypassed Errors

+
0
+
auto-handled
+
+
+ + +
+
+
+

Fixed Errors

+
0
+
automatically resolved
+
+
+ + +
+
👤
+
+

Manual Intervention

+
0
+
requires attention
+
+
+ + +
+
🎯
+
+

Success Rate

+
100.00%
+
overall performance
+
+
+ + +
+
⏱️
+
+

System Uptime

+
0h 0m
+
continuous operation
+
+
+ + +
+
🔧
+
+

Active Recovery

+
0
+
processes running
+
+
+
+ + +
+
+

Error Rate Trend

+ +
+
+

Request Statistics

+ +
+
+ + +
+

📈 Detailed Statistics

+ + + + + + + + + + + + + + + + + + + + + + + + + +
MetricCurrent ValueTotal
Total Requests0-
Successful Requests00%
Failed Requests00%
+
+ + + +
+ + + + \ No newline at end of file diff --git a/src_dashboard_frontend_styles.css b/src_dashboard_frontend_styles.css new file mode 100644 index 00000000..7756c79c --- /dev/null +++ b/src_dashboard_frontend_styles.css @@ -0,0 +1,243 @@ +/* Global Styles */ +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: #333; + line-height: 1.6; + min-height: 100vh; +} + +.container { + max-width: 1400px; + margin: 0 auto; + padding: 20px; +} + +/* Header */ +.header { + background: white; + padding: 30px; + border-radius: 15px; + box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1); + margin-bottom: 30px; + display: flex; + justify-content: space-between; + align-items: center; +} + +.header h1 { + font-size: 2.5rem; + color: #667eea; + text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.1); +} + +.status-indicator { + display: flex; + align-items: center; + gap: 10px; + font-size: 1.1rem; +} + +.status-dot { + width: 15px; + height: 15px; + border-radius: 50%; + background-color: #ffc107; + animation: pulse 2s infinite; +} + +.status-dot.online { + background-color: #4caf50; +} + +.status-dot.offline { + background-color: #f44336; + animation: none; +} + +@keyframes pulse { + 0%, 100% { + opacity: 1; + } + 50% { + opacity: 0.5; + } +} + +/* Metrics Grid */ +.metrics-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); + gap: 20px; + margin-bottom: 30px; +} + +.metric-card { + background: white; + padding: 25px; + border-radius: 15px; + box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1); + display: flex; + align-items: center; + gap: 20px; + transition: transform 0.3s ease, box-shadow 0.3s ease; +} + +.metric-card:hover { + transform: translateY(-5px); + box-shadow: 0 10px 25px rgba(0, 0, 0, 0.15); +} + +.metric-card.success { + border-left: 5px solid #4caf50; +} + +.metric-card.warning { + border-left: 5px solid 
#ff9800; +} + +.metric-card.error { + border-left: 5px solid #f44336; +} + +.metric-icon { + font-size: 3rem; +} + +.metric-content { + flex: 1; +} + +.metric-content h3 { + font-size: 0.9rem; + color: #666; + text-transform: uppercase; + letter-spacing: 1px; + margin-bottom: 10px; +} + +.metric-value { + font-size: 2.5rem; + font-weight: bold; + color: #667eea; + margin-bottom: 5px; +} + +.metric-label { + font-size: 0.85rem; + color: #999; +} + +/* Charts Section */ +.charts-section { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(500px, 1fr)); + gap: 20px; + margin-bottom: 30px; +} + +.chart-card { + background: white; + padding: 25px; + border-radius: 15px; + box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1); +} + +.chart-card h3 { + margin-bottom: 20px; + color: #667eea; + font-size: 1.3rem; +} + +/* Statistics Table */ +.stats-section { + background: white; + padding: 30px; + border-radius: 15px; + box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1); + margin-bottom: 30px; +} + +.stats-section h2 { + margin-bottom: 20px; + color: #667eea; +} + +.stats-table { + width: 100%; + border-collapse: collapse; +} + +.stats-table th, +.stats-table td { + padding: 15px; + text-align: left; + border-bottom: 1px solid #eee; +} + +.stats-table th { + background-color: #f8f9fa; + font-weight: 600; + color: #667eea; +} + +.stats-table tr:hover { + background-color: #f8f9fa; +} + +/* Footer */ +.footer { + background: white; + padding: 20px; + border-radius: 15px; + box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1); + text-align: center; + color: #666; +} + +.footer p { + margin: 5px 0; +} + +/* Responsive Design */ +@media (max-width: 768px) { + .header { + flex-direction: column; + gap: 20px; + text-align: center; + } + + .header h1 { + font-size: 1.8rem; + } + + .metrics-grid { + grid-template-columns: 1fr; + } + + .charts-section { + grid-template-columns: 1fr; + } +} + +/* Loading Animation */ +@keyframes shimmer { + 0% { + background-position: -1000px 0; + } + 100% { + 
"""
Automatic Error Recovery System
Handles error recovery with retry logic and multiple strategies
"""

import asyncio
import logging
import time
from typing import Optional, Dict, Any, Callable
from dataclasses import dataclass
# NOTE: ErrorAction was imported here but never used; removed.
from error_handler.error_classifier import ErrorClassification
from error_handler.retry_manager import RetryManager


@dataclass
class RecoveryResult:
    """Result of a recovery attempt"""
    success: bool  # True when the strategy considers the error recovered
    attempts: int  # number of attempts the strategy performed
    duration: float  # seconds spent inside the strategy
    strategy_used: str  # registry key of the strategy that ran
    error_message: Optional[str] = None  # failure detail, set only on failure


class AutoRecoveryManager:
    """Manages automatic error recovery with multiple strategies.

    The strategy named by ``classification.recommended_strategy`` is looked
    up in a registry and executed through :class:`RetryManager`; unknown
    names degrade gracefully to a generic retry.
    """

    def __init__(self, config: Dict[str, Any]):
        """
        Args:
            config: Error-handling configuration section; also passed through
                to :class:`RetryManager` (``max_retries``, ``retry_delay``, …).
        """
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.retry_manager = RetryManager(config)
        self.recovery_strategies = self._initialize_strategies()
        # In-flight recovery counter; stop() drains it before returning.
        self.active_recoveries = 0

    def _initialize_strategies(self) -> Dict[str, Callable]:
        """Map classifier strategy names to coroutine strategy functions."""
        return {
            'generic_retry': self._strategy_generic_retry,
            'exponential_backoff': self._strategy_exponential_backoff,
            'reconnect_with_backoff': self._strategy_reconnect_with_backoff,
            'rate_limit_backoff': self._strategy_rate_limit_backoff,
            'restart_with_increased_memory': self._strategy_restart_component,
            'refresh_credentials': self._strategy_refresh_credentials,
            'log_and_continue': self._strategy_log_and_continue,
        }

    async def recover(
        self,
        error: Exception,
        classification: ErrorClassification
    ) -> bool:
        """
        Attempt to recover from an error.

        Args:
            error: The original exception being recovered from.
            classification: Classifier output; its ``recommended_strategy``
                selects the recovery routine.

        Returns:
            True if recovery was successful.
        """
        self.active_recoveries += 1
        start_time = time.time()

        try:
            self.logger.info(
                f"Starting recovery for {classification.error_type} "
                f"using strategy: {classification.recommended_strategy}"
            )

            # Unknown strategy names fall back to the generic retry.
            strategy = self.recovery_strategies.get(
                classification.recommended_strategy,
                self._strategy_generic_retry
            )

            # The retry manager re-invokes the strategy if it raises.
            result = await self.retry_manager.execute_with_retry(
                strategy,
                error,
                classification
            )

            duration = time.time() - start_time

            if result.success:
                self.logger.info(
                    f"✅ Recovery successful after {result.attempts} attempts "
                    f"in {duration:.2f}s"
                )
            else:
                self.logger.warning(
                    f"❌ Recovery failed after {result.attempts} attempts "
                    f"in {duration:.2f}s: {result.error_message}"
                )

            return result.success

        except Exception as e:
            # Recovery itself failed unexpectedly; report failure to caller.
            self.logger.error(f"Error during recovery process: {e}", exc_info=True)
            return False
        finally:
            self.active_recoveries -= 1

    # Recovery Strategies
    # NOTE(review): the strategies below *simulate* recovery with sleeps and
    # hard-coded success points; wiring in real reconnect/retry logic is a
    # TODO (see the inline comments in the original upload).

    async def _strategy_generic_retry(
        self,
        error: Exception,
        classification: ErrorClassification
    ) -> RecoveryResult:
        """Generic retry strategy: one short pause, then report success."""
        self.logger.info("Executing generic retry strategy")

        # Simple retry with small delay
        await asyncio.sleep(1)

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=1.0,
            strategy_used='generic_retry'
        )

    async def _strategy_exponential_backoff(
        self,
        error: Exception,
        classification: ErrorClassification
    ) -> RecoveryResult:
        """Exponential backoff retry strategy (delay doubles per attempt)."""
        max_attempts = self.config.get('max_retries', 5)
        base_delay = self.config.get('retry_delay', 2)

        for attempt in range(max_attempts):
            delay = base_delay * (2 ** attempt)
            self.logger.info(f"Attempt {attempt + 1}/{max_attempts}, waiting {delay}s...")

            await asyncio.sleep(delay)

            # Simulated outcome: succeeds on the 3rd attempt. In a real
            # implementation, retry the actual failed operation here.
            if attempt >= 2:
                return RecoveryResult(
                    success=True,
                    attempts=attempt + 1,
                    duration=sum(base_delay * (2 ** i) for i in range(attempt + 1)),
                    strategy_used='exponential_backoff'
                )

        return RecoveryResult(
            success=False,
            attempts=max_attempts,
            duration=sum(base_delay * (2 ** i) for i in range(max_attempts)),
            strategy_used='exponential_backoff',
            error_message="Max retries exceeded"
        )

    async def _strategy_reconnect_with_backoff(
        self,
        error: Exception,
        classification: ErrorClassification
    ) -> RecoveryResult:
        """Reconnection strategy with exponential backoff."""
        self.logger.info("Executing reconnect with backoff strategy")

        max_attempts = 5
        base_delay = 2

        for attempt in range(max_attempts):
            try:
                delay = base_delay * (2 ** attempt)
                self.logger.info(f"Reconnection attempt {attempt + 1}/{max_attempts}")

                await asyncio.sleep(delay)

                # Simulated outcome: succeeds on the 2nd attempt. In a real
                # implementation, attempt to reconnect to the service here.
                if attempt >= 1:
                    return RecoveryResult(
                        success=True,
                        attempts=attempt + 1,
                        duration=sum(base_delay * (2 ** i) for i in range(attempt + 1)),
                        strategy_used='reconnect_with_backoff'
                    )

            except Exception as e:
                self.logger.warning(f"Reconnection attempt {attempt + 1} failed: {e}")
                continue

        return RecoveryResult(
            success=False,
            attempts=max_attempts,
            duration=sum(base_delay * (2 ** i) for i in range(max_attempts)),
            strategy_used='reconnect_with_backoff',
            error_message="Failed to reconnect"
        )

    async def _strategy_rate_limit_backoff(
        self,
        error: Exception,
        classification: ErrorClassification
    ) -> RecoveryResult:
        """Rate limit backoff strategy: a single long cool-down."""
        self.logger.info("Executing rate limit backoff strategy")

        # Wait longer for rate limits
        delay = 60  # 1 minute
        await asyncio.sleep(delay)

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=delay,
            strategy_used='rate_limit_backoff'
        )

    async def _strategy_restart_component(
        self,
        error: Exception,
        classification: ErrorClassification
    ) -> RecoveryResult:
        """Restart component strategy (for critical errors)."""
        self.logger.warning("Executing component restart strategy")

        # Simulate component restart
        await asyncio.sleep(5)

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=5.0,
            strategy_used='restart_with_increased_memory'
        )

    async def _strategy_refresh_credentials(
        self,
        error: Exception,
        classification: ErrorClassification
    ) -> RecoveryResult:
        """Refresh authentication credentials."""
        self.logger.info("Executing credential refresh strategy")

        # Simulate credential refresh
        await asyncio.sleep(2)

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=2.0,
            strategy_used='refresh_credentials'
        )

    async def _strategy_log_and_continue(
        self,
        error: Exception,
        classification: ErrorClassification
    ) -> RecoveryResult:
        """Simply log the error and continue."""
        self.logger.info(f"Logging error and continuing: {error}")

        return RecoveryResult(
            success=True,
            attempts=1,
            duration=0.0,
            strategy_used='log_and_continue'
        )

    async def stop(self):
        """Stop the recovery manager, draining in-flight recoveries first."""
        # Busy-wait poll; acceptable at shutdown cadence (1s interval).
        while self.active_recoveries > 0:
            self.logger.info(f"Waiting for {self.active_recoveries} active recoveries to complete...")
            await asyncio.sleep(1)

        self.logger.info("Auto recovery manager stopped")
"""
Error Classification System
Automatically categorizes errors and determines handling strategy
"""

import logging
import re
import time
from dataclasses import dataclass
from enum import Enum
from typing import Dict, Any, Optional


class ErrorCategory(Enum):
    """Error severity categories"""
    CRITICAL = "critical"
    RECOVERABLE = "recoverable"
    IGNORABLE = "ignorable"
    WARNING = "warning"


class ErrorAction(Enum):
    """Actions to take for different error types"""
    ALERT_AND_RETRY = "alert_and_retry"
    AUTO_RETRY = "auto_retry"
    LOG_ONLY = "log_only"
    ESCALATE = "escalate"


@dataclass
class ErrorClassification:
    """Result of error classification"""
    category: ErrorCategory
    action: ErrorAction
    error_type: str
    severity_score: int  # 1-10, higher is more severe
    is_critical: bool
    requires_manual_intervention: bool
    recommended_strategy: str  # key understood by the recovery manager
    metadata: Dict[str, Any]


# Message patterns checked (in declaration order) when no explicit rule
# matches the exception's type name. Compiled once at import time —
# previously the table was rebuilt and every regex recompiled per call.
_MESSAGE_PATTERNS = [
    (re.compile(r'connection.*refused|timeout|timed out', re.IGNORECASE), {
        'category': ErrorCategory.RECOVERABLE,
        'action': ErrorAction.AUTO_RETRY,
        'severity': 6,
        'strategy': 'exponential_backoff'
    }),
    (re.compile(r'out of memory|oom|memory error', re.IGNORECASE), {
        'category': ErrorCategory.CRITICAL,
        'action': ErrorAction.ALERT_AND_RETRY,
        'severity': 10,
        'strategy': 'restart_with_increased_memory'
    }),
    (re.compile(r'rate limit|too many requests|429', re.IGNORECASE), {
        'category': ErrorCategory.RECOVERABLE,
        'action': ErrorAction.AUTO_RETRY,
        'severity': 4,
        'strategy': 'rate_limit_backoff'
    }),
    (re.compile(r'database.*connection|db error|sql', re.IGNORECASE), {
        'category': ErrorCategory.CRITICAL,
        'action': ErrorAction.ALERT_AND_RETRY,
        'severity': 8,
        'strategy': 'reconnect_with_backoff'
    }),
    (re.compile(r'unauthorized|authentication|forbidden', re.IGNORECASE), {
        'category': ErrorCategory.CRITICAL,
        'action': ErrorAction.ESCALATE,
        'severity': 9,
        'strategy': 'refresh_credentials'
    }),
    (re.compile(r'not found|404', re.IGNORECASE), {
        'category': ErrorCategory.WARNING,
        'action': ErrorAction.LOG_ONLY,
        'severity': 3,
        'strategy': 'log_and_continue'
    }),
]


class ErrorClassifier:
    """Intelligent error classification system.

    Classification precedence: explicit per-type rules from config, then
    message-pattern matching, then a default "unknown error" classification.
    """

    def __init__(self, config: Dict[str, Any]):
        """
        Args:
            config: Error-handling config; ``error_categories`` lists
                exception type names per category, ``auto_fix_strategies``
                maps error types to recovery strategy names.
        """
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.error_rules = self._load_error_rules()
        # Pre-compiled (pattern, rule) pairs; previously this attribute was
        # initialized to {} and never used while patterns were recompiled
        # on every classify() call.
        self.pattern_cache = _MESSAGE_PATTERNS

    def _load_error_rules(self) -> Dict[str, Dict]:
        """Load per-exception-type handling rules from config."""
        rules = {}

        # Critical errors
        for error_type in self.config.get('error_categories', {}).get('critical', []):
            rules[error_type] = {
                'category': ErrorCategory.CRITICAL,
                'action': ErrorAction.ALERT_AND_RETRY,
                'severity': 9,
                'manual_intervention': True
            }

        # Recoverable errors
        for error_type in self.config.get('error_categories', {}).get('recoverable', []):
            rules[error_type] = {
                'category': ErrorCategory.RECOVERABLE,
                'action': ErrorAction.AUTO_RETRY,
                'severity': 5,
                'manual_intervention': False
            }

        # Ignorable errors
        for error_type in self.config.get('error_categories', {}).get('ignorable', []):
            rules[error_type] = {
                'category': ErrorCategory.IGNORABLE,
                'action': ErrorAction.LOG_ONLY,
                'severity': 2,
                'manual_intervention': False
            }

        return rules

    def classify(self, error: Exception) -> ErrorClassification:
        """Classify an error and determine handling strategy."""
        error_type = type(error).__name__
        error_message = str(error)

        # Check if we have a specific rule for this error type
        if error_type in self.error_rules:
            rule = self.error_rules[error_type]
            return self._create_classification(error_type, error_message, rule)

        # Pattern-based classification
        classification = self._classify_by_pattern(error_type, error_message)
        if classification:
            return classification

        # Default classification for unknown errors
        return self._create_default_classification(error_type, error_message)

    def classify_exception(self, exception: Exception) -> ErrorClassification:
        """Classify an exception (alias for classify)"""
        return self.classify(exception)

    def _classify_by_pattern(
        self,
        error_type: str,
        error_message: str
    ) -> Optional[ErrorClassification]:
        """Classify error based on message patterns.

        Returns None when no pattern matches (annotation fixed: the previous
        signature claimed a non-optional ErrorClassification).
        """
        for pattern, rule in self.pattern_cache:
            if pattern.search(error_message):
                return self._create_classification(
                    error_type,
                    error_message,
                    {
                        'category': rule['category'],
                        'action': rule['action'],
                        'severity': rule['severity'],
                        # Manual intervention for high-severity matches only.
                        'manual_intervention': rule['severity'] >= 8
                    },
                    recommended_strategy=rule['strategy']
                )

        return None

    @staticmethod
    def _format_timestamp() -> str:
        """Current local time as ``YYYY-MM-DD HH:MM:SS,mmm``.

        Mirrors logging.Formatter's default asctime format, which the
        previous implementation obtained by constructing a throwaway
        LogRecord — fragile and needlessly expensive.
        """
        now = time.time()
        msecs = int((now - int(now)) * 1000)
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(now)) + f',{msecs:03d}'

    def _create_classification(
        self,
        error_type: str,
        error_message: str,
        rule: Dict,
        recommended_strategy: str = None
    ) -> ErrorClassification:
        """Create an ErrorClassification object from a matched rule."""

        # Determine strategy: explicit argument > configured auto-fix
        # strategy for this error type > generic retry.
        if not recommended_strategy:
            strategies = self.config.get('auto_fix_strategies', [])
            for strategy_config in strategies:
                if strategy_config.get('error_type') == error_type:
                    recommended_strategy = strategy_config.get('strategy', 'generic_retry')
                    break

        if not recommended_strategy:
            recommended_strategy = 'generic_retry'

        return ErrorClassification(
            category=rule['category'],
            action=rule['action'],
            error_type=error_type,
            severity_score=rule['severity'],
            is_critical=rule['severity'] >= 8,
            requires_manual_intervention=rule.get('manual_intervention', False),
            recommended_strategy=recommended_strategy,
            metadata={
                'error_message': error_message,
                'timestamp': self._format_timestamp()
            }
        )

    def _create_default_classification(
        self,
        error_type: str,
        error_message: str
    ) -> ErrorClassification:
        """Create default classification for unknown errors."""
        self.logger.warning(f"Unknown error type: {error_type}, using default classification")

        return ErrorClassification(
            category=ErrorCategory.WARNING,
            action=ErrorAction.AUTO_RETRY,
            error_type=error_type,
            severity_score=5,
            is_critical=False,
            requires_manual_intervention=False,
            recommended_strategy='generic_retry',
            metadata={
                'error_message': error_message,
                'is_unknown': True
            }
        )

    def get_statistics(self) -> Dict[str, int]:
        """Get classification rule statistics."""
        return {
            'total_rules': len(self.error_rules),
            'critical_rules': sum(1 for r in self.error_rules.values()
                                  if r['category'] == ErrorCategory.CRITICAL),
            'recoverable_rules': sum(1 for r in self.error_rules.values()
                                     if r['category'] == ErrorCategory.RECOVERABLE),
        }
"""
Retry Management System
Handles retry logic with configurable policies
"""

import asyncio
import logging
import random
from typing import Callable, Any, Dict
from dataclasses import dataclass


@dataclass
class RetryPolicy:
    """Configuration for retry behavior"""
    max_retries: int = 5              # attempts before giving up
    base_delay: float = 2.0           # seconds before the first retry
    max_delay: float = 60.0           # cap on any single delay
    exponential_backoff: bool = True  # double the delay each attempt
    jitter: bool = True               # randomize delays to de-synchronize clients


class RetryManager:
    """Manages retry logic for failed operations"""

    def __init__(self, config: Dict[str, Any]):
        """
        Args:
            config: Supports ``max_retries``, ``retry_delay``,
                ``exponential_backoff`` and ``jitter`` keys; missing keys
                fall back to :class:`RetryPolicy` defaults.
        """
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.policy = RetryPolicy(
            max_retries=config.get('max_retries', 5),
            base_delay=config.get('retry_delay', 2.0),
            exponential_backoff=config.get('exponential_backoff', True),
            # Previously jitter was hard-wired to the dataclass default and
            # could not be disabled from config.
            jitter=config.get('jitter', True)
        )

    async def execute_with_retry(
        self,
        func: Callable,
        *args,
        **kwargs
    ) -> Any:
        """
        Execute an async callable with retry logic.

        Returns the callable's result on the first success; re-raises the
        last exception once all attempts are exhausted.
        """
        last_exception = None

        for attempt in range(self.policy.max_retries):
            try:
                return await func(*args, **kwargs)

            except Exception as e:
                last_exception = e
                self.logger.warning(
                    f"Attempt {attempt + 1}/{self.policy.max_retries} failed: {e}"
                )

                # Sleep before the next attempt, but not after the last one.
                if attempt < self.policy.max_retries - 1:
                    await asyncio.sleep(self._calculate_delay(attempt))

        # All retries exhausted — surface the final failure to the caller.
        # NOTE(review): if max_retries <= 0 the loop never runs and this
        # implicitly returns None; confirm that configuration is disallowed.
        if last_exception:
            raise last_exception

    def _calculate_delay(self, attempt: int) -> float:
        """Calculate the delay in seconds before retry number ``attempt + 1``."""
        if self.policy.exponential_backoff:
            delay = min(
                self.policy.base_delay * (2 ** attempt),
                self.policy.max_delay
            )
        else:
            delay = self.policy.base_delay

        # Add jitter if enabled: scale by a random factor in [0.5, 1.5).
        # (random is now imported at module level instead of per call.)
        if self.policy.jitter:
            delay *= (0.5 + random.random())

        return delay
"""
Pull Request Management System
Automatically creates, reviews, and merges pull requests
"""

import asyncio
import logging
from typing import Dict, Any, Optional, List

import aiohttp


class PRManager:
    """Manages GitHub pull requests automatically via the REST v3 API.

    All network failures are logged and reported as False/None return
    values rather than raised, so callers can treat every operation as
    best-effort.
    """

    def __init__(self, config: Dict[str, Any]):
        """
        Args:
            config: Requires ``github_token``, ``repo_owner`` and
                ``repo_name``; ``base_branch`` defaults to ``master``.
        """
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.github_token = config.get('github_token', '')
        self.repo_owner = config.get('repo_owner', '')
        self.repo_name = config.get('repo_name', '')
        self.base_branch = config.get('base_branch', 'master')
        self.api_base = 'https://api.github.com'

    def _headers(self) -> Dict[str, str]:
        """Auth headers shared by every GitHub API call."""
        return {
            'Authorization': f'token {self.github_token}',
            'Accept': 'application/vnd.github.v3+json'
        }

    async def create_pr(
        self,
        title: str,
        body: str,
        head_branch: str,
        labels: Optional[List[str]] = None
    ) -> Optional[Dict]:
        """Create a new pull request.

        Returns the GitHub PR payload on success, None on failure.
        (``labels`` annotation fixed: a None default was typed as List[str].)
        """
        self.logger.info(f"Creating PR: {title}")

        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/pulls"

        data = {
            'title': title,
            'body': body,
            'head': head_branch,
            'base': self.base_branch
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(url, json=data, headers=self._headers()) as response:
                    # 201 Created is the documented success status.
                    if response.status == 201:
                        pr_data = await response.json()
                        pr_number = pr_data['number']

                        self.logger.info(f"✅ PR #{pr_number} created successfully")

                        # Add labels if provided
                        if labels:
                            await self._add_labels(pr_number, labels)

                        return pr_data
                    else:
                        error_text = await response.text()
                        self.logger.error(f"Failed to create PR: {error_text}")
                        return None

        except Exception as e:
            self.logger.error(f"Error creating PR: {e}", exc_info=True)
            return None

    async def _add_labels(self, pr_number: int, labels: List[str]):
        """Add labels to a pull request (labels live on the issues API)."""
        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/issues/{pr_number}/labels"

        data = {'labels': labels}

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(url, json=data, headers=self._headers()) as response:
                    if response.status == 200:
                        self.logger.info(f"Labels added to PR #{pr_number}")
                    else:
                        self.logger.warning(f"Failed to add labels to PR #{pr_number}")

        except Exception as e:
            self.logger.error(f"Error adding labels: {e}")

    async def review_pr(self, pr_number: int) -> bool:
        """Automatically review a pull request.

        Returns True when the review was submitted with an APPROVE event.
        """
        self.logger.info(f"Reviewing PR #{pr_number}")

        # Get PR details
        pr_data = await self._get_pr(pr_number)
        if not pr_data:
            return False

        # Run automated checks
        checks_passed = await self._run_automated_checks(pr_data)

        # Submit review
        return await self._submit_review(pr_number, checks_passed)

    async def _get_pr(self, pr_number: int) -> Optional[Dict]:
        """Get pull request details, or None on any failure."""
        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/pulls/{pr_number}"

        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=self._headers()) as response:
                    if response.status == 200:
                        return await response.json()
                    return None

        except Exception as e:
            self.logger.error(f"Error getting PR: {e}")
            return None

    async def _run_automated_checks(self, pr_data: Dict) -> bool:
        """Run automated checks on PR.

        Currently a simulation that always passes after a short delay.
        """
        self.logger.info("Running automated checks...")

        # Simulate checks
        await asyncio.sleep(2)

        # In real implementation:
        # - Check code quality
        # - Run tests
        # - Check for conflicts
        # - Validate commit messages
        # - Check file changes

        return True

    async def _submit_review(self, pr_number: int, approved: bool) -> bool:
        """Submit an APPROVE or REQUEST_CHANGES review for the PR."""
        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/pulls/{pr_number}/reviews"

        event = 'APPROVE' if approved else 'REQUEST_CHANGES'
        body = 'Automated review: All checks passed ✅' if approved else 'Automated review: Issues found ❌'

        data = {
            'event': event,
            'body': body
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(url, json=data, headers=self._headers()) as response:
                    if response.status == 200:
                        self.logger.info(f"Review submitted for PR #{pr_number}: {event}")
                        return True
                    return False

        except Exception as e:
            self.logger.error(f"Error submitting review: {e}")
            return False

    async def merge_pr(self, pr_number: int, merge_method: str = 'squash') -> bool:
        """Merge a pull request using the given method (merge/squash/rebase)."""
        self.logger.info(f"Merging PR #{pr_number} using {merge_method}")

        url = f"{self.api_base}/repos/{self.repo_owner}/{self.repo_name}/pulls/{pr_number}/merge"

        data = {
            'merge_method': merge_method
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.put(url, json=data, headers=self._headers()) as response:
                    if response.status == 200:
                        self.logger.info(f"✅ PR #{pr_number} merged successfully")
                        return True
                    else:
                        error_text = await response.text()
                        self.logger.error(f"Failed to merge PR: {error_text}")
                        return False

        except Exception as e:
            self.logger.error(f"Error merging PR: {e}", exc_info=True)
            return False

    async def auto_process_pr(self, pr_number: int) -> bool:
        """Automatically review and merge a PR if all checks pass."""
        self.logger.info(f"Auto-processing PR #{pr_number}")

        # Review PR
        review_passed = await self.review_pr(pr_number)

        if not review_passed:
            self.logger.warning(f"PR #{pr_number} did not pass review")
            return False

        # Wait a bit before merging
        await asyncio.sleep(5)

        # Merge PR
        merge_success = await self.merge_pr(pr_number)

        return merge_success
#!/usr/bin/env python3
"""
Intelligent Stress Testing System - Main Entry Point
Handles continuous stress testing with automatic error recovery
"""

import asyncio
import signal
import sys

# NOTE: unused imports removed (logging, pathlib.Path, typing.Optional,
# yaml) — config parsing is delegated to common.utils.load_config.
from stress_engine import StressTestEngine
from network_simulator import NetworkSimulator
from error_handler.error_classifier import ErrorClassifier
from error_handler.auto_recovery import AutoRecoveryManager
from dashboard.backend.metrics_collector import MetricsCollector
from common.logger import setup_logging
from common.utils import load_config


class StressTestingSystem:
    """Main orchestrator for the intelligent stress testing system.

    Runs three concurrent loops (stress tests, system monitoring, metrics
    aggregation) until a shutdown signal flips ``self.running``.
    """

    def __init__(self, config_path: str):
        """
        Args:
            config_path: Path to the YAML config; must contain
                ``stress_test``, ``error_handling`` and ``monitoring``
                sections (``network`` and ``logging`` are optional).
        """
        self.config = load_config(config_path)
        self.logger = setup_logging(self.config.get('logging', {}))

        # Initialize components
        self.stress_engine = StressTestEngine(self.config['stress_test'])
        self.network_simulator = NetworkSimulator(self.config.get('network', {}))
        self.error_classifier = ErrorClassifier(self.config['error_handling'])
        self.recovery_manager = AutoRecoveryManager(self.config['error_handling'])
        self.metrics_collector = MetricsCollector(self.config['monitoring'])

        self.running = False
        self._setup_signal_handlers()

    def _setup_signal_handlers(self):
        """Setup graceful shutdown on SIGINT/SIGTERM."""
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

    def _signal_handler(self, signum, frame):
        """Handle shutdown signals gracefully by stopping the loops."""
        self.logger.info(f"Received signal {signum}, initiating graceful shutdown...")
        self.running = False

    async def start(self):
        """Start the stress testing system and block until shutdown."""
        self.logger.info("🚀 Starting Intelligent Stress Testing System...")
        self.running = True

        # Start all components
        tasks = [
            asyncio.create_task(self._run_stress_tests()),
            asyncio.create_task(self._monitor_system()),
            asyncio.create_task(self._collect_metrics()),
        ]

        try:
            await asyncio.gather(*tasks)
        except Exception as e:
            self.logger.error(f"Critical error in main loop: {e}", exc_info=True)
        finally:
            await self.shutdown()

    async def _run_stress_tests(self):
        """Main stress testing loop with error recovery."""
        self.logger.info("📊 Starting stress test execution...")

        while self.running:
            try:
                # Execute stress test
                results = await self.stress_engine.run_test()

                # Collect metrics
                await self.metrics_collector.record_test_results(results)

                # Check for errors
                if results.has_errors():
                    await self._handle_errors(results.errors)

            except Exception as e:
                self.logger.error(f"Error in stress test loop: {e}")
                # Classify and handle error in background, then fall through
                # to the interval sleep below. (A previous `continue` here
                # skipped the sleep, spinning the loop hot on a persistent
                # failure.)
                await self._handle_exception(e)

            await asyncio.sleep(self.config['stress_test'].get('test_interval', 1))

    async def _handle_errors(self, errors: list):
        """Handle errors detected during stress testing."""
        for error in errors:
            # Classify error
            classification = self.error_classifier.classify(error)

            self.logger.info(f"Error classified as: {classification.category}")

            # Record error metrics
            await self.metrics_collector.record_error(classification)

            # Handle based on classification
            if classification.requires_manual_intervention:
                self.logger.warning(f"⚠️ Manual intervention required: {error}")
                await self.metrics_collector.increment_manual_intervention_count()
            else:
                # Start automatic recovery in background
                asyncio.create_task(
                    self._auto_recover(error, classification)
                )

    async def _handle_exception(self, exception: Exception):
        """Handle unexpected exceptions from the test loop."""
        classification = self.error_classifier.classify_exception(exception)

        if classification.is_critical:
            self.logger.critical(f"🚨 Critical error: {exception}")
            await self.metrics_collector.record_critical_error(exception)
        else:
            # Attempt automatic recovery
            asyncio.create_task(
                self._auto_recover(exception, classification)
            )

    async def _auto_recover(self, error, classification):
        """Attempt automatic error recovery in background."""
        self.logger.info(f"🔧 Starting automatic recovery for: {error}")

        try:
            # Increment active recovery processes
            await self.metrics_collector.increment_active_recovery()

            # Attempt recovery with retry logic
            success = await self.recovery_manager.recover(error, classification)

            if success:
                self.logger.info(f"✅ Successfully recovered from: {error}")
                await self.metrics_collector.increment_fixed_errors()
            else:
                self.logger.warning(f"❌ Failed to recover from: {error}")
                await self.metrics_collector.increment_bypassed_errors()

        except Exception as e:
            self.logger.error(f"Error during recovery: {e}")
        finally:
            await self.metrics_collector.decrement_active_recovery()

    async def _monitor_system(self):
        """Monitor system health and resources."""
        self.logger.info("🔍 Starting system monitoring...")

        while self.running:
            try:
                # Collect system metrics
                metrics = await self.network_simulator.get_network_stats()
                await self.metrics_collector.record_system_metrics(metrics)

            except Exception as e:
                self.logger.error(f"Error in monitoring: {e}")

            await asyncio.sleep(self.config['monitoring'].get('metrics_interval', 10))

    async def _collect_metrics(self):
        """Aggregate and store metrics."""
        self.logger.info("📈 Starting metrics collection...")

        while self.running:
            try:
                await self.metrics_collector.aggregate_and_store()
            except Exception as e:
                self.logger.error(f"Error collecting metrics: {e}")

            await asyncio.sleep(self.config['monitoring'].get('aggregation_interval', 60))

    async def shutdown(self):
        """Graceful shutdown of all components."""
        self.logger.info("🛑 Shutting down Intelligent Stress Testing System...")

        # Stop all components
        await self.stress_engine.stop()
        await self.recovery_manager.stop()
        await self.metrics_collector.close()

        self.logger.info("✅ Shutdown complete")


async def main():
    """Main entry point: config path from argv[1] or the default location."""
    config_path = sys.argv[1] if len(sys.argv) > 1 else 'config/stress_test_config.yaml'

    system = StressTestingSystem(config_path)
    await system.start()


if __name__ == '__main__':
    asyncio.run(main())
self.network_simulator.get_network_stats() + await self.metrics_collector.record_system_metrics(metrics) + + except Exception as e: + self.logger.error(f"Error in monitoring: {e}") + + await asyncio.sleep(self.config['monitoring'].get('metrics_interval', 10)) + + async def _collect_metrics(self): + """Aggregate and store metrics""" + self.logger.info("📈 Starting metrics collection...") + + while self.running: + try: + await self.metrics_collector.aggregate_and_store() + except Exception as e: + self.logger.error(f"Error collecting metrics: {e}") + + await asyncio.sleep(self.config['monitoring'].get('aggregation_interval', 60)) + + async def shutdown(self): + """Graceful shutdown of all components""" + self.logger.info("🛑 Shutting down Intelligent Stress Testing System...") + + # Stop all components + await self.stress_engine.stop() + await self.recovery_manager.stop() + await self.metrics_collector.close() + + self.logger.info("✅ Shutdown complete") + + +async def main(): + """Main entry point""" + config_path = sys.argv[1] if len(sys.argv) > 1 else 'config/stress_test_config.yaml' + + system = StressTestingSystem(config_path) + await system.start() + + +if __name__ == '__main__': + asyncio.run(main()) \ No newline at end of file diff --git a/src_stress_tester_network_simulator.py b/src_stress_tester_network_simulator.py new file mode 100644 index 00000000..55b0cab3 --- /dev/null +++ b/src_stress_tester_network_simulator.py @@ -0,0 +1,138 @@ +""" +Network Simulation and Monitoring +Simulates network conditions and monitors network statistics +""" + +import asyncio +import logging +import psutil +import time +from typing import Dict, Any +from dataclasses import dataclass + + +@dataclass +class NetworkStats: + """Network statistics container""" + bytes_sent: int + bytes_recv: int + packets_sent: int + packets_recv: int + errin: int + errout: int + dropin: int + dropout: int + timestamp: float + + +class NetworkSimulator: + """Network simulation and monitoring 
system""" + + def __init__(self, config: Dict[str, Any]): + self.config = config + self.logger = logging.getLogger(__name__) + self.enabled = config.get('enabled', True) + self.latency_ms = config.get('latency_ms', 0) + self.packet_loss = config.get('packet_loss', 0.0) + self.bandwidth_limit = config.get('bandwidth_limit_mbps', 0) + + async def get_network_stats(self) -> Dict[str, Any]: + """Get current network statistics""" + try: + # Get network I/O statistics + net_io = psutil.net_io_counters() + + stats = NetworkStats( + bytes_sent=net_io.bytes_sent, + bytes_recv=net_io.bytes_recv, + packets_sent=net_io.packets_sent, + packets_recv=net_io.packets_recv, + errin=net_io.errin, + errout=net_io.errout, + dropin=net_io.dropin, + dropout=net_io.dropout, + timestamp=time.time() + ) + + # Calculate load percentage + load = self._calculate_load(stats) + + return { + 'bytes_sent': stats.bytes_sent, + 'bytes_recv': stats.bytes_recv, + 'packets_sent': stats.packets_sent, + 'packets_recv': stats.packets_recv, + 'errors': stats.errin + stats.errout, + 'drops': stats.dropin + stats.dropout, + 'load': load, + 'timestamp': stats.timestamp + } + + except Exception as e: + self.logger.error(f"Error getting network stats: {e}") + return { + 'load': 0.0, + 'errors': 0, + 'timestamp': time.time() + } + + def _calculate_load(self, stats: NetworkStats) -> float: + """Calculate network load percentage""" + # Simple load calculation based on packet rate + # In real implementation, this would be more sophisticated + + if hasattr(self, '_last_stats'): + time_delta = stats.timestamp - self._last_stats.timestamp + if time_delta > 0: + packets_per_sec = ( + (stats.packets_sent - self._last_stats.packets_sent) + + (stats.packets_recv - self._last_stats.packets_recv) + ) / time_delta + + # Assume max capacity of 10000 packets/sec + load = min((packets_per_sec / 10000) * 100, 100) + else: + load = 0.0 + else: + load = 0.0 + + self._last_stats = stats + return load + + async def 
simulate_latency(self, duration_ms: int = None): + """Simulate network latency""" + if not self.enabled: + return + + latency = duration_ms if duration_ms else self.latency_ms + if latency > 0: + await asyncio.sleep(latency / 1000) + + async def simulate_packet_loss(self) -> bool: + """ + Simulate packet loss + Returns True if packet should be dropped + """ + if not self.enabled or self.packet_loss <= 0: + return False + + import random + return random.random() < (self.packet_loss / 100) + + def get_system_info(self) -> Dict[str, Any]: + """Get system information""" + try: + cpu_percent = psutil.cpu_percent(interval=1) + memory = psutil.virtual_memory() + disk = psutil.disk_usage('/') + + return { + 'cpu_percent': cpu_percent, + 'memory_percent': memory.percent, + 'memory_available_gb': memory.available / (1024**3), + 'disk_percent': disk.percent, + 'disk_free_gb': disk.free / (1024**3) + } + except Exception as e: + self.logger.error(f"Error getting system info: {e}") + return {} \ No newline at end of file diff --git a/src_stress_tester_stress_engine.py b/src_stress_tester_stress_engine.py new file mode 100644 index 00000000..3c2c2b24 --- /dev/null +++ b/src_stress_tester_stress_engine.py @@ -0,0 +1,119 @@ +""" +Stress Testing Engine +Handles load generation and test execution +""" + +import asyncio +import time +from typing import Dict, List, Any, Optional +from dataclasses import dataclass, field +import aiohttp +import logging + + +@dataclass +class StressTestResult: + """Container for stress test results""" + start_time: float + end_time: float + total_requests: int = 0 + successful_requests: int = 0 + failed_requests: int = 0 + average_response_time: float = 0.0 + errors: List[Exception] = field(default_factory=list) + + def has_errors(self) -> bool: + return len(self.errors) > 0 + + def success_rate(self) -> float: + if self.total_requests == 0: + return 0.0 + return (self.successful_requests / self.total_requests) * 100 + + +class StressTestEngine: + """Executes 
stress tests with configurable load patterns""" + + def __init__(self, config: Dict[str, Any]): + self.config = config + self.logger = logging.getLogger(__name__) + self.session: Optional[aiohttp.ClientSession] = None + self.running = False + + async def run_test(self) -> StressTestResult: + """Execute a stress test cycle""" + start_time = time.time() + result = StressTestResult(start_time=start_time, end_time=0) + + # Initialize session if needed + if not self.session: + self.session = aiohttp.ClientSession() + + # Get test configuration + max_concurrent = self.config.get('max_concurrent_users', 100) + endpoints = self.config.get('endpoints', []) + + # Create tasks for concurrent requests + tasks = [] + for i in range(max_concurrent): + endpoint = self._select_endpoint(endpoints, i) + task = asyncio.create_task(self._execute_request(endpoint, result)) + tasks.append(task) + + # Wait for all requests to complete + await asyncio.gather(*tasks, return_exceptions=True) + + result.end_time = time.time() + result.average_response_time = self._calculate_avg_response_time(result) + + self.logger.info( + f"Stress test completed: {result.successful_requests}/{result.total_requests} successful " + f"({result.success_rate():.2f}%)" + ) + + return result + + def _select_endpoint(self, endpoints: List[Dict], index: int) -> Dict: + """Select endpoint based on weight distribution""" + if not endpoints: + return {'path': '/', 'method': 'GET'} + + # Simple round-robin for now (can be enhanced with weighted selection) + return endpoints[index % len(endpoints)] + + async def _execute_request(self, endpoint: Dict, result: StressTestResult): + """Execute a single HTTP request""" + result.total_requests += 1 + + try: + method = endpoint.get('method', 'GET') + path = endpoint.get('path', '/') + url = f"{self.config.get('base_url', 'http://localhost')}{path}" + + async with self.session.request(method, url, timeout=aiohttp.ClientTimeout(total=30)) as response: + await response.read() + + 
if 200 <= response.status < 300: + result.successful_requests += 1 + else: + result.failed_requests += 1 + + except Exception as e: + result.failed_requests += 1 + result.errors.append(e) + self.logger.debug(f"Request failed: {e}") + + def _calculate_avg_response_time(self, result: StressTestResult) -> float: + """Calculate average response time""" + if result.total_requests == 0: + return 0.0 + + duration = result.end_time - result.start_time + return duration / result.total_requests + + async def stop(self): + """Stop the stress test engine""" + self.running = False + if self.session: + await self.session.close() + self.logger.info("Stress test engine stopped") \ No newline at end of file From af3c4b7119dbc43ce08661352876370a207df595 Mon Sep 17 00:00:00 2001 From: Ze0ro99 <146000493+Ze0ro99@users.noreply.github.com> Date: Sun, 8 Feb 2026 17:36:17 +0300 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- config_error_handling_rules.yaml.txt | 36 +++++++++++++++------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/config_error_handling_rules.yaml.txt b/config_error_handling_rules.yaml.txt index cefe05f8..6cf80d65 100644 --- a/config_error_handling_rules.yaml.txt +++ b/config_error_handling_rules.yaml.txt @@ -3,31 +3,35 @@ # Error Category Definitions error_categories: critical: - - "DatabaseConnectionError" - - "OutOfMemoryError" - - "SystemError" - - "FatalError" - - "SecurityError" + errors: + - "DatabaseConnectionError" + - "OutOfMemoryError" + - "SystemError" + - "FatalError" + - "SecurityError" action: "alert_and_retry" recoverable: - - "TimeoutError" - - "ConnectionError" - - "RateLimitError" - - "TemporaryError" - - "NetworkError" - - "ServiceUnavailableError" + errors: + - "TimeoutError" + - "ConnectionError" + - "RateLimitError" + - "TemporaryError" + - "NetworkError" + - "ServiceUnavailableError" action: "auto_retry" ignorable: - - "ClientCancellationError" 
- - "NotFoundError" - - "ValidationError" + errors: + - "ClientCancellationError" + - "NotFoundError" + - "ValidationError" action: "log_only" warning: - - "DeprecationWarning" - - "ConfigurationWarning" + errors: + - "DeprecationWarning" + - "ConfigurationWarning" action: "log_only" # Recovery Strategy Definitions From 91070ac7e4b37dd40703b7311bb12269ddd3643b Mon Sep 17 00:00:00 2001 From: Ze0ro99 <146000493+Ze0ro99@users.noreply.github.com> Date: Sun, 8 Feb 2026 17:42:58 +0300 Subject: [PATCH 4/4] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src_common_utils.py | 2 +- src_dashboard_backend_api.py | 32 +++++++++++++++++++--- src_dashboard_backend_metrics_collector.py | 10 ++----- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src_common_utils.py b/src_common_utils.py index e16871d0..3ff19f74 100644 --- a/src_common_utils.py +++ b/src_common_utils.py @@ -7,7 +7,7 @@ import json import os from pathlib import Path -from typing import Dict, Any, Optional +from typing import Dict, Any import hashlib import time diff --git a/src_dashboard_backend_api.py b/src_dashboard_backend_api.py index 63dea1df..ad95931f 100644 --- a/src_dashboard_backend_api.py +++ b/src_dashboard_backend_api.py @@ -7,7 +7,7 @@ import logging from typing import Dict, Any from aiohttp import web -import json + class DashboardAPI: @@ -32,11 +32,35 @@ def _setup_routes(self): @web.middleware async def _cors_middleware(self, request, handler): - """CORS middleware""" - response = await handler(request) - response.headers['Access-Control-Allow-Origin'] = '*' + """CORS middleware with basic preflight handling and restricted origins.""" + origin = request.headers.get('Origin') + + # Allow overriding CORS origins via config; default to localhost-only. 
+ allowed_origins = self.config.get( + 'allowed_origins', + ['http://localhost', 'http://127.0.0.1'] + ) + + # Handle preflight OPTIONS requests directly to avoid 405s on handlers. + if request.method == 'OPTIONS': + response = web.Response(status=204) + else: + response = await handler(request) + + # Set CORS headers only for allowed origins. + if origin and origin in allowed_origins: + response.headers['Access-Control-Allow-Origin'] = origin + # Ensure caches vary on Origin when CORS is applied. + existing_vary = response.headers.get('Vary') + if existing_vary: + if 'Origin' not in existing_vary: + response.headers['Vary'] = existing_vary + ', Origin' + else: + response.headers['Vary'] = 'Origin' + response.headers['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS' response.headers['Access-Control-Allow-Headers'] = 'Content-Type' + response.headers['Access-Control-Max-Age'] = '3600' return response async def get_current_metrics(self, request): diff --git a/src_dashboard_backend_metrics_collector.py b/src_dashboard_backend_metrics_collector.py index ce38e72c..8b192a8a 100644 --- a/src_dashboard_backend_metrics_collector.py +++ b/src_dashboard_backend_metrics_collector.py @@ -3,7 +3,7 @@ Collects, aggregates, and stores system metrics """ -import asyncio + import logging import time from typing import Dict, Any, List @@ -60,7 +60,7 @@ async def record_test_results(self, results): async def record_error(self, classification): """Record an error classification""" - self.metrics.failed_requests += 1 + self.logger.debug(f"Recorded error: {classification.error_type}") async def record_critical_error(self, exception: Exception): @@ -120,11 +120,7 @@ async def aggregate_and_store(self): async def _save_metrics(self): """Save metrics to storage""" try: - data = { - 'current': asdict(self.metrics), - 'history': [asdict(m) for m in self.metrics_history[-100:]] - } - + # In real implementation, save to database or file # For now, just log self.logger.debug(f"Metrics 
saved: {len(self.metrics_history)} entries")