diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e408619 --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# 配置文件 (包含敏感信息) +config.json + +# 日志文件 +*.log +ipmi_fan_control.log + +# Python 缓存 +__pycache__/ +*.py[cod] +*$py.class +*.so + +# 虚拟环境 +venv/ +env/ +ENV/ + +# IDE 文件 +.vscode/ +.idea/ +*.swp +*.swo + +# 系统文件 +.DS_Store +Thumbs.db + +# 临时文件 +*.tmp +*.temp diff --git a/README.md b/README.md index 5564210..f72db42 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,220 @@ -# ipmi-auto -IPMI auto fan control policy script for Dell servers +# IPMI Auto Fan Control -适用于戴尔服务器的 IPMI 自动风扇控制策略脚本 +🌡️ 智能的戴尔服务器 IPMI 自动风扇控制工具 + +一个专业的、跨平台的 IPMI 风扇控制解决方案,支持自定义温度策略、完善的错误处理和详细的日志记录。 + +## ✨ 特性 + +- 🔧 **配置化管理** - 通过 JSON 配置文件管理所有设置 +- 🖥️ **跨平台支持** - 支持 Windows 和 Linux 系统 +- 🔄 **智能重试** - 网络异常时自动重试,提高稳定性 +- 📊 **详细日志** - 完整的操作日志,便于监控和调试 +- ⚙️ **灵活策略** - 可自定义温度阈值和风扇转速策略 +- 🧪 **测试模式** - 支持测试模式,验证配置和连接 + +## 🚀 快速开始 + +### 1. 环境要求 + +**Windows:** +- Python 3.7+ +- 项目自带 ipmitool.exe + +**Linux:** +```bash +# Ubuntu/Debian +sudo apt-get install ipmitool + +# CentOS/RHEL +sudo yum install ipmitool + +# 或者使用项目自带的工具 +``` + +### 2. 安装依赖 + +```bash +pip install -r requirements.txt +``` + +### 3. 配置设置 + +复制示例配置文件并修改: + +```bash +cp config.example.json config.json +``` + +编辑 `config.json` 填入你的服务器信息: + +```json +{ + "server": { + "ip": "你的服务器IP", + "username": "IPMI用户名", + "password": "IPMI密码" + }, + "temperature_policy": { + "thresholds": [ + { + "min_temp": 79, + "max_temp": 999, + "fan_speed": 40, + "description": "高温模式" + } + ] + }, + "monitoring": { + "interval_seconds": 30, + "max_retries": 3, + "retry_delay": 5, + "log_level": "INFO" + } +} +``` + +### 4. 
运行 + +**测试连接:** +```bash +python main.py --test +``` + +**开始监控:** +```bash +python main.py +``` + +**使用自定义配置文件:** +```bash +python main.py -c /path/to/your/config.json +``` + +## 📋 配置说明 + +### 服务器配置 +- `ip`: 服务器的 IPMI IP 地址 +- `username`: IPMI 用户名 +- `password`: IPMI 密码 + +### 温度策略配置 + +**🎯 风扇曲线模式 (推荐)** +启用 `use_fan_curve: true` 可以实现平滑的无级调节,就像游戏本那样: + +```json +{ + "use_fan_curve": true, + "fan_curve_points": [ + { + "temp": 30, + "fan_speed": 5, + "description": "待机温度" + }, + { + "temp": 65, + "fan_speed": 35, + "description": "高负载" + } + ] +} +``` + +程序会在温度点之间进行线性插值,实现平滑的转速调节。例如: +- 30°C → 5% 转速 +- 47.5°C → 20% 转速 (自动插值) +- 65°C → 35% 转速 + +**📊 阶梯模式 (传统)** +设置 `use_fan_curve: false` 使用传统的温度区间模式: + +```json +{ + "min_temp": 最低温度, + "max_temp": 最高温度, + "fan_speed": 风扇转速百分比 (0-100), + "description": "策略描述" +} +``` + +### 监控配置 +- `interval_seconds`: 监控间隔(秒) +- `max_retries`: 命令失败时的最大重试次数 +- `retry_delay`: 重试间隔(秒) +- `log_level`: 日志级别 (DEBUG, INFO, WARNING, ERROR) + +## 📊 日志文件 + +程序会生成 `ipmi_fan_control.log` 日志文件,包含: +- 温度监控数据 +- 风扇调节操作 +- 错误和重试信息 +- 系统状态变化 + +## 🔧 命令行选项 + +```bash +python main.py [选项] + +选项: + -c, --config CONFIG 指定配置文件路径 (默认: config.json) + --test 测试模式,只获取一次温度信息 + -h, --help 显示帮助信息 +``` + +## 🛠️ 故障排除 + +### 常见问题 + +**1. 无法连接到服务器** +- 检查 IP 地址是否正确 +- 确认 IPMI 用户名和密码 +- 验证网络连通性 + +**2. 找不到 ipmitool** +- Linux: 安装 ipmitool 包 +- Windows: 确保 ipmi 目录下有 ipmitool.exe + +**3. 权限错误** +- 确认 IPMI 用户有足够的权限 +- 检查服务器 IPMI 设置 + +**4. 温度读取失败** +- 验证服务器支持温度传感器 +- 检查传感器命名是否包含 'Temp' + +### 调试模式 + +启用详细日志: +```json +{ + "monitoring": { + "log_level": "DEBUG" + } +} +``` + +## 🔒 安全注意事项 + +- 配置文件包含敏感信息,请妥善保管 +- 建议为 IPMI 创建专用用户账户 +- 定期更新 IPMI 密码 +- 限制配置文件的访问权限 + +## 🤝 贡献 + +欢迎提交 Issue 和 Pull Request! 
+ +## 📄 许可证 + +MIT License - 详见 [LICENSE](LICENSE) 文件 + +## 🙏 致谢 + +- 基于 ipmitool 工具 +- 使用 APScheduler 进行任务调度 + +--- + +**注意**: 此工具专为戴尔服务器设计,其他品牌服务器可能需要调整 IPMI 命令。使用前请在测试环境中验证。 diff --git a/config.example.json b/config.example.json new file mode 100644 index 0000000..27054f4 --- /dev/null +++ b/config.example.json @@ -0,0 +1,80 @@ +{ + "server": { + "ip": "192.168.1.100", + "username": "admin", + "password": "your_password_here" + }, + "temperature_policy": { + "use_fan_curve": true, + "fan_curve_points": [ + { + "temp": 30, + "fan_speed": 5, + "description": "待机温度" + }, + { + "temp": 45, + "fan_speed": 10, + "description": "低负载" + }, + { + "temp": 55, + "fan_speed": 20, + "description": "中等负载" + }, + { + "temp": 65, + "fan_speed": 35, + "description": "高负载" + }, + { + "temp": 75, + "fan_speed": 50, + "description": "高温警告" + }, + { + "temp": 85, + "fan_speed": 80, + "description": "紧急散热" + } + ], + "thresholds": [ + { + "min_temp": 79, + "max_temp": 999, + "fan_speed": 40, + "description": "高温模式" + }, + { + "min_temp": 70, + "max_temp": 78, + "fan_speed": 30, + "description": "中高温模式" + }, + { + "min_temp": 60, + "max_temp": 69, + "fan_speed": 20, + "description": "中温模式" + }, + { + "min_temp": 50, + "max_temp": 59, + "fan_speed": 15, + "description": "低中温模式" + }, + { + "min_temp": 0, + "max_temp": 49, + "fan_speed": 5, + "description": "低温模式" + } + ] + }, + "monitoring": { + "interval_seconds": 30, + "max_retries": 3, + "retry_delay": 5, + "log_level": "INFO" + } +} diff --git a/config.py b/config.py new file mode 100644 index 0000000..10b13e6 --- /dev/null +++ b/config.py @@ -0,0 +1,78 @@ +import json +import os +import logging +from typing import Dict, Any, List + + +class Config: + def __init__(self, config_path: str = "config.json"): + self.config_path = config_path + self.config = self._load_config() + self._validate_config() + + def _load_config(self) -> Dict[str, Any]: + if not os.path.exists(self.config_path): + raise FileNotFoundError(f"配置文件 
class Config:
    """Load, validate and expose the JSON configuration of the fan controller.

    The configuration file must be a JSON object with three sections:
    ``server`` (IPMI connection data), ``temperature_policy`` (fan curve
    and/or stepped thresholds) and ``monitoring`` (polling and retry
    settings, all optional with defaults).
    """

    def __init__(self, config_path: str = "config.json"):
        """Read and validate the configuration stored at *config_path*.

        Raises:
            FileNotFoundError: the file does not exist.
            ValueError: the file is not valid JSON or fails validation.
            Exception: any other failure while reading the file.
        """
        self.config_path = config_path
        self.config = self._load_config()
        self._validate_config()

    def _load_config(self) -> Dict[str, Any]:
        """Return the parsed JSON document from ``self.config_path``."""
        # EAFP: open the file directly instead of checking for existence
        # first, which would leave a window between the check and the open.
        try:
            with open(self.config_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            raise FileNotFoundError(
                f"配置文件 {self.config_path} 不存在 请先创建配置文件") from None
        except json.JSONDecodeError as e:
            raise ValueError(f"配置文件格式错误: {e}") from e
        except Exception as e:
            # Kept as a plain Exception for backward compatibility with
            # callers; the original cause is chained for debugging.
            raise Exception(f"读取配置文件失败: {e}") from e

    def _validate_config(self):
        """Check the overall structure of the loaded configuration.

        Raises:
            ValueError: a required section or field is missing, has the
                wrong type, or no usable temperature policy is defined.
        """
        if not isinstance(self.config, dict):
            raise ValueError("配置文件顶层必须是 JSON 对象")

        for field in ('server', 'temperature_policy', 'monitoring'):
            if field not in self.config:
                raise ValueError(f"配置文件缺少必需字段: {field}")
            if not isinstance(self.config[field], dict):
                raise ValueError(f"配置字段 {field} 必须是 JSON 对象")

        server_config = self.config['server']
        for field in ('ip', 'username', 'password'):
            if field not in server_config:
                raise ValueError(f"服务器配置缺少必需字段: {field}")

        if not server_config['ip'] or not server_config['username']:
            raise ValueError("服务器IP和用户名不能为空")

        self._validate_temperature_policy()

    def _validate_temperature_policy(self):
        """Ensure the policy that will actually be used is well formed.

        The fan curve is used when ``use_fan_curve`` is true and
        ``fan_curve_points`` is non-empty; otherwise ``thresholds`` is used.
        Only the active policy is validated so that an unused section does
        not prevent start-up.
        """
        if self.use_fan_curve and self.fan_curve_points:
            entries = self.fan_curve_points
            required = ('temp', 'fan_speed')
            label = "fan_curve_points"
        else:
            entries = self.temperature_thresholds
            required = ('min_temp', 'max_temp', 'fan_speed')
            label = "thresholds"
            if not entries:
                raise ValueError(
                    "温度策略缺少可用配置: 需要启用 use_fan_curve 并提供 "
                    "fan_curve_points 或提供非空的 thresholds")

        if not isinstance(entries, list):
            raise ValueError(f"温度策略字段 {label} 必须是数组")

        for index, entry in enumerate(entries):
            if not isinstance(entry, dict):
                raise ValueError(f"{label}[{index}] 必须是 JSON 对象")
            for key in required:
                value = entry.get(key)
                # bool is a subclass of int but is never a sensible number here.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    raise ValueError(f"{label}[{index}] 的 {key} 必须是数字")
            if not 0 <= entry['fan_speed'] <= 100:
                raise ValueError(
                    f"{label}[{index}] 的 fan_speed 必须在 0-100 之间")

    @property
    def server_ip(self) -> str:
        """IPMI address of the managed server."""
        return self.config['server']['ip']

    @property
    def server_username(self) -> str:
        """IPMI user name."""
        return self.config['server']['username']

    @property
    def server_password(self) -> str:
        """IPMI password."""
        return self.config['server']['password']

    @property
    def interval_seconds(self) -> int:
        """Polling interval in seconds (default 30)."""
        return self.config['monitoring'].get('interval_seconds', 30)

    @property
    def temperature_thresholds(self) -> List[Dict[str, Any]]:
        """Stepped policy entries; empty when the section is absent."""
        # .get() so that curve-only configurations do not raise KeyError.
        return self.config['temperature_policy'].get('thresholds', [])

    @property
    def use_fan_curve(self) -> bool:
        """Whether the interpolated fan curve is enabled (default False)."""
        return self.config['temperature_policy'].get('use_fan_curve', False)

    @property
    def fan_curve_points(self) -> List[Dict[str, Any]]:
        """Fan curve points; empty when the section is absent."""
        return self.config['temperature_policy'].get('fan_curve_points', [])

    @property
    def max_retries(self) -> int:
        """Number of retries after a failed IPMI command (default 3)."""
        return self.config['monitoring'].get('max_retries', 3)

    @property
    def retry_delay(self) -> int:
        """Delay between retries in seconds (default 5)."""
        return self.config['monitoring'].get('retry_delay', 5)

    @property
    def log_level(self) -> str:
        """Name of the logging level (default ``INFO``)."""
        return self.config['monitoring'].get('log_level', 'INFO')
import argparse
import logging
import os
import platform
import shlex
import shutil
import subprocess
import sys
import time
from typing import List, Optional

# Fan speed (percent) applied when no configured policy entry matches.
DEFAULT_FAN_SPEED = 20


class IPMIFanController:
    """Poll server temperatures over IPMI and set the fan speed accordingly.

    The controller shells out to ``ipmitool`` (lanplus interface) using the
    connection data and temperature policy from the JSON configuration file.
    """

    def __init__(self, config_path: str = "config.json"):
        """Load the configuration, set up logging and locate ipmitool."""
        # Project-local import kept inside the constructor so this module
        # can be imported on its own without config.py being importable.
        from config import Config

        self.config = Config(config_path)
        self.setup_logging()
        self.ipmitool_path = self._get_ipmitool_path()
        self.logger.info(f"IPMI风扇控制器启动 服务器: {self.config.server_ip}")

    def setup_logging(self):
        """Configure root logging to a log file and stdout; set ``self.logger``."""
        # Unknown level names fall back to INFO.
        log_level = getattr(logging, self.config.log_level.upper(), logging.INFO)
        logging.basicConfig(
            level=log_level,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('ipmi_fan_control.log', encoding='utf-8'),
                logging.StreamHandler(sys.stdout),
            ],
        )
        self.logger = logging.getLogger(__name__)

    def _get_ipmitool_path(self) -> str:
        """Return the ipmitool executable to run on this platform.

        On Windows the bundled ``./ipmi/ipmitool.exe`` is used. Elsewhere the
        executable is looked up on PATH; if it is not found the bare name
        ``ipmitool`` is returned and a warning is logged.
        """
        if platform.system().lower() == "windows":
            return os.path.join(".", "ipmi", "ipmitool.exe")

        # shutil.which avoids spawning an external `which`, which is not
        # present on every system and would raise FileNotFoundError.
        found = shutil.which("ipmitool")
        if found:
            return found

        self.logger.warning("系统中未找到ipmitool 尝试使用相对路径")
        return "ipmitool"

    def _build_ipmi_argv(self, command: str) -> List[str]:
        """Return the argument vector for running *command* via ipmitool.

        Connection values are passed as separate arguments (never through a
        shell), so credentials containing spaces or shell metacharacters are
        transmitted verbatim and cannot inject commands.
        """
        return [
            self.ipmitool_path,
            '-I', 'lanplus',
            '-H', str(self.config.server_ip),
            '-U', str(self.config.server_username),
            '-P', str(self.config.server_password),
            *shlex.split(command),
        ]

    def _execute_ipmi_command(self, command: str, retries: Optional[int] = None) -> Optional[str]:
        """Run an ipmitool sub-command and return its stdout.

        Args:
            command: ipmitool arguments after the connection options,
                e.g. ``'sensor'`` or ``'raw 0x30 0x30 0x01 0x00'``.
            retries: number of retries after the first attempt; defaults to
                the configured ``max_retries``.

        Returns:
            The command's stdout on success, or ``None`` when every attempt
            failed or the ipmitool executable could not be started.
        """
        max_retries = retries if retries is not None else self.config.max_retries
        argv = self._build_ipmi_argv(command)

        for attempt in range(max_retries + 1):
            try:
                # Only the sub-command is logged; the full argv holds the password.
                self.logger.debug(f"执行IPMI命令 (尝试 {attempt + 1}/{max_retries + 1}): {command}")
                result = subprocess.run(argv, capture_output=True,
                                        text=True, timeout=30)

                if result.returncode == 0:
                    return result.stdout
                self.logger.warning(f"IPMI命令执行失败 (尝试 {attempt + 1}): {result.stderr}")

            except subprocess.TimeoutExpired:
                self.logger.warning(f"IPMI命令超时 (尝试 {attempt + 1})")
            except FileNotFoundError:
                # A missing executable will not appear by retrying.
                self.logger.error(f"找不到 ipmitool 可执行文件: {self.ipmitool_path}")
                return None
            except Exception as e:
                self.logger.error(f"IPMI命令执行异常 (尝试 {attempt + 1}): {e}")

            if attempt < max_retries:
                self.logger.info(f"等待 {self.config.retry_delay} 秒后重试...")
                time.sleep(self.config.retry_delay)

        self.logger.error(f"IPMI命令执行失败 已重试 {max_retries} 次: {command}")
        return None

    def disable_auto(self) -> bool:
        """Send the raw command that disables automatic fan control.

        Returns True when the command succeeded.
        """
        if self._execute_ipmi_command('raw 0x30 0x30 0x01 0x00') is None:
            return False
        self.logger.debug("已禁用自动风扇控制")
        return True

    def enable_auto(self) -> bool:
        """Send the raw command that re-enables automatic fan control.

        Returns True when the command succeeded.
        """
        if self._execute_ipmi_command('raw 0x30 0x30 0x01 0x01') is None:
            return False
        self.logger.debug("已启用自动风扇控制")
        return True

    def set_speed(self, percent: int) -> bool:
        """Set all fans to *percent* (0-100) of full speed.

        Non-integer values are rounded to the nearest integer because the
        raw command takes a whole-number hex byte. Automatic control is
        disabled first. Returns True when both commands succeeded.
        """
        try:
            speed = int(round(percent))
        except (TypeError, ValueError, OverflowError):
            self.logger.error(f"风扇转速百分比无效: {percent} (应在0-100之间)")
            return False

        if not 0 <= speed <= 100:
            self.logger.error(f"风扇转速百分比无效: {percent} (应在0-100之间)")
            return False

        if not self.disable_auto():
            return False

        result = self._execute_ipmi_command(f'raw 0x30 0x30 0x02 0xff {hex(speed)}')
        if result is None:
            return False
        self.logger.info(f"风扇转速设置为 {speed}%")
        return True

    def get_temp(self) -> List[float]:
        """Return the readings of all sensors whose line contains ``Temp``.

        The output of ``ipmitool sensor`` is parsed as ``|``-separated rows;
        the second column is taken as the reading. Rows whose reading is
        empty, ``na`` or not a number are skipped. An empty list is returned
        when the command fails or no reading is found.
        """
        result = self._execute_ipmi_command('sensor')
        if result is None:
            self.logger.error("无法获取传感器数据")
            return []

        temp_list = []
        for sensor in result.splitlines():
            if 'Temp' not in sensor or '|' not in sensor:
                continue
            temp_str = sensor.split('|')[1].strip()
            if not temp_str or temp_str == 'na':
                continue
            try:
                temp_list.append(float(temp_str))
            except ValueError:
                self.logger.debug(f"跳过无效温度传感器数据: {sensor}")

        if temp_list:
            self.logger.debug(f"获取到温度数据: {temp_list}")
        else:
            self.logger.warning("未找到有效的温度传感器数据")

        return temp_list

    def get_fan_speed_for_temp(self, temp: float) -> Optional[int]:
        """Return the fan speed (percent) the active policy assigns to *temp*.

        The fan curve is used when it is enabled and has points; otherwise
        the stepped thresholds are used.
        """
        if self.config.use_fan_curve and self.config.fan_curve_points:
            return self._calculate_fan_speed_from_curve(temp)
        return self._get_fan_speed_from_thresholds(temp)

    def _calculate_fan_speed_from_curve(self, temp: float) -> int:
        """Linearly interpolate the fan speed for *temp* on the fan curve.

        Temperatures at or beyond either end of the curve are clamped to the
        speed of the nearest end point.
        """
        curve_points = sorted(self.config.fan_curve_points, key=lambda x: x['temp'])
        if not curve_points:
            self.logger.warning(f"风扇曲线计算异常 温度 {temp}°C 使用默认值")
            return DEFAULT_FAN_SPEED

        if temp <= curve_points[0]['temp']:
            speed = curve_points[0]['fan_speed']
            self.logger.debug(f"温度 {temp}°C 低于曲线起点 使用最低转速 {speed}%")
            return speed

        if temp >= curve_points[-1]['temp']:
            speed = curve_points[-1]['fan_speed']
            self.logger.debug(f"温度 {temp}°C 高于曲线终点 使用最高转速 {speed}%")
            return speed

        for point1, point2 in zip(curve_points, curve_points[1:]):
            if not point1['temp'] <= temp <= point2['temp']:
                continue
            # temp is strictly above the first point here and segments are
            # visited in ascending order, so the first matching segment
            # always has a non-zero width.
            temp_range = point2['temp'] - point1['temp']
            speed_range = point2['fan_speed'] - point1['fan_speed']
            temp_offset = temp - point1['temp']

            final_speed = round(point1['fan_speed'] + speed_range * temp_offset / temp_range)

            self.logger.debug(f"温度 {temp}°C 在 {point1['temp']}-{point2['temp']}°C 区间 "
                              f"插值计算转速: {final_speed}% (从 {point1['fan_speed']}% 到 {point2['fan_speed']}%)")
            return final_speed

        self.logger.warning(f"风扇曲线计算异常 温度 {temp}°C 使用默认值")
        return DEFAULT_FAN_SPEED

    def _get_fan_speed_from_thresholds(self, temp: float) -> int:
        """Return the fan speed of the stepped band that covers *temp*.

        Bands are closed intervals ``[min_temp, max_temp]``. Sensor readings
        are floats, so a reading such as 78.5 can fall between two integer
        bands (70-78 and 79-999); in that case, and for readings above every
        band, the band with the highest ``min_temp`` not exceeding *temp* is
        used. Only readings below every band get the default speed.
        """
        thresholds = self.config.temperature_thresholds

        for threshold in thresholds:
            if threshold['min_temp'] <= temp <= threshold['max_temp']:
                return threshold['fan_speed']

        candidates = [t for t in thresholds if t['min_temp'] <= temp]
        if candidates:
            nearest = max(candidates, key=lambda t: t['min_temp'])
            self.logger.debug(f"温度 {temp}°C 不在任何区间内 使用最近的较低区间 "
                              f"(min_temp={nearest['min_temp']})")
            return nearest['fan_speed']

        self.logger.warning(f"未找到温度 {temp}°C 对应的风扇策略 使用默认值")
        return DEFAULT_FAN_SPEED

    def auto_config(self):
        """Run one control cycle: read temperatures and apply the fan speed.

        The highest reading drives the policy. All errors are logged and
        swallowed so that a failing cycle does not stop the scheduler.
        """
        try:
            temp_list = self.get_temp()
            if not temp_list:
                self.logger.error("无法获取温度数据 跳过本次调节")
                return

            max_temp = max(temp_list)
            avg_temp = sum(temp_list) / len(temp_list)

            self.logger.info(f"当前温度 - 最高: {max_temp}°C 平均: {avg_temp:.1f}°C")

            target_speed = self.get_fan_speed_for_temp(max_temp)
            if target_speed is None:
                return

            if self.set_speed(target_speed):
                curve_mode = "风扇曲线" if self.config.use_fan_curve else "阶梯模式"
                self.logger.info(f"根据最高温度 {max_temp}°C 设置风扇转速为 {target_speed}% ({curve_mode})")
            else:
                self.logger.error("设置风扇转速失败")

        except Exception as e:
            self.logger.error(f"自动配置过程中发生错误: {e}")

    def start_monitoring(self):
        """Run ``auto_config`` at the configured interval until interrupted."""
        # Imported from its canonical module and only when monitoring starts,
        # so --test mode does not require APScheduler.
        from apscheduler.schedulers.blocking import BlockingScheduler

        try:
            self.logger.info(f"开始监控 间隔: {self.config.interval_seconds}秒")
            scheduler = BlockingScheduler()
            scheduler.add_job(self.auto_config, 'interval',
                              seconds=self.config.interval_seconds)
            scheduler.start()
        except KeyboardInterrupt:
            self.logger.info("收到停止信号 正在关闭...")
        except Exception as e:
            self.logger.error(f"监控过程中发生错误: {e}")


def main():
    """Command-line entry point: parse arguments and run test or monitor mode."""
    parser = argparse.ArgumentParser(description='IPMI自动风扇控制工具')
    parser.add_argument('-c', '--config', default='config.json',
                        help='配置文件路径 (默认: config.json)')
    parser.add_argument('--test', action='store_true',
                        help='测试模式 只获取一次温度信息')

    args = parser.parse_args()

    try:
        controller = IPMIFanController(args.config)

        if not args.test:
            controller.start_monitoring()
            return

        controller.logger.info("测试模式 获取当前温度信息...")
        temp_list = controller.get_temp()
        if temp_list:
            max_temp = max(temp_list)
            avg_temp = sum(temp_list) / len(temp_list)
            print(f"温度信息 - 最高: {max_temp}°C 平均: {avg_temp:.1f}°C")
            target_speed = controller.get_fan_speed_for_temp(max_temp)
            print(f"建议风扇转速: {target_speed}%")
        else:
            print("无法获取温度信息")

    except FileNotFoundError as e:
        print(f"错误: {e}")
        print("请先复制 config.example.json 为 config.json 并填入正确的服务器信息")
        sys.exit(1)
    except Exception as e:
        print(f"启动失败: {e}")
        sys.exit(1)


if __name__ == '__main__':
    main()