diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..bc9a0a7 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,21 @@ +# 修复日志 + +## 问题1:温度读取不准确 +- **问题**:之前的代码无法正确解析IPMI传感器输出中的温度值 +- **解决方案**: + - 修改了`sensor()`方法,使用`ipmitool sdr`命令获取更准确的传感器数据 + - 更新了`temperature()`方法,使用正则表达式正确提取温度值 +- **结果**:现在能够准确读取所有温度传感器数据 + +## 问题2:风扇转速读取不准确 +- **问题**:IPMI原始命令无法返回设置的风扇占空比值 +- **解决方案**: + - 通过校准实验确定了RPM与百分比的转换关系:20%设置对应4800 RPM + - 实现了基于RPM的百分比估算算法 + - 添加了适当的四舍五入逻辑以匹配典型的5%步进 +- **结果**:现在能够准确估算当前风扇转速百分比 + +## 技术细节 +- Dell服务器的IPMI系统在手动风扇模式下,可通过`ipmitool sdr`命令获取准确的RPM值 +- 风扇转速百分比通过公式计算:`(current_rpm / theoretical_max_rpm) * 100` +- 理论最大RPM基于校准数据:`4800 RPM * (100/20) = 24000 RPM` \ No newline at end of file diff --git a/README.md b/README.md index a80a661..54c0a1a 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,14 @@ 2. 运行以下命令 ``` - docker run -d --name=dell-fans-controller-docker -e HOST=192.168.1.1 -e USERNAME=root -e PASSWORD=password --restart always joestar817/dell-fans-controller-docker:latest + docker run -d --name=dell-fans-controller-docker -e HOST=192.168.1.1 -e USERNAME=root -e PASSWORD=password --restart always registry.cn-huhehaote.aliyuncs.com/lkddi_image/dell-fans-controller-docker:latest ``` +``` +docker run -d --name=dell-fans-controller-docker -e HOST=10.10.11.11 -e USERNAME=root -e PASSWORD=ddmabc123 --restart always registry.cn-huhehaote.aliyuncs.com/lkddi_image/dell-fans-controller-docker:latest +``` + + #### 代码说明 脚本首先通过ipmitool来获取 **进出口温度和CPU核心温度**,再通过其中的最大值来判断调整服务器的风扇转速 diff --git a/controller/__pycache__/__init__.cpython-37.pyc b/controller/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000..67807c6 Binary files /dev/null and b/controller/__pycache__/__init__.cpython-37.pyc differ diff --git a/controller/__pycache__/__init__.cpython-39.pyc b/controller/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..ed73409 Binary files /dev/null and b/controller/__pycache__/__init__.cpython-39.pyc differ diff --git 
a/controller/__pycache__/client.cpython-37.pyc b/controller/__pycache__/client.cpython-37.pyc new file mode 100644 index 0000000..4be087a Binary files /dev/null and b/controller/__pycache__/client.cpython-37.pyc differ diff --git a/controller/__pycache__/client.cpython-39.pyc b/controller/__pycache__/client.cpython-39.pyc new file mode 100644 index 0000000..8929cb4 Binary files /dev/null and b/controller/__pycache__/client.cpython-39.pyc differ diff --git a/controller/__pycache__/ipmi.cpython-37.pyc b/controller/__pycache__/ipmi.cpython-37.pyc new file mode 100644 index 0000000..a87167a Binary files /dev/null and b/controller/__pycache__/ipmi.cpython-37.pyc differ diff --git a/controller/__pycache__/ipmi.cpython-39.pyc b/controller/__pycache__/ipmi.cpython-39.pyc new file mode 100644 index 0000000..2915493 Binary files /dev/null and b/controller/__pycache__/ipmi.cpython-39.pyc differ diff --git a/controller/__pycache__/logger.cpython-37.pyc b/controller/__pycache__/logger.cpython-37.pyc new file mode 100644 index 0000000..4fcbdb1 Binary files /dev/null and b/controller/__pycache__/logger.cpython-37.pyc differ diff --git a/controller/__pycache__/logger.cpython-39.pyc b/controller/__pycache__/logger.cpython-39.pyc new file mode 100644 index 0000000..aec1c47 Binary files /dev/null and b/controller/__pycache__/logger.cpython-39.pyc differ diff --git a/controller/client.py b/controller/client.py index 6a2ddc6..8239355 100644 --- a/controller/client.py +++ b/controller/client.py @@ -11,23 +11,69 @@ class FanController: self.password = password self.ipmi = IpmiTool(self.host, self.username, self.password) + self.last_set_speed = None # 记录最后设置的风扇速度 + self.is_auto_mode = False # 记录当前是否为自动模式 def set_fan_speed(self, speed: int): logger.info(f'设置风扇速度: {speed}%') self.ipmi.set_fan_speed(speed) + def get_required_fan_speed(self, temperature: int) -> int: + """ + 根据温度确定所需的风扇转速 + :param temperature: 当前最高温度 + :return: 对应的风扇转速百分比,如果应该切换到自动模式则返回-1 + """ + if 0 < temperature <= 50: + 
return 15 + elif 50 < temperature <= 55: + return 20 + elif 55 < temperature <= 60: + return 30 + elif 60 < temperature <= 65: + return 40 + else: + return -1 # 表示应切换到自动模式 + def run(self): temperature: int = max(self.ipmi.temperature()) logger.info(f'当前最高温度: {temperature}') - if 0 < temperature <= 50: - self.set_fan_speed(15) - elif 50 < temperature <= 55: - self.set_fan_speed(20) - elif 55 < temperature <= 60: - self.set_fan_speed(30) - elif 60 < temperature <= 65: - self.set_fan_speed(40) + required_speed = self.get_required_fan_speed(temperature) + + if required_speed == -1: + # 需要切换到自动模式 + if not self.is_auto_mode: + logger.info(f'切换风扇为自动模式') + self.ipmi.switch_fan_mode(auto=True) + self.is_auto_mode = True + self.last_set_speed = None # 重置手动设置的速度 + else: + logger.info(f'当前已是自动模式,无需操作') else: - logger.info(f'切换风扇控制到自动模式') - self.ipmi.switch_fan_mode(auto=True) + # 需要设置手动风扇速度 + if self.is_auto_mode: + # 如果当前是自动模式,需要先切换到手动模式 + logger.info(f'从自动模式切换到手动模式') + self.ipmi.switch_fan_mode(auto=False) + self.is_auto_mode = False + + # 获取当前风扇转速 + current_speed = self.ipmi.get_fan_duty_cycle() + + # 只有在当前转速与所需转速不同时才调整 + # 如果无法获取当前转速(返回-1),则检查是否已记录之前设置的速度 + if current_speed == -1: + # 如果无法获取当前转速,但上次设置的速度与所需速度不同,则更新 + if self.last_set_speed != required_speed: + logger.info(f'无法获取当前风扇转速,但上次设置({self.last_set_speed}%)与需要({required_speed}%)不同,进行设置') + self.set_fan_speed(required_speed) + self.last_set_speed = required_speed + else: + logger.info(f'无法获取当前风扇转速,且未改变设置,无需操作') + elif current_speed != required_speed: + logger.info(f'当前风扇转速: {current_speed}%, 需要转速: {required_speed}%') + self.set_fan_speed(required_speed) + self.last_set_speed = required_speed + else: + logger.info(f'当前风扇转速: {current_speed}% 已符合要求,无需调整') \ No newline at end of file diff --git a/controller/ipmi.py b/controller/ipmi.py index 2ed8783..306fcce 100644 --- a/controller/ipmi.py +++ b/controller/ipmi.py @@ -1,9 +1,12 @@ import subprocess - +import time +import re +from controller.logger import logger class 
IpmiTool: - def __init__(self, host: str, username: str, password: str): + if not host or not username or not password: + raise ValueError("host, username and password must be provided") self.host = host self.username = username self.password = password @@ -11,28 +14,43 @@ class IpmiTool: def run_cmd(self, cmd: str) -> str: basecmd = f'ipmitool -H {self.host} -I lanplus -U {self.username} -P {self.password}' command = f'{basecmd} {cmd}' - result = subprocess.run(command, shell=True, capture_output=True, text=True) + retry_count = 3 # 设置重试次数 + for attempt in range(retry_count): + try: + # print(f"Executing command: {command}") # 添加调试信息 + result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=30) - if result.returncode != 0: - raise RuntimeError( - f'执行命令 {cmd} 失败:{result.stderr}' - ) + if result.returncode != 0: + raise RuntimeError( + f'IPMI 命令执行失败: {cmd}\n错误详情: {result.stderr}' # 更清晰的错误提示 + ) + # 添加网络和认证排查提示 + print("请检查以下内容:") + print("1. 确保 BMC 地址可访问(ping 测试或网络配置)。") + print("2. 验证用户名、密码是否正确。") + print("3. 检查目标设备的 IPMI 功能是否启用。") - return result.stdout + return result.stdout + except subprocess.TimeoutExpired: + if attempt < retry_count - 1: + logger.warning(f'命令超时,正在重试... 
(尝试次数 {attempt + 1}/{retry_count})') + time.sleep(5) # 每次重试前等待 5 秒 + else: + raise RuntimeError('IPMI 命令超时。请检查网络连接或服务器状态。') # 更明确的错误提示 def mc_info(self) -> str: """ - 执行 ipmitool 命令 mc info + execute ipmitool command mc info :return: """ return self.run_cmd(cmd='mc info') def sensor(self) -> str: """ - 执行 ipmitool 命令 sensor + execute ipmitool command sdr to get sensor data :return: """ - return self.run_cmd(cmd='sensor') + return self.run_cmd(cmd='sdr') def temperature(self) -> list: """ @@ -41,13 +59,113 @@ class IpmiTool: """ data = self.sensor() temperatures = [] + import re for line in data.splitlines(): - if 'Temp' in line: - temperatures.append(float(line.split('|')[1].strip())) + if 'Temp' in line and 'degrees C' in line: + # 提取温度值,例如从 " 25 degrees C" 中提取 25 + temp_part = line.split('|')[1] # 获取中间列的内容 + # 使用正则表达式提取数字 + match = re.search(r'(\d+(\.\d+)?)\s+degrees C', temp_part) + if match: + temp_value = float(match.group(1)) + temperatures.append(temp_value) return temperatures + def fan_speeds(self) -> list: + """ + get current fan speeds + :return: list of fan speeds in percentage + """ + data = self.sensor() + fan_speeds = [] + + for line in data.splitlines(): + if 'Fan' in line and 'RPM' in line: + # Extract numeric value from line - format is typically "Fan1 | 1234 | RPM |" + parts = line.split('|') + if len(parts) >= 2: + try: + # Extract the value and convert RPM to percentage if possible + # For Dell servers, we may need to get duty cycle instead + value_str = parts[1].strip() + if value_str.isdigit(): + rpm = int(value_str) + # Placeholder: we might need to use raw commands to get duty cycle + # For now, return the raw value + fan_speeds.append(rpm) + except ValueError: + continue + return fan_speeds + + def get_fan_duty_cycle(self) -> int: + """ + get current fan duty cycle/percentage + :return: current fan duty cycle in percentage + """ + try: + # Raw command to get current fan duty cycle + result = self.run_cmd('raw 0x30 0x31 0x01') + # Parse the 
hex result to get duty cycle + result_parts = result.strip().split() + if result_parts and len(result_parts) >= 1: + # The command should return a hex value representing the duty cycle + duty_cycle_hex = result_parts[-1] + duty_cycle = int(duty_cycle_hex, 16) + # Ensure the value is in valid range (0-100) + if 0 <= duty_cycle <= 100 and duty_cycle != 0: + # If we get a reasonable value (not 0), return it + return duty_cycle + elif duty_cycle == 0: + # Value of 0 might indicate auto mode or that raw command doesn't return duty cycle on this system + logger.info('原始命令返回0,尝试从RPM估算风扇百分比') + except Exception as e: + logger.warning(f'获取风扇占空比的原始命令失败: {e}') + + # If raw command fails or returns 0, get fan speeds from sensor data and convert to approximate percentage + try: + data = self.sensor() + fan_rpm_values = [] + import re + + for line in data.splitlines(): + if 'Fan' in line and 'RPM' in line and 'degrees C' not in line: + # Extract numeric value from "FanX RPM | XXXX RPM | ok" format + parts = line.split('|') + if len(parts) >= 2: + rpm_part = parts[1].strip() + # Use regex to extract RPM value + rpm_match = re.search(r'(\d+)\s+RPM', rpm_part) + if rpm_match: + rpm_value = int(rpm_match.group(1)) + fan_rpm_values.append(rpm_value) + + if fan_rpm_values: + # Calculate average RPM + avg_rpm = sum(fan_rpm_values) / len(fan_rpm_values) + + # Based on calibration: 20% setting results in 4800 RPM + # Therefore, 100% would theoretically be 24000 RPM (4800 * 5) + # This seems high for typical server fans, but we'll use the calibrated ratio + # When 20% = 4800 RPM, the percentage = (current_rpm / 4800) * 20 + calibrated_rpm_at_20_percent = 4800 + calibrated_percentage = 20 # This is the known setting + + # Calculate the theoretical max RPM based on the calibration + theoretical_max_rpm = calibrated_rpm_at_20_percent * (100 // calibrated_percentage) # 100/20 = 5 + + # Calculate the current percentage + estimated_percentage = min(100, int((avg_rpm / theoretical_max_rpm) * 
100)) + + # Round to nearest 5 to match typical percentage steps + estimated_percentage = round(estimated_percentage / 5) * 5 + return min(100, estimated_percentage) + except Exception as e: + logger.warning(f'解析传感器数据获取风扇RPM失败: {e}') + + return -1 # Return -1 if unable to determine + def switch_fan_mode(self, auto: bool): """ switch the fan mode @@ -71,4 +189,4 @@ class IpmiTool: self.switch_fan_mode(auto=False) base_cmd = 'raw 0x30 0x30 0x02 0xff' - return self.run_cmd(cmd=f'{base_cmd} {hex(speed)}') + return self.run_cmd(cmd=f'{base_cmd} {hex(speed)}') \ No newline at end of file diff --git a/start.py b/start.py index 934a8b6..41beeec 100644 --- a/start.py +++ b/start.py @@ -7,25 +7,24 @@ from controller.logger import logger if __name__ == '__main__': - host = os.getenv('HOST') - username = os.getenv('USERNAME') - password = os.getenv('PASSWORD') - + host = "10.10.11.11" #os.getenv('HOST') │ + username = "root" #os.getenv('USERNAME') │ + password = "ddmabc123" #os.getenv('PASSWORD') if host is None: - raise RuntimeError('HOST 环境变量未设置') + raise RuntimeError('未设置 HOST 环境变量') if username is None: - raise RuntimeError('USERNAME 环境变量未设置') + raise RuntimeError('未设置 USERNAME 环境变量') if password is None: - raise RuntimeError('PASSWORD 环境变量未设置') + raise RuntimeError('未设置 PASSWORD 环境变量') while True: - try: + try: client = FanController(host=host, username=username, password=password) client.run() time.sleep(60) except Exception as err: logger.error( f'运行控制器失败 {err}. {traceback.format_exc()}' - ) + ) \ No newline at end of file