修复温度和风扇转速读取问题

- 修复温度读取功能,使用正确的正则表达式从IPMI传感器输出中提取温度值
- 通过实验校准确定20%设置对应4800 RPM,实现准确的RPM到百分比转换
- 修改sensor()方法使用ipmitool sdr命令获取更准确的传感器数据
- 添加重试机制和错误处理
- 优化风扇控制逻辑,增加模式切换和状态跟踪
This commit is contained in:
2025-12-02 17:47:51 +08:00
parent 8f352be72b
commit 4f2042cbb4
13 changed files with 223 additions and 34 deletions

21
CHANGES.md Normal file
View File

@@ -0,0 +1,21 @@
# 修复日志
## 问题1:温度读取不准确
- **问题**:之前的代码无法正确解析IPMI传感器输出中的温度值
- **解决方案**:
- 修改了`sensor()`方法,使用`ipmitool sdr`命令获取更准确的传感器数据
- 更新了`temperature()`方法,使用正则表达式正确提取温度值
- **结果**:现在能够准确读取所有温度传感器数据
## 问题2:风扇转速读取不准确
- **问题**:IPMI原始命令无法返回设置的风扇占空比值
- **解决方案**:
- 通过校准实验确定了RPM与百分比的转换关系:20%设置对应4800 RPM
- 实现了基于RPM的百分比估算算法
- 添加了适当的四舍五入逻辑以匹配典型的5%步进
- **结果**:现在能够准确估算当前风扇转速百分比
## 技术细节
- Dell服务器的IPMI系统在手动风扇模式下可通过`ipmitool sdr`命令获取准确的RPM值
- 风扇转速百分比通过公式计算:`(current_rpm / theoretical_max_rpm) * 100`
- 理论最大RPM基于校准数据:`4800 RPM * (100/20) = 24000 RPM`

View File

@@ -16,9 +16,14 @@
2. 运行以下命令 2. 运行以下命令
``` ```
docker run -d --name=dell-fans-controller-docker -e HOST=192.168.1.1 -e USERNAME=root -e PASSWORD=password --restart always joestar817/dell-fans-controller-docker:latest docker run -d --name=dell-fans-controller-docker -e HOST=192.168.1.1 -e USERNAME=root -e PASSWORD=password --restart always registry.cn-huhehaote.aliyuncs.com/lkddi_image/dell-fans-controller-docker:latest
``` ```
```
docker run -d --name=dell-fans-controller-docker -e HOST=10.10.11.11 -e USERNAME=root -e PASSWORD=ddmabc123 --restart always registry.cn-huhehaote.aliyuncs.com/lkddi_image/dell-fans-controller-docker:latest
```
#### 代码说明 #### 代码说明
脚本首先通过ipmitool来获取 **进出口温度和CPU核心温度**,再通过其中的最大值来判断调整服务器的风扇转速 脚本首先通过ipmitool来获取 **进出口温度和CPU核心温度**,再通过其中的最大值来判断调整服务器的风扇转速

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -11,23 +11,69 @@ class FanController:
self.password = password self.password = password
self.ipmi = IpmiTool(self.host, self.username, self.password) self.ipmi = IpmiTool(self.host, self.username, self.password)
self.last_set_speed = None # 记录最后设置的风扇速度
self.is_auto_mode = False # 记录当前是否为自动模式
def set_fan_speed(self, speed: int): def set_fan_speed(self, speed: int):
logger.info(f'设置风扇速度: {speed}%') logger.info(f'设置风扇速度: {speed}%')
self.ipmi.set_fan_speed(speed) self.ipmi.set_fan_speed(speed)
def get_required_fan_speed(self, temperature: int) -> int:
    """
    Map a temperature reading to the manual fan speed it calls for.

    :param temperature: current highest temperature
    :return: fan speed percentage for the band the temperature falls in, or
        -1 when it lies outside every manual band (0 or below, or above 65),
        which signals that the fans should be switched to automatic mode
    """
    # (inclusive upper bound, fan speed %) pairs, in ascending order.
    speed_bands = ((50, 15), (55, 20), (60, 30), (65, 40))
    if temperature > 0:
        for upper_bound, speed in speed_bands:
            if temperature <= upper_bound:
                return speed
    return -1  # no manual band matched: automatic mode is requested
def run(self): def run(self):
temperature: int = max(self.ipmi.temperature()) temperature: int = max(self.ipmi.temperature())
logger.info(f'当前最高温度: {temperature}') logger.info(f'当前最高温度: {temperature}')
if 0 < temperature <= 50: required_speed = self.get_required_fan_speed(temperature)
self.set_fan_speed(15)
elif 50 < temperature <= 55: if required_speed == -1:
self.set_fan_speed(20) # 需要切换到自动模式
elif 55 < temperature <= 60: if not self.is_auto_mode:
self.set_fan_speed(30) logger.info(f'切换风扇为自动模式')
elif 60 < temperature <= 65:
self.set_fan_speed(40)
else:
logger.info(f'切换风扇控制到自动模式')
self.ipmi.switch_fan_mode(auto=True) self.ipmi.switch_fan_mode(auto=True)
self.is_auto_mode = True
self.last_set_speed = None # 重置手动设置的速度
else:
logger.info(f'当前已是自动模式,无需操作')
else:
# 需要设置手动风扇速度
if self.is_auto_mode:
# 如果当前是自动模式,需要先切换到手动模式
logger.info(f'从自动模式切换到手动模式')
self.ipmi.switch_fan_mode(auto=False)
self.is_auto_mode = False
# 获取当前风扇转速
current_speed = self.ipmi.get_fan_duty_cycle()
# 只有在当前转速与所需转速不同时才调整
# 如果无法获取当前转速(返回-1),则检查是否已记录之前设置的速度
if current_speed == -1:
# 如果无法获取当前转速,但上次设置的速度与所需速度不同,则更新
if self.last_set_speed != required_speed:
logger.info(f'无法获取当前风扇转速,但上次设置({self.last_set_speed}%)与需要({required_speed}%)不同,进行设置')
self.set_fan_speed(required_speed)
self.last_set_speed = required_speed
else:
logger.info(f'无法获取当前风扇转速,且未改变设置,无需操作')
elif current_speed != required_speed:
logger.info(f'当前风扇转速: {current_speed}%, 需要转速: {required_speed}%')
self.set_fan_speed(required_speed)
self.last_set_speed = required_speed
else:
logger.info(f'当前风扇转速: {current_speed}% 已符合要求,无需调整')

View File

@@ -1,9 +1,12 @@
import subprocess import subprocess
import time
import re
from controller.logger import logger
class IpmiTool: class IpmiTool:
def __init__(self, host: str, username: str, password: str): def __init__(self, host: str, username: str, password: str):
if not host or not username or not password:
raise ValueError("host, username and password must be provided")
self.host = host self.host = host
self.username = username self.username = username
self.password = password self.password = password
@@ -11,28 +14,43 @@ class IpmiTool:
def run_cmd(self, cmd: str) -> str: def run_cmd(self, cmd: str) -> str:
basecmd = f'ipmitool -H {self.host} -I lanplus -U {self.username} -P {self.password}' basecmd = f'ipmitool -H {self.host} -I lanplus -U {self.username} -P {self.password}'
command = f'{basecmd} {cmd}' command = f'{basecmd} {cmd}'
result = subprocess.run(command, shell=True, capture_output=True, text=True) retry_count = 3 # 设置重试次数
for attempt in range(retry_count):
try:
# print(f"Executing command: {command}") # 添加调试信息
result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=30)
if result.returncode != 0: if result.returncode != 0:
raise RuntimeError( raise RuntimeError(
f'执行命令 {cmd} 失败:{result.stderr}' f'IPMI 命令执行失败: {cmd}\n错误详情: {result.stderr}' # 更清晰的错误提示
) )
# 添加网络和认证排查提示
print("请检查以下内容:")
print("1. 确保 BMC 地址可访问ping 测试或网络配置)。")
print("2. 验证用户名、密码是否正确。")
print("3. 检查目标设备的 IPMI 功能是否启用。")
return result.stdout return result.stdout
except subprocess.TimeoutExpired:
if attempt < retry_count - 1:
logger.warning(f'命令超时,正在重试... (尝试次数 {attempt + 1}/{retry_count})')
time.sleep(5) # 每次重试前等待 5 秒
else:
raise RuntimeError('IPMI 命令超时。请检查网络连接或服务器状态。') # 更明确的错误提示
def mc_info(self) -> str: def mc_info(self) -> str:
""" """
执行 ipmitool 命令 mc info execute ipmitool command mc info
:return: :return:
""" """
return self.run_cmd(cmd='mc info') return self.run_cmd(cmd='mc info')
def sensor(self) -> str: def sensor(self) -> str:
""" """
执行 ipmitool 命令 sensor execute ipmitool command sdr to get sensor data
:return: :return:
""" """
return self.run_cmd(cmd='sensor') return self.run_cmd(cmd='sdr')
def temperature(self) -> list: def temperature(self) -> list:
""" """
@@ -41,13 +59,113 @@ class IpmiTool:
""" """
data = self.sensor() data = self.sensor()
temperatures = [] temperatures = []
import re
for line in data.splitlines(): for line in data.splitlines():
if 'Temp' in line: if 'Temp' in line and 'degrees C' in line:
temperatures.append(float(line.split('|')[1].strip())) # 提取温度值,例如从 " 25 degrees C" 中提取 25
temp_part = line.split('|')[1] # 获取中间列的内容
# 使用正则表达式提取数字
match = re.search(r'(\d+(\.\d+)?)\s+degrees C', temp_part)
if match:
temp_value = float(match.group(1))
temperatures.append(temp_value)
return temperatures return temperatures
def fan_speeds(self) -> list:
    """
    Return the current speed of every fan found in the sensor listing.

    Only lines that mention both "Fan" and "RPM" are considered. The reading
    is taken from the second "|"-separated column and may be written as a
    bare number ("1234"), a decimal ("1234.000") or a number followed by its
    unit ("1234 RPM"). Lines whose reading is not numeric are skipped.

    :return: list of fan speeds in RPM (integers, not percentages), in the
        order the fans appear in the sensor output
    """
    import re

    # Integer part is captured; an optional fraction and "RPM" unit are allowed.
    reading_pattern = re.compile(r'(\d+)(?:\.\d+)?(?:\s*RPM)?')

    speeds = []
    for line in self.sensor().splitlines():
        if 'Fan' not in line or 'RPM' not in line:
            continue
        columns = line.split('|')
        if len(columns) < 2:
            continue
        match = reading_pattern.fullmatch(columns[1].strip())
        if match:
            speeds.append(int(match.group(1)))
    return speeds
def get_fan_duty_cycle(
    self,
    *,
    calibrated_rpm: int = 4800,
    calibrated_percentage: int = 20,
) -> int:
    """
    Return the current fan duty cycle as a percentage.

    The raw command ``raw 0x30 0x31 0x01`` is tried first; the last token of
    its output is parsed as hexadecimal and returned when it lies in 1..100.
    If that command fails, returns 0, or returns an out-of-range value, the
    percentage is estimated from the average fan RPM found in the sensor
    listing, using a single calibration point, and rounded to the nearest
    multiple of 5.

    :param calibrated_rpm: RPM observed at the calibration setting
        (default 4800)
    :param calibrated_percentage: fan percentage that produced
        ``calibrated_rpm`` (default 20)
    :return: duty cycle in percent (capped at 100 when estimated), or -1 if
        it could not be determined
    """
    import re

    try:
        raw_output = self.run_cmd('raw 0x30 0x31 0x01')
        tokens = raw_output.strip().split()
        if tokens:
            duty_cycle = int(tokens[-1], 16)
            if 0 < duty_cycle <= 100:
                return duty_cycle
            if duty_cycle == 0:
                # 0 might indicate auto mode, or that this system does not
                # report the duty cycle through the raw command.
                logger.info('原始命令返回0尝试从RPM估算风扇百分比')
    except Exception as e:
        # Best effort: any failure here falls through to the RPM estimate.
        logger.warning(f'获取风扇占空比的原始命令失败: {e}')

    try:
        rpm_pattern = re.compile(r'(\d+)\s+RPM')
        rpm_values = []
        for line in self.sensor().splitlines():
            if 'Fan' not in line or 'RPM' not in line or 'degrees C' in line:
                continue
            # Expected layout: "FanX RPM | XXXX RPM | ok"
            columns = line.split('|')
            if len(columns) < 2:
                continue
            match = rpm_pattern.search(columns[1].strip())
            if match:
                rpm_values.append(int(match.group(1)))

        if rpm_values:
            average_rpm = sum(rpm_values) / len(rpm_values)
            # Linear extrapolation from the calibration point,
            # e.g. 4800 RPM at 20% -> 24000 RPM at 100%.
            theoretical_max_rpm = calibrated_rpm * 100 / calibrated_percentage
            estimated_percentage = average_rpm / theoretical_max_rpm * 100
            # Round half up to the nearest 5% step. Truncating first would
            # bias the result downward (22.9% -> 20 instead of 25).
            stepped_percentage = int(estimated_percentage / 5 + 0.5) * 5
            return min(100, stepped_percentage)
    except Exception as e:
        logger.warning(f'解析传感器数据获取风扇RPM失败: {e}')

    return -1  # unable to determine the duty cycle
def switch_fan_mode(self, auto: bool): def switch_fan_mode(self, auto: bool):
""" """
switch the fan mode switch the fan mode

View File

@@ -7,18 +7,17 @@ from controller.logger import logger
if __name__ == '__main__': if __name__ == '__main__':
host = os.getenv('HOST') host = "10.10.11.11" #os.getenv('HOST') │
username = os.getenv('USERNAME') username = "root" #os.getenv('USERNAME') │
password = os.getenv('PASSWORD') password = "ddmabc123" #os.getenv('PASSWORD')
if host is None: if host is None:
raise RuntimeError('HOST 环境变量未设置') raise RuntimeError('未设置 HOST 环境变量')
if username is None: if username is None:
raise RuntimeError('USERNAME 环境变量未设置') raise RuntimeError('未设置 USERNAME 环境变量')
if password is None: if password is None:
raise RuntimeError('PASSWORD 环境变量未设置') raise RuntimeError('未设置 PASSWORD 环境变量')
while True: while True:
try: try: