mirror of
https://github.com/lkddi/dell-fans-controller-docker.git
synced 2026-04-03 09:55:11 +08:00
修复温度和风扇转速读取问题
- 修复温度读取功能,使用正确的正则表达式从IPMI传感器输出中提取温度值 - 通过实验校准确定20%设置对应4800 RPM,实现准确的RPM到百分比转换 - 修改sensor()方法使用ipmitool sdr命令获取更准确的传感器数据 - 添加重试机制和错误处理 - 优化风扇控制逻辑,增加模式切换和状态跟踪
This commit is contained in:
21
CHANGES.md
Normal file
21
CHANGES.md
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# 修复日志
|
||||||
|
|
||||||
|
## 问题1:温度读取不准确
|
||||||
|
- **问题**:之前的代码无法正确解析IPMI传感器输出中的温度值
|
||||||
|
- **解决方案**:
|
||||||
|
- 修改了`sensor()`方法,使用`ipmitool sdr`命令获取更准确的传感器数据
|
||||||
|
- 更新了`temperature()`方法,使用正则表达式正确提取温度值
|
||||||
|
- **结果**:现在能够准确读取所有温度传感器数据
|
||||||
|
|
||||||
|
## 问题2:风扇转速读取不准确
|
||||||
|
- **问题**:IPMI原始命令无法返回设置的风扇占空比值
|
||||||
|
- **解决方案**:
|
||||||
|
- 通过校准实验确定了RPM与百分比的转换关系:20%设置对应4800 RPM
|
||||||
|
- 实现了基于RPM的百分比估算算法
|
||||||
|
- 添加了适当的四舍五入逻辑以匹配典型的5%步进
|
||||||
|
- **结果**:现在能够准确估算当前风扇转速百分比
|
||||||
|
|
||||||
|
## 技术细节
|
||||||
|
- Dell服务器的IPMI系统在手动风扇模式下,可通过`ipmitool sdr`命令获取准确的RPM值
|
||||||
|
- 风扇转速百分比通过公式计算:`(current_rpm / theoretical_max_rpm) * 100`
|
||||||
|
- 理论最大RPM基于校准数据:`4800 RPM * (100/20) = 24000 RPM`
|
||||||
@@ -16,9 +16,14 @@
|
|||||||
|
|
||||||
2. 运行以下命令
|
2. 运行以下命令
|
||||||
```
|
```
|
||||||
docker run -d --name=dell-fans-controller-docker -e HOST=192.168.1.1 -e USERNAME=root -e PASSWORD=password --restart always joestar817/dell-fans-controller-docker:latest
|
docker run -d --name=dell-fans-controller-docker -e HOST=192.168.1.1 -e USERNAME=root -e PASSWORD=password --restart always registry.cn-huhehaote.aliyuncs.com/lkddi_image/dell-fans-controller-docker:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
docker run -d --name=dell-fans-controller-docker -e HOST=10.10.11.11 -e USERNAME=root -e PASSWORD=ddmabc123 --restart always registry.cn-huhehaote.aliyuncs.com/lkddi_image/dell-fans-controller-docker:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
#### 代码说明
|
#### 代码说明
|
||||||
|
|
||||||
脚本首先通过ipmitool来获取 **进出口温度和CPU核心温度**,再通过其中的最大值来判断调整服务器的风扇转速
|
脚本首先通过ipmitool来获取 **进出口温度和CPU核心温度**,再通过其中的最大值来判断调整服务器的风扇转速
|
||||||
|
|||||||
BIN
controller/__pycache__/__init__.cpython-37.pyc
Normal file
BIN
controller/__pycache__/__init__.cpython-37.pyc
Normal file
Binary file not shown.
BIN
controller/__pycache__/__init__.cpython-39.pyc
Normal file
BIN
controller/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
controller/__pycache__/client.cpython-37.pyc
Normal file
BIN
controller/__pycache__/client.cpython-37.pyc
Normal file
Binary file not shown.
BIN
controller/__pycache__/client.cpython-39.pyc
Normal file
BIN
controller/__pycache__/client.cpython-39.pyc
Normal file
Binary file not shown.
BIN
controller/__pycache__/ipmi.cpython-37.pyc
Normal file
BIN
controller/__pycache__/ipmi.cpython-37.pyc
Normal file
Binary file not shown.
BIN
controller/__pycache__/ipmi.cpython-39.pyc
Normal file
BIN
controller/__pycache__/ipmi.cpython-39.pyc
Normal file
Binary file not shown.
BIN
controller/__pycache__/logger.cpython-37.pyc
Normal file
BIN
controller/__pycache__/logger.cpython-37.pyc
Normal file
Binary file not shown.
BIN
controller/__pycache__/logger.cpython-39.pyc
Normal file
BIN
controller/__pycache__/logger.cpython-39.pyc
Normal file
Binary file not shown.
@@ -11,23 +11,69 @@ class FanController:
|
|||||||
self.password = password
|
self.password = password
|
||||||
|
|
||||||
self.ipmi = IpmiTool(self.host, self.username, self.password)
|
self.ipmi = IpmiTool(self.host, self.username, self.password)
|
||||||
|
self.last_set_speed = None # 记录最后设置的风扇速度
|
||||||
|
self.is_auto_mode = False # 记录当前是否为自动模式
|
||||||
|
|
||||||
def set_fan_speed(self, speed: int):
|
def set_fan_speed(self, speed: int):
|
||||||
logger.info(f'设置风扇速度: {speed}%')
|
logger.info(f'设置风扇速度: {speed}%')
|
||||||
self.ipmi.set_fan_speed(speed)
|
self.ipmi.set_fan_speed(speed)
|
||||||
|
|
||||||
|
def get_required_fan_speed(self, temperature: float,
                           curve: tuple = ((50, 15), (55, 20), (60, 30), (65, 40))) -> int:
    """
    Map a temperature reading to the fan speed percentage that should be set.

    :param temperature: current highest temperature reading (int or float)
    :param curve: ascending ``(upper_bound, speed_percent)`` pairs; the first
                  pair whose inclusive upper bound is >= ``temperature`` wins.
                  The default reproduces the original fixed ladder
                  (<=50 -> 15, <=55 -> 20, <=60 -> 30, <=65 -> 40).
    :return: fan speed percentage, or -1 when the fans should be handed back
             to automatic control
    """
    # Readings that are not strictly positive (including NaN, which fails every
    # comparison) fall outside the manual range, exactly like readings above the
    # last bound: both yield -1.
    if not temperature > 0:
        return -1

    for upper_bound, speed in curve:
        if temperature <= upper_bound:
            return speed

    return -1  # hotter than the last bound: switch to automatic mode
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
temperature: int = max(self.ipmi.temperature())
|
temperature: int = max(self.ipmi.temperature())
|
||||||
logger.info(f'当前最高温度: {temperature}')
|
logger.info(f'当前最高温度: {temperature}')
|
||||||
|
|
||||||
if 0 < temperature <= 50:
|
required_speed = self.get_required_fan_speed(temperature)
|
||||||
self.set_fan_speed(15)
|
|
||||||
elif 50 < temperature <= 55:
|
if required_speed == -1:
|
||||||
self.set_fan_speed(20)
|
# 需要切换到自动模式
|
||||||
elif 55 < temperature <= 60:
|
if not self.is_auto_mode:
|
||||||
self.set_fan_speed(30)
|
logger.info(f'切换风扇为自动模式')
|
||||||
elif 60 < temperature <= 65:
|
self.ipmi.switch_fan_mode(auto=True)
|
||||||
self.set_fan_speed(40)
|
self.is_auto_mode = True
|
||||||
|
self.last_set_speed = None # 重置手动设置的速度
|
||||||
|
else:
|
||||||
|
logger.info(f'当前已是自动模式,无需操作')
|
||||||
else:
|
else:
|
||||||
logger.info(f'切换风扇控制到自动模式')
|
# 需要设置手动风扇速度
|
||||||
self.ipmi.switch_fan_mode(auto=True)
|
if self.is_auto_mode:
|
||||||
|
# 如果当前是自动模式,需要先切换到手动模式
|
||||||
|
logger.info(f'从自动模式切换到手动模式')
|
||||||
|
self.ipmi.switch_fan_mode(auto=False)
|
||||||
|
self.is_auto_mode = False
|
||||||
|
|
||||||
|
# 获取当前风扇转速
|
||||||
|
current_speed = self.ipmi.get_fan_duty_cycle()
|
||||||
|
|
||||||
|
# 只有在当前转速与所需转速不同时才调整
|
||||||
|
# 如果无法获取当前转速(返回-1),则检查是否已记录之前设置的速度
|
||||||
|
if current_speed == -1:
|
||||||
|
# 如果无法获取当前转速,但上次设置的速度与所需速度不同,则更新
|
||||||
|
if self.last_set_speed != required_speed:
|
||||||
|
logger.info(f'无法获取当前风扇转速,但上次设置({self.last_set_speed}%)与需要({required_speed}%)不同,进行设置')
|
||||||
|
self.set_fan_speed(required_speed)
|
||||||
|
self.last_set_speed = required_speed
|
||||||
|
else:
|
||||||
|
logger.info(f'无法获取当前风扇转速,且未改变设置,无需操作')
|
||||||
|
elif current_speed != required_speed:
|
||||||
|
logger.info(f'当前风扇转速: {current_speed}%, 需要转速: {required_speed}%')
|
||||||
|
self.set_fan_speed(required_speed)
|
||||||
|
self.last_set_speed = required_speed
|
||||||
|
else:
|
||||||
|
logger.info(f'当前风扇转速: {current_speed}% 已符合要求,无需调整')
|
||||||
@@ -1,9 +1,12 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
from controller.logger import logger
|
||||||
|
|
||||||
class IpmiTool:
|
class IpmiTool:
|
||||||
|
|
||||||
def __init__(self, host: str, username: str, password: str):
|
def __init__(self, host: str, username: str, password: str):
|
||||||
|
if not host or not username or not password:
|
||||||
|
raise ValueError("host, username and password must be provided")
|
||||||
self.host = host
|
self.host = host
|
||||||
self.username = username
|
self.username = username
|
||||||
self.password = password
|
self.password = password
|
||||||
@@ -11,28 +14,43 @@ class IpmiTool:
|
|||||||
def run_cmd(self, cmd: str) -> str:
|
def run_cmd(self, cmd: str) -> str:
|
||||||
basecmd = f'ipmitool -H {self.host} -I lanplus -U {self.username} -P {self.password}'
|
basecmd = f'ipmitool -H {self.host} -I lanplus -U {self.username} -P {self.password}'
|
||||||
command = f'{basecmd} {cmd}'
|
command = f'{basecmd} {cmd}'
|
||||||
result = subprocess.run(command, shell=True, capture_output=True, text=True)
|
retry_count = 3 # 设置重试次数
|
||||||
|
for attempt in range(retry_count):
|
||||||
|
try:
|
||||||
|
# print(f"Executing command: {command}") # 添加调试信息
|
||||||
|
result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=30)
|
||||||
|
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f'执行命令 {cmd} 失败:{result.stderr}'
|
f'IPMI 命令执行失败: {cmd}\n错误详情: {result.stderr}' # 更清晰的错误提示
|
||||||
)
|
)
|
||||||
|
# 添加网络和认证排查提示
|
||||||
|
print("请检查以下内容:")
|
||||||
|
print("1. 确保 BMC 地址可访问(ping 测试或网络配置)。")
|
||||||
|
print("2. 验证用户名、密码是否正确。")
|
||||||
|
print("3. 检查目标设备的 IPMI 功能是否启用。")
|
||||||
|
|
||||||
return result.stdout
|
return result.stdout
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
if attempt < retry_count - 1:
|
||||||
|
logger.warning(f'命令超时,正在重试... (尝试次数 {attempt + 1}/{retry_count})')
|
||||||
|
time.sleep(5) # 每次重试前等待 5 秒
|
||||||
|
else:
|
||||||
|
raise RuntimeError('IPMI 命令超时。请检查网络连接或服务器状态。') # 更明确的错误提示
|
||||||
|
|
||||||
def mc_info(self) -> str:
|
def mc_info(self) -> str:
|
||||||
"""
|
"""
|
||||||
执行 ipmitool 命令 mc info
|
execute ipmitool command mc info
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
return self.run_cmd(cmd='mc info')
|
return self.run_cmd(cmd='mc info')
|
||||||
|
|
||||||
def sensor(self) -> str:
|
def sensor(self) -> str:
|
||||||
"""
|
"""
|
||||||
执行 ipmitool 命令 sensor
|
execute ipmitool command sdr to get sensor data
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
return self.run_cmd(cmd='sensor')
|
return self.run_cmd(cmd='sdr')
|
||||||
|
|
||||||
def temperature(self) -> list:
|
def temperature(self) -> list:
|
||||||
"""
|
"""
|
||||||
@@ -41,13 +59,113 @@ class IpmiTool:
|
|||||||
"""
|
"""
|
||||||
data = self.sensor()
|
data = self.sensor()
|
||||||
temperatures = []
|
temperatures = []
|
||||||
|
import re
|
||||||
|
|
||||||
for line in data.splitlines():
|
for line in data.splitlines():
|
||||||
if 'Temp' in line:
|
if 'Temp' in line and 'degrees C' in line:
|
||||||
temperatures.append(float(line.split('|')[1].strip()))
|
# 提取温度值,例如从 " 25 degrees C" 中提取 25
|
||||||
|
temp_part = line.split('|')[1] # 获取中间列的内容
|
||||||
|
# 使用正则表达式提取数字
|
||||||
|
match = re.search(r'(\d+(\.\d+)?)\s+degrees C', temp_part)
|
||||||
|
if match:
|
||||||
|
temp_value = float(match.group(1))
|
||||||
|
temperatures.append(temp_value)
|
||||||
|
|
||||||
return temperatures
|
return temperatures
|
||||||
|
|
||||||
|
def fan_speeds(self) -> list:
    """
    Return the current fan speeds parsed from the sensor output.

    The values are raw RPM readings, not percentages. Each line containing
    both ``Fan`` and ``RPM`` is split on ``|`` and the leading numeric token
    of the second column is taken, so all of these layouts are accepted:

    * ``Fan1 | 1234 | RPM |``
    * ``Fan1 RPM | 4800 RPM | ok``
    * ``Fan1 RPM | 4800.000 | RPM | ok``

    Lines whose value column is empty or non-numeric (for example
    ``no reading``) are skipped.

    :return: list of fan speeds in RPM (ints), in output order
    """
    speeds = []

    for line in self.sensor().splitlines():
        if 'Fan' not in line or 'RPM' not in line:
            continue

        columns = line.split('|')
        if len(columns) < 2:
            continue

        # The value column may carry a unit suffix ("4800 RPM"), so only the
        # first whitespace-separated token is parsed.
        tokens = columns[1].split()
        if not tokens:
            continue

        try:
            rpm = int(float(tokens[0]))
        except (ValueError, OverflowError):
            # Non-numeric readings such as "no", "na", "nan" or "inf".
            continue

        if rpm >= 0:
            speeds.append(rpm)

    return speeds
|
||||||
|
|
||||||
|
def get_fan_duty_cycle(self, calibrated_rpm: int = 4800, calibrated_percentage: int = 20) -> int:
    """
    Return the current fan duty cycle as a percentage, or -1 if it cannot be determined.

    Two strategies are tried in order:

    1. The raw command ``raw 0x30 0x31 0x01``; the last token of its output
       is parsed as hex. A value in 1..100 is returned directly. A value of 0
       is logged and any other value is ignored; both fall through to step 2.
    2. The RPM readings from ``self.sensor()`` are averaged and converted
       with the calibration ratio
       ``percentage = avg_rpm / calibrated_rpm * calibrated_percentage``,
       then rounded half-up to the nearest 5 and capped at 100.

    :param calibrated_rpm: RPM observed at the calibration setting (default 4800)
    :param calibrated_percentage: fan setting used for calibration (default 20)
    :return: duty cycle percentage, or -1 when neither strategy yields a value
    """
    import re  # function-scope import: the method does not rely on module-level imports

    try:
        reply = self.run_cmd('raw 0x30 0x31 0x01').split()
        if reply:
            duty_cycle = int(reply[-1], 16)
            if 0 < duty_cycle <= 100:
                return duty_cycle
            if duty_cycle == 0:
                # 0 may mean automatic mode, or that this system does not
                # report the duty cycle through the raw command.
                logger.info('原始命令返回0,尝试从RPM估算风扇百分比')
    except Exception as e:
        # Best effort: any failure here just triggers the RPM-based fallback.
        logger.warning(f'获取风扇占空比的原始命令失败: {e}')

    try:
        rpm_values = []
        for line in self.sensor().splitlines():
            if 'Fan' not in line or 'RPM' not in line or 'degrees C' in line:
                continue
            columns = line.split('|')
            if len(columns) < 2:
                continue
            # Expected value column: "XXXX RPM".
            found = re.search(r'(\d+)\s+RPM', columns[1])
            if found:
                rpm_values.append(int(found.group(1)))

        if rpm_values:
            avg_rpm = sum(rpm_values) / len(rpm_values)
            # Direct proportion; no truncation before the rounding step, which
            # previously biased the estimate downwards (17.9% became 15%).
            estimate = avg_rpm / calibrated_rpm * calibrated_percentage
            # Round half-up to the nearest 5% step (round() would round
            # halves to even, e.g. 12.5% -> 10%).
            stepped = int(estimate / 5 + 0.5) * 5
            return min(100, stepped)
    except Exception as e:
        logger.warning(f'解析传感器数据获取风扇RPM失败: {e}')

    return -1  # unable to determine the duty cycle
|
||||||
|
|
||||||
def switch_fan_mode(self, auto: bool):
|
def switch_fan_mode(self, auto: bool):
|
||||||
"""
|
"""
|
||||||
switch the fan mode
|
switch the fan mode
|
||||||
@@ -71,4 +189,4 @@ class IpmiTool:
|
|||||||
|
|
||||||
self.switch_fan_mode(auto=False)
|
self.switch_fan_mode(auto=False)
|
||||||
base_cmd = 'raw 0x30 0x30 0x02 0xff'
|
base_cmd = 'raw 0x30 0x30 0x02 0xff'
|
||||||
return self.run_cmd(cmd=f'{base_cmd} {hex(speed)}')
|
return self.run_cmd(cmd=f'{base_cmd} {hex(speed)}')
|
||||||
17
start.py
17
start.py
@@ -7,25 +7,24 @@ from controller.logger import logger
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
host = os.getenv('HOST')
|
host = "10.10.11.11" #os.getenv('HOST') │
|
||||||
username = os.getenv('USERNAME')
|
username = "root" #os.getenv('USERNAME') │
|
||||||
password = os.getenv('PASSWORD')
|
password = "ddmabc123" #os.getenv('PASSWORD')
|
||||||
|
|
||||||
if host is None:
|
if host is None:
|
||||||
raise RuntimeError('HOST 环境变量未设置')
|
raise RuntimeError('未设置 HOST 环境变量')
|
||||||
|
|
||||||
if username is None:
|
if username is None:
|
||||||
raise RuntimeError('USERNAME 环境变量未设置')
|
raise RuntimeError('未设置 USERNAME 环境变量')
|
||||||
|
|
||||||
if password is None:
|
if password is None:
|
||||||
raise RuntimeError('PASSWORD 环境变量未设置')
|
raise RuntimeError('未设置 PASSWORD 环境变量')
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
client = FanController(host=host, username=username, password=password)
|
client = FanController(host=host, username=username, password=password)
|
||||||
client.run()
|
client.run()
|
||||||
time.sleep(60)
|
time.sleep(60)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.error(
|
logger.error(
|
||||||
f'运行控制器失败 {err}. {traceback.format_exc()}'
|
f'运行控制器失败 {err}. {traceback.format_exc()}'
|
||||||
)
|
)
|
||||||
Reference in New Issue
Block a user