From 008f50a8c0b7c3ca7a1820d4102dc8e3159bc00a Mon Sep 17 00:00:00 2001 From: lkddi Date: Tue, 2 Dec 2025 18:14:22 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E8=BF=9BIPMI=E7=BD=91=E7=BB=9C?= =?UTF-8?q?=E8=BF=9E=E6=8E=A5=E7=9A=84=E5=81=A5=E5=A3=AE=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 增加重试次数从3到5次以应对网络波动 - 增加超时时间到60秒 - 添加针对IPMI会话建立失败的特殊处理 - 增加重试间隔时间,提高网络问题的恢复能力 --- controller/ipmi.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/controller/ipmi.py b/controller/ipmi.py index 306fcce..fc1dcc9 100644 --- a/controller/ipmi.py +++ b/controller/ipmi.py @@ -14,29 +14,38 @@ class IpmiTool: def run_cmd(self, cmd: str) -> str: basecmd = f'ipmitool -H {self.host} -I lanplus -U {self.username} -P {self.password}' command = f'{basecmd} {cmd}' - retry_count = 3 # 设置重试次数 + retry_count = 5 # 增加重试次数以应对网络波动 for attempt in range(retry_count): try: # print(f"Executing command: {command}") # 添加调试信息 - result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=30) + result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=60) # 增加超时时间 if result.returncode != 0: + error_msg = result.stderr.strip() + # 检查是否是网络连接问题 + if "Unable to establish IPMI" in error_msg or "session" in error_msg: + logger.warning(f'IPMI会话建立失败 (尝试 {attempt + 1}/{retry_count}): {error_msg}') + if attempt < retry_count - 1: + time.sleep(10) # 网络问题需要更长的等待时间 + continue raise RuntimeError( - f'IPMI 命令执行失败: {cmd}\n错误详情: {result.stderr}' # 更清晰的错误提示 + f'IPMI 命令执行失败: {cmd}\n错误详情: {error_msg}' # 更清晰的错误提示 ) - # 添加网络和认证排查提示 - print("请检查以下内容:") - print("1. 确保 BMC 地址可访问(ping 测试或网络配置)。") - print("2. 验证用户名、密码是否正确。") - print("3. 检查目标设备的 IPMI 功能是否启用。") return result.stdout except subprocess.TimeoutExpired: + logger.warning(f'命令超时 (尝试 {attempt + 1}/{retry_count})') if attempt < retry_count - 1: - logger.warning(f'命令超时,正在重试... (尝试次数 {attempt + 1}/{retry_count})') - time.sleep(5) # 每次重试前等待 5 秒 + logger.warning(f'正在重试... (尝试次数 {attempt + 1}/{retry_count})') + time.sleep(10) # 每次重试前等待更长时间 else: raise RuntimeError('IPMI 命令超时。请检查网络连接或服务器状态。') # 更明确的错误提示 + except Exception as e: + logger.warning(f'IPMI命令执行异常 (尝试 {attempt + 1}/{retry_count}): {str(e)}') + if attempt < retry_count - 1: + time.sleep(10) # 网络问题需要更长的等待时间 + else: + raise e def mc_info(self) -> str: """